From 9ec15f15832faebf876ceb70bc0f992900e01a5d Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 14:37:00 +0200 Subject: [PATCH 1/6] tests/tail: Add, fix tests. Note some tests are not passing yet --- tests/by-util/test_tail.rs | 374 +++++++++++++++++++++++-------------- 1 file changed, 235 insertions(+), 139 deletions(-) diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index a8039cef665..6858e614c31 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT // spell-checker:ignore (libs) kqueue -// spell-checker:ignore (jargon) tailable untailable datasame runneradmin tmpi +// spell-checker:ignore (jargon) tailable untailable datasame runneradmin tmpi Signum extern crate tail; @@ -17,6 +17,7 @@ use crate::common::util::is_ci; use crate::common::util::TestScenario; use pretty_assertions::assert_eq; use rand::distributions::Alphanumeric; +use regex::Regex; use rstest::rstest; use std::char::from_digit; use std::fs::File; @@ -48,7 +49,6 @@ static FOLLOW_NAME_SHORT_EXP: &str = "follow_name_short.expected"; #[allow(dead_code)] static FOLLOW_NAME_EXP: &str = "follow_name.expected"; -#[cfg(not(windows))] const DEFAULT_SLEEP_INTERVAL_MILLIS: u64 = 1000; // The binary integer "10000000" is *not* a valid UTF-8 encoding @@ -58,6 +58,24 @@ const INVALID_UTF8: u8 = 0x80; #[cfg(windows)] const INVALID_UTF16: u16 = 0xD800; +/// Copied this from test_tee. test_tee uses this on linux targets only. +#[cfg(target_os = "linux")] +fn make_broken_pipe() -> File { + use libc::c_int; + use std::os::unix::io::FromRawFd; + + let mut fds: [c_int; 2] = [0, 0]; + if unsafe { libc::pipe(&mut fds as *mut c_int) } != 0 { + panic!("Failed to create pipe"); + } + + // Drop the read end of the pipe + let _ = unsafe { File::from_raw_fd(fds[0]) }; + + // Make the write end of the pipe into a Rust File + unsafe { File::from_raw_fd(fds[1]) } +} + #[test] fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1); @@ -147,7 +165,7 @@ fn test_stdin_redirect_offset() { } #[test] -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] // FIXME: for currently not working platforms +#[cfg(all(not(target_vendor = "apple")))] // FIXME: for currently not working platforms fn test_stdin_redirect_offset2() { // FIXME: windows: Failed because of difference in printed header. See below. // actual : ==> - <== @@ -288,12 +306,7 @@ fn test_follow_redirect_stdin_name_retry() { } #[test] -#[cfg(all( - not(target_vendor = "apple"), - not(target_os = "windows"), - not(target_os = "android"), - not(target_os = "freebsd") -))] // FIXME: for currently not working platforms +#[cfg(all(unix, not(any(target_os = "android"))))] // FIXME: fix this test for Android fn test_stdin_redirect_dir() { // $ mkdir dir // $ tail < dir, $ tail - < dir @@ -318,39 +331,6 @@ fn test_stdin_redirect_dir() { .code_is(1); } -// On macOS path.is_dir() can be false for directories if it was a redirect, -// e.g. `$ tail < DIR. The library feature to detect the -// std::io::ErrorKind::IsADirectory isn't stable so we currently show the a wrong -// error message. -// FIXME: If `std::io::ErrorKind::IsADirectory` becomes stable or macos handles -// redirected directories like linux show the correct message like in -// `test_stdin_redirect_dir` -#[test] -#[cfg(target_vendor = "apple")] -fn test_stdin_redirect_dir_when_target_os_is_macos() { - // $ mkdir dir - // $ tail < dir, $ tail - < dir - // tail: error reading 'standard input': Is a directory - - let ts = TestScenario::new(util_name!()); - let at = &ts.fixtures; - at.mkdir("dir"); - - ts.ucmd() - .set_stdin(File::open(at.plus("dir")).unwrap()) - .fails() - .no_stdout() - .stderr_is("tail: cannot open 'standard input' for reading: No such file or directory\n") - .code_is(1); - ts.ucmd() - .set_stdin(File::open(at.plus("dir")).unwrap()) - .arg("-") - .fails() - .no_stdout() - .stderr_is("tail: cannot open 'standard input' for reading: No such file or directory\n") - .code_is(1); -} - #[test] fn test_follow_stdin_descriptor() { let ts = TestScenario::new(util_name!()); @@ -389,25 +369,6 @@ fn test_follow_stdin_name_retry() { } } -#[test] -fn test_follow_bad_fd() { - // Provoke a "bad file descriptor" error by closing the fd - // inspired by: "gnu/tests/tail-2/follow-stdin.sh" - - // `$ tail -f <&-` OR `$ tail -f - <&-` - // tail: cannot fstat 'standard input': Bad file descriptor - // tail: error reading 'standard input': Bad file descriptor - // tail: no files remaining - // tail: -: Bad file descriptor - // - // $ `tail <&-` - // tail: cannot fstat 'standard input': Bad file descriptor - // tail: -: Bad file descriptor - - // WONT-FIX: - // see also: https://github.com/uutils/coreutils/issues/2873 -} - #[test] fn test_single_default() { new_ucmd!() @@ -582,12 +543,21 @@ fn test_follow_multiple_untailable() { // tail: DIR2: cannot follow end of this type of file; giving up on this name // tail: no files remaining + // FIXME: DIR1 and DIR2 need to be in single quotes let expected_stdout = "==> DIR1 <==\n\n==> DIR2 <==\n"; + #[cfg(not(windows))] let expected_stderr = "tail: error reading 'DIR1': Is a directory\n\ tail: DIR1: cannot follow end of this type of file; giving up on this name\n\ tail: error reading 'DIR2': Is a directory\n\ tail: DIR2: cannot follow end of this type of file; giving up on this name\n\ tail: no files remaining\n"; + #[cfg(windows)] + let expected_stderr = + "tail: error reading 'DIR1': An operation is not supported on a directory.\n\ + tail: DIR1: cannot follow end of this type of file; giving up on this name\n\ + tail: error reading 'DIR2': An operation is not supported on a directory.\n\ + tail: DIR2: cannot follow end of this type of file; giving up on this name\n\ + tail: no files remaining\n"; let (at, mut ucmd) = at_and_ucmd!(); at.mkdir("DIR1"); @@ -909,10 +879,11 @@ fn test_multiple_input_quiet_flag_overrides_verbose_flag_for_suppressing_headers fn test_dir() { let (at, mut ucmd) = at_and_ucmd!(); at.mkdir("DIR"); - ucmd.arg("DIR") - .run() - .stderr_is("tail: error reading 'DIR': Is a directory\n") - .code_is(1); + #[cfg(not(windows))] + let expected = "tail: error reading 'DIR': Is a directory\n"; + #[cfg(windows)] + let expected = "tail: error reading 'DIR': An operation is not supported on a directory.\n"; + ucmd.arg("DIR").run().stderr_is(expected).code_is(1); } #[test] @@ -920,17 +891,24 @@ fn test_dir_follow() { let ts = TestScenario::new(util_name!()); let at = &ts.fixtures; at.mkdir("DIR"); + + #[cfg(not(windows))] + let expected = "tail: error reading 'DIR': Is a directory\n\ + tail: DIR: cannot follow end of this type of file; giving up on this name\n\ + tail: no files remaining\n"; + #[cfg(windows)] + let expected = "tail: error reading 'DIR': An operation is not supported on a directory.\n\ + tail: DIR: cannot follow end of this type of file; giving up on this name\n\ + tail: no files remaining\n"; + + // FIXME: DIR must be single quoted for mode in &["--follow=descriptor", "--follow=name"] { ts.ucmd() .arg(mode) .arg("DIR") .run() .no_stdout() - .stderr_is( - "tail: error reading 'DIR': Is a directory\n\ - tail: DIR: cannot follow end of this type of file; giving up on this name\n\ - tail: no files remaining\n", - ) + .stderr_is(expected) .code_is(1); } } @@ -940,17 +918,22 @@ fn test_dir_follow_retry() { let ts = TestScenario::new(util_name!()); let at = &ts.fixtures; at.mkdir("DIR"); + #[cfg(not(windows))] + let expected = "tail: warning: --retry only effective for the initial open\n\ + tail: error reading 'DIR': Is a directory\n\ + tail: DIR: cannot follow end of this type of file\n\ + tail: no files remaining\n"; + #[cfg(windows)] + let expected = "tail: warning: --retry only effective for the initial open\n\ + tail: error reading 'DIR': An operation is not supported on a directory.\n\ + tail: DIR: cannot follow end of this type of file\n\ + tail: no files remaining\n"; ts.ucmd() .arg("--follow=descriptor") .arg("--retry") .arg("DIR") .run() - .stderr_is( - "tail: warning: --retry only effective for the initial open\n\ - tail: error reading 'DIR': Is a directory\n\ - tail: DIR: cannot follow end of this type of file\n\ - tail: no files remaining\n", - ) + .stderr_is(expected) .code_is(1); } @@ -1408,6 +1391,7 @@ fn test_retry7() { let at = &ts.fixtures; let untailable = "untailable"; + // FIXME: >>tail: untailable: cannot follow ...<< untailable must be quoted >>tail: 'untailable': cannot follow ...<< let expected_stderr = "tail: error reading 'untailable': Is a directory\n\ tail: untailable: cannot follow end of this type of file\n\ tail: 'untailable' has become accessible\n\ @@ -3438,10 +3422,16 @@ fn test_when_follow_retry_given_redirected_stdin_from_directory_then_correct_err let at = &ts.fixtures; at.mkdir("dir"); + #[cfg(not(windows))] let expected = "tail: warning: --retry only effective for the initial open\n\ tail: error reading 'standard input': Is a directory\n\ tail: 'standard input': cannot follow end of this type of file\n\ tail: no files remaining\n"; + #[cfg(windows)] + let expected = "tail: warning: --retry only effective for the initial open\n\ + tail: error reading 'standard input': An operation is not supported on a directory.\n\ + tail: 'standard input': cannot follow end of this type of file\n\ + tail: no files remaining\n"; ts.ucmd() .set_stdin(File::open(at.plus("dir")).unwrap()) .args(&["-f", "--retry"]) @@ -3456,7 +3446,10 @@ fn test_when_argument_file_is_a_directory() { let at = &ts.fixtures; at.mkdir("dir"); + #[cfg(not(windows))] let expected = "tail: error reading 'dir': Is a directory\n"; + #[cfg(windows)] + let expected = "tail: error reading 'dir': An operation is not supported on a directory.\n"; ts.ucmd() .arg("dir") .fails() @@ -3523,7 +3516,6 @@ fn test_when_argument_file_is_a_symlink_to_directory_then_error() { // TODO: make this work on windows #[test] #[cfg(unix)] -#[cfg(disabled_until_fixed)] fn test_when_argument_file_is_a_faulty_symlink_then_error() { let ts = TestScenario::new(util_name!()); let at = &ts.fixtures; @@ -3531,11 +3523,11 @@ fn test_when_argument_file_is_a_faulty_symlink_then_error() { at.symlink_file("self", "self"); #[cfg(all(not(target_env = "musl"), not(target_os = "android")))] - let expected = "tail: cannot open 'self' for reading: Too many levels of symbolic links"; + let expected = "tail: cannot open 'self' for reading: Too many levels of symbolic links\n"; #[cfg(all(not(target_env = "musl"), target_os = "android"))] - let expected = "tail: cannot open 'self' for reading: Too many symbolic links encountered"; + let expected = "tail: cannot open 'self' for reading: Too many symbolic links encountered\n"; #[cfg(all(target_env = "musl", not(target_os = "android")))] - let expected = "tail: cannot open 'self' for reading: Symbolic link loop"; + let expected = "tail: cannot open 'self' for reading: Symbolic link loop\n"; ts.ucmd() .arg("self") @@ -3545,7 +3537,7 @@ fn test_when_argument_file_is_a_faulty_symlink_then_error() { at.symlink_file("missing", "broken"); - let expected = "tail: cannot open 'broken' for reading: No such file or directory"; + let expected = "tail: cannot open 'broken' for reading: No such file or directory\n"; ts.ucmd() .arg("broken") .fails() @@ -3555,7 +3547,6 @@ fn test_when_argument_file_is_a_faulty_symlink_then_error() { #[test] #[cfg(unix)] -#[cfg(disabled_until_fixed)] fn test_when_argument_file_is_non_existent_unix_socket_address_then_error() { use std::os::unix::net; @@ -3614,7 +3605,6 @@ fn test_when_argument_file_is_non_existent_unix_socket_address_then_error() { } #[test] -#[cfg(disabled_until_fixed)] fn test_when_argument_files_are_simple_combinations_of_stdin_and_regular_file() { let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; @@ -3716,7 +3706,6 @@ fn test_when_argument_files_are_simple_combinations_of_stdin_and_regular_file() } #[test] -#[cfg(disabled_until_fixed)] fn test_when_argument_files_are_triple_combinations_of_fifo_pipe_and_regular_file() { let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; @@ -3770,25 +3759,26 @@ fn test_when_argument_files_are_triple_combinations_of_fifo_pipe_and_regular_fil // print the fifo twice. This matches the behavior, if only the pipe is present without fifo // (See test above). Note that for example a zsh shell prints the pipe data and has therefore // different output from the sh shell (or cmd shell on windows). - + // // windows: tail returns with success although there is an error message present (on some // windows systems). This error message comes from `echo` (the line ending `\r\n` indicates that - // too) which cannot write to the pipe because tail finished before echo was able to write to - // the pipe. Seems that windows `cmd` (like posix shells) ignores pipes when a fifo is present. - // This is actually the wished behavior and the test therefore succeeds. + // too) which cannot write to the pipe. This is actually the wished behavior and the test + // therefore succeeds in such cases. + // + // [`CmdResult::stdout_matches`] trims the stdout, so we ignore the final newlines in the regex. #[cfg(windows)] - let expected = "==> standard input <==\n\ + let expected = "^==> standard input <==\n\ fifo data\n\ ==> data <==\n\ file data\n\ - ==> standard input <==\n\ - (The process tried to write to a nonexistent pipe.\r\n)?"; + ==> standard input <==(\n)?\ + (The process tried to write to a nonexistent pipe.(\r\n)?)?$"; #[cfg(unix)] - let expected = "==> standard input <==\n\ + let expected = "^==> standard input <==\n\ fifo data\n\ ==> data <==\n\ file data\n\ - ==> standard input <==\n"; + ==> standard input <==(\n)?$"; #[cfg(windows)] let cmd = ["cmd", "/C"]; @@ -3802,8 +3792,9 @@ fn test_when_argument_files_are_triple_combinations_of_fifo_pipe_and_regular_fil "echo pipe data | {} tail -c +0 - data - < fifo", scene.bin_path.display(), )) + .stderr_to_stdout() .run() - .stdout_only(expected) + .stdout_matches(&Regex::new(expected).unwrap()) .success(); let expected = "==> standard input <==\n\ @@ -3827,7 +3818,6 @@ fn test_when_argument_files_are_triple_combinations_of_fifo_pipe_and_regular_fil // test system. However, this behavior shows up on the command line and, at the time of writing this // description, with this test on macos and windows. #[test] -#[cfg(disable_until_fixed)] fn test_when_follow_retry_then_initial_print_of_file_is_written_to_stdout() { let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; @@ -4106,27 +4096,27 @@ fn test_args_when_settings_check_warnings_follow_indefinitely_then_no_warning() .make_assertion_with_delay(delay) .with_current_output() .stdout_only(expected_stdout); + } - let expected_stdout = format!( - "==> standard input <==\n\ + let expected_stdout = format!( + "==> standard input <==\n\ {fifo_data}\n\ ==> {file_name} <==\n\ {file_data}" - ); - let mut child = scene - .ucmd() - .args(&["--follow=descriptor", "--pid=0", "-", file_name]) - .set_stdin(File::open(fixtures.plus(fifo_name)).unwrap()) - .stderr_to_stdout() - .run_no_wait(); + ); + let mut child = scene + .ucmd() + .args(&["--follow=descriptor", "--pid=0", "-", file_name]) + .set_stdin(File::open(fixtures.plus(fifo_name)).unwrap()) + .stderr_to_stdout() + .run_no_wait(); - child.make_assertion_with_delay(delay).is_alive(); - child - .kill() - .make_assertion_with_delay(delay) - .with_current_output() - .stdout_only(expected_stdout); - } + child.make_assertion_with_delay(delay).is_alive(); + child + .kill() + .make_assertion_with_delay(delay) + .with_current_output() + .stdout_only(expected_stdout); } /// The expected test outputs come from gnu's tail. @@ -4360,28 +4350,37 @@ fn test_follow_when_file_and_symlink_are_pointing_to_same_file_and_append_data() .stdout_only(expected_stdout); } -// Fails with: -// 'Assertion failed. Expected 'tail' to be running but exited with status=exit status: 1. -// stdout: -// stderr: tail: warning: --retry ignored; --retry is useful only when following -// tail: error reading 'dir': Is a directory -// ' #[test] -#[cfg(disabled_until_fixed)] fn test_args_when_directory_given_shorthand_big_f_together_with_retry() { let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; let dirname = "dir"; fixtures.mkdir(dirname); + #[cfg(not(windows))] let expected_stderr = format!( "tail: error reading '{0}': Is a directory\n\ tail: {0}: cannot follow end of this type of file\n", dirname ); + #[cfg(windows)] + let expected_stderr = format!( + "tail: error reading '{0}': An operation is not supported on a directory.\n\ + tail: {0}: cannot follow end of this type of file\n", + dirname + ); let mut child = scene.ucmd().args(&["-F", "--retry", "dir"]).run_no_wait(); + child.make_assertion_with_delay(500).is_alive(); + child + .kill() + .make_assertion() + .with_current_output() + .stderr_only(&expected_stderr); + + let mut child = scene.ucmd().args(&["--retry", "-F", "dir"]).run_no_wait(); + child.make_assertion_with_delay(500).is_alive(); child .kill() @@ -4397,28 +4396,8 @@ fn test_args_when_directory_given_shorthand_big_f_together_with_retry() { /// ==> /absolute/path/to/data <== /// file data -/// -/// Fails on windows with -/// Diff < left / right > : -// ==> data <== -// file data -// ==> \\?\C:\Users\runneradmin\AppData\Local\Temp\.tmpi6lNnX\data <== -// >file data -// < -// -// Fails on freebsd with -// Diff < left / right > : -// ==> data <== -// file data -// ==> /tmp/.tmpZPXPlS/data <== -// >file data -// < #[test] -#[cfg(all( - not(target_vendor = "apple"), - not(target_os = "windows"), - not(target_os = "freebsd") -))] +#[cfg(all(not(target_vendor = "apple")))] fn test_follow_when_files_are_pointing_to_same_relative_file_and_file_stays_same_size() { let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; @@ -4799,3 +4778,120 @@ fn test_obsolete_encoding_windows() { .stderr_is("tail: bad argument encoding: '-�b'\n") .code_is(1); } + +#[rstest] +#[case::when_not_follow(&["-c", "+0", "-"])] +// The error output deviates from gnu's tail which is: +// tail: cannot fstat 'standard input': Bad file descriptor +// tail: error reading 'standard input': Bad file descriptor +// tail: no files remaining +// tail: -: Bad file descriptor +#[case::when_follow(&["-c", "+0", "-f", "-"])] +fn test_when_stdin_is_bad_file_descriptor(#[case] args: &[&str]) { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + // Opens the file in write only mode and leads to the bad file descriptor error when used as stdin + let file = File::create(at.plus_as_string("name")).unwrap(); + #[cfg(not(windows))] + let expected = "tail: error reading 'standard input': Bad file descriptor\n"; + #[cfg(windows)] + let expected = "tail: error reading 'standard input': Permission denied\n"; + ts.ucmd() + .args(args) + .set_stdin(file) + .run() + .stderr_only(expected); +} + +#[test] +fn test_when_follow_two_files_and_stdin_is_bad_file_descriptor() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + at.write("good", "good content"); + // Opens a file in write only mode leading to the bad file descriptor error when used as stdin + // which needs read permissions + let bad_file = File::create(at.plus_as_string("name")).unwrap(); + + // The error output deviates from gnu's tail in so far, that the Bad file descriptor error + // message is printed only once: + // tail: error reading 'standard input': Bad file descriptor + // instead of: + // tail: cannot fstat 'standard input': Bad file descriptor + // tail: error reading 'standard input': Bad file descriptor + #[cfg(not(windows))] + let expected_stdout = "==> standard input <==\n\ + tail: error reading 'standard input': Bad file descriptor\n\ + \n\ + ==> good <==\n\ + good content"; + #[cfg(windows)] + let expected_stdout = "==> standard input <==\n\ + tail: error reading 'standard input': Permission denied\n\ + \n\ + ==> good <==\n\ + good content"; + + let mut child = ts + .ucmd() + .args(&["-c", "+0", "-f", "-", "good"]) + .set_stdin(bad_file) + .stderr_to_stdout() + .run_no_wait(); + + child + .make_assertion_with_delay(DEFAULT_SLEEP_INTERVAL_MILLIS) + .is_alive() + .with_current_output() + .stdout_only(expected_stdout); + + child + .kill() + .make_assertion_with_delay(DEFAULT_SLEEP_INTERVAL_MILLIS) + .is_not_alive() + .with_current_output() + .no_output(); +} + +#[rstest] +#[case::when_not_follow(&["-c", "+0", "-"])] +#[case::when_follow(&["-c", "+0", "-f", "-"])] +#[cfg(target_os = "linux")] +fn test_when_stdout_is_broken_pipe(#[case] args: &[&str]) { + new_ucmd!() + .args(args) + .set_stdout(make_broken_pipe()) + .run() + .stderr_only("tail: write error: Broken pipe\n"); +} + +#[test] +#[cfg(feature = "sleep")] +#[cfg(unix)] +/// tail with follow (-f) should be smart enough to terminate when the output is a pipe and the pipe +/// breaks. Currently uu_tail is running forever even if it'll never produce output due to the +/// broken pipe. gnu's tail in contrast does terminate after the 5 seconds sleep interval (without +/// error message). +#[cfg(disabled_until_fixed)] +fn test_when_follow_and_pipe_breaks_then_terminate() { + use std::time::Duration; + + use crate::common::util::{UCommand, TESTS_BINARY}; + + let mut child = UCommand::new() + .arg(format!( + "{TESTS_BINARY} tail -f /dev/null | {TESTS_BINARY} sleep 5" + )) + .timeout(Duration::from_secs(30)) + .run_no_wait(); + + child + .make_assertion_with_delay(DEFAULT_SLEEP_INTERVAL_MILLIS) + .is_alive(); + + child + .delay(10000) + .make_assertion() + .is_not_alive() + .with_all_output() + .no_output(); +} From b7588be84757cbc3560c57f14b2aee8601cff152 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 14:43:42 +0200 Subject: [PATCH 2/6] tail: Refactor Input::from parameter to use AsRef. Settings::inputs now uses Vec --- src/uu/tail/src/args.rs | 26 ++++++++++---------------- src/uu/tail/src/follow/watch.rs | 3 +-- src/uu/tail/src/paths.rs | 17 ++++++----------- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 8e039b5f4b0..177ff541a12 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -12,7 +12,6 @@ use clap::{parser::ValueSource, Arg, ArgAction, ArgMatches, Command}; use fundu::DurationParser; use is_terminal::IsTerminal; use same_file::Handle; -use std::collections::VecDeque; use std::ffi::OsString; use std::time::Duration; use uucore::error::{UResult, USimpleError, UUsageError}; @@ -141,7 +140,8 @@ pub struct Settings { pub use_polling: bool, pub verbose: bool, pub presume_input_pipe: bool, - pub inputs: VecDeque, + /// `FILE(s)` positional arguments + pub inputs: Vec, } impl Default for Settings { @@ -173,11 +173,11 @@ impl Settings { } settings.mode = FilterMode::from_obsolete_args(args); let input = if let Some(name) = name { - Input::from(&name) + Input::from(name) } else { Input::default() }; - settings.inputs.push_back(input); + settings.inputs.push(input); settings } @@ -257,19 +257,13 @@ impl Settings { } } - let mut inputs: VecDeque = matches - .get_many::(options::ARG_FILES) - .map(|v| v.map(|string| Input::from(&string)).collect()) - .unwrap_or_default(); + settings.inputs = matches + .get_raw(options::ARG_FILES) + .map(|v| v.map(Input::from).collect()) + .unwrap_or_else(|| vec![Input::default()]); - // apply default and add '-' to inputs if none is present - if inputs.is_empty() { - inputs.push_front(Input::default()); - } - - settings.verbose = inputs.len() > 1 && !matches.get_flag(options::verbosity::QUIET); - - settings.inputs = inputs; + settings.verbose = + settings.inputs.len() > 1 && !matches.get_flag(options::verbosity::QUIET); Ok(settings) } diff --git a/src/uu/tail/src/follow/watch.rs b/src/uu/tail/src/follow/watch.rs index 2c3cf10b855..966f3912040 100644 --- a/src/uu/tail/src/follow/watch.rs +++ b/src/uu/tail/src/follow/watch.rs @@ -10,7 +10,6 @@ use crate::follow::files::{FileHandling, PathData}; use crate::paths::{Input, InputKind, MetadataExtTail, PathExtTail}; use crate::{platform, text}; use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind}; -use std::collections::VecDeque; use std::io::BufRead; use std::path::{Path, PathBuf}; use std::sync::mpsc::{self, channel, Receiver}; @@ -270,7 +269,7 @@ impl Observer { self.follow_name() && self.retry } - fn init_files(&mut self, inputs: &VecDeque) -> UResult<()> { + fn init_files(&mut self, inputs: &Vec) -> UResult<()> { if let Some(watcher_rx) = &mut self.watcher_rx { for input in inputs { match input.kind() { diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs index 4badd68660a..bf2a4e4c267 100644 --- a/src/uu/tail/src/paths.rs +++ b/src/uu/tail/src/paths.rs @@ -27,22 +27,17 @@ pub struct Input { } impl Input { - pub fn from>(string: &T) -> Self { - let kind = if string.as_ref() == Path::new(text::DASH) { + pub fn from>(string: T) -> Self { + let string = string.as_ref(); + let kind = if string == OsStr::new(text::DASH) { InputKind::Stdin } else { - InputKind::File(PathBuf::from(string.as_ref())) + InputKind::File(PathBuf::from(string)) }; let display_name = match kind { - InputKind::File(_) => string.as_ref().to_string_lossy().to_string(), - InputKind::Stdin => { - if cfg!(unix) { - text::STDIN_HEADER.to_string() - } else { - string.as_ref().to_string_lossy().to_string() - } - } + InputKind::File(_) => string.to_string_lossy().to_string(), + InputKind::Stdin => text::STDIN_HEADER.to_string(), }; Self { kind, display_name } From 3c9171feff6a62b054c03b3f0e689a07e0368020 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 15:55:27 +0200 Subject: [PATCH 3/6] tail: Refactor chunks.rs and small parts of tail.rs. Replace unwraps with error propagation ... Return io::Result from methods in chunks.rs to be able to replace unwraps with error propagation and return the bytes read from the source so far. Store how many bytes are read in LinesChunkBuffer. Fixes: tail does not output anything when there's no newline in the source and the `--follow` option is given. Flush the buffered writer after a write explicitly. Simplify the BytesChunkBuffer::fill and LinesChunkBuffer::fill methods and make use of some new helper methods in these structs. Add more unit tests for structs in chunks.rs --- src/uu/tail/src/chunks.rs | 600 +++++++++++++++++++++++++++----- src/uu/tail/src/follow/files.rs | 7 +- src/uu/tail/src/tail.rs | 96 ++--- 3 files changed, 575 insertions(+), 128 deletions(-) diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs index 3aa380e20e0..b34907d489a 100644 --- a/src/uu/tail/src/chunks.rs +++ b/src/uu/tail/src/chunks.rs @@ -4,7 +4,7 @@ // * file that was distributed with this source code. //! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`] -//! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`]. +//! or else with [`LinesChunk`] or [`BytesChunk`]. //! //! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file. @@ -12,8 +12,7 @@ use std::collections::VecDeque; use std::fs::File; -use std::io::{BufRead, Read, Seek, SeekFrom, Write}; -use uucore::error::UResult; +use std::io::{self, Read, Seek, SeekFrom, Write}; /// When reading files in reverse in `bounded_tail`, this is the size of each /// block read at a time. @@ -46,26 +45,28 @@ pub struct ReverseChunks<'a> { } impl<'a> ReverseChunks<'a> { - pub fn new(file: &'a mut File) -> ReverseChunks<'a> { + pub fn new(file: &'a mut File) -> io::Result> { + // TODO: why is this platform dependent ? let current = if cfg!(unix) { - file.stream_position().unwrap() + file.stream_position()? } else { 0 }; - let size = file.seek(SeekFrom::End(0)).unwrap() - current; + let size = file.seek(SeekFrom::End(0))? - current; + // TODO: is the cast to usize safe ? on 32-bit systems ? let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; let block_idx = 0; - ReverseChunks { + Ok(ReverseChunks { file, size, max_blocks_to_read, block_idx, - } + }) } } impl<'a> Iterator for ReverseChunks<'a> { - type Item = Vec; + type Item = io::Result>; fn next(&mut self) -> Option { // If there are no more chunks to read, terminate the iterator. @@ -85,22 +86,24 @@ impl<'a> Iterator for ReverseChunks<'a> { // Seek backwards by the next chunk, read the full chunk into // `buf`, and then seek back to the start of the chunk again. let mut buf = vec![0; BLOCK_SIZE as usize]; - let pos = self - .file - .seek(SeekFrom::Current(-(block_size as i64))) - .unwrap(); - self.file - .read_exact(&mut buf[0..(block_size as usize)]) - .unwrap(); - let pos2 = self - .file - .seek(SeekFrom::Current(-(block_size as i64))) - .unwrap(); + let pos = match self.file.seek(SeekFrom::Current(-(block_size as i64))) { + Ok(pos) => pos, + Err(error) => return Some(Err(error)), + }; + + if let Err(error) = self.file.read_exact(&mut buf[0..(block_size as usize)]) { + return Some(Err(error)); + } + + let pos2 = match self.file.seek(SeekFrom::Current(-(block_size as i64))) { + Ok(pos) => pos, + Err(error) => return Some(Err(error)), + }; assert_eq!(pos, pos2); self.block_idx += 1; - Some(buf[0..(block_size as usize)].to_vec()) + Some(Ok(buf[0..(block_size as usize)].to_vec())) } } @@ -202,15 +205,27 @@ impl BytesChunk { &self.buffer[offset..self.bytes] } + /// Return true if the [`BytesChunk`] has bytes stored. pub fn has_data(&self) -> bool { self.bytes > 0 } - /// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, draining the reader by - /// that number of bytes. If EOF is reached (so 0 bytes are read), then returns - /// [`UResult`] or else the result with [`Some(bytes)`] where bytes is the number of bytes - /// read from the source. - pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { + /// Return true if the [`BytesChunk`] has no bytes stored. + pub fn is_empty(&self) -> bool { + !self.has_data() + } + + /// Return the amount of bytes stored in this [`BytesChunk`]. + pub fn len(&self) -> usize { + self.bytes + } + + /// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, + /// draining the reader by that number of bytes. If EOF is reached (so 0 + /// bytes are read), then returns [`io::Result`] or else the result + /// with [`io::Result io::Result> { let num_bytes = filehandle.read(&mut self.buffer)?; self.bytes = num_bytes; if num_bytes == 0 { @@ -285,18 +300,20 @@ impl BytesChunkBuffer { /// let mut chunks = BytesChunkBuffer::new(num_print); /// chunks.fill(&mut reader).unwrap(); /// ``` - pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { + pub fn fill(&mut self, reader: &mut impl Read) -> io::Result { + if self.num_print == 0 { + return Ok(0); + } + let mut chunk = Box::new(BytesChunk::new()); // fill chunks with all bytes from reader and reuse already instantiated chunks if possible while (chunk.fill(reader)?).is_some() { - self.bytes += chunk.bytes as u64; - self.chunks.push_back(chunk); + self.push_back(chunk); let first = &self.chunks[0]; if self.bytes - first.bytes as u64 > self.num_print { - chunk = self.chunks.pop_front().unwrap(); - self.bytes -= chunk.bytes as u64; + chunk = self.pop_front().unwrap(); } else { chunk = Box::new(BytesChunk::new()); } @@ -304,31 +321,61 @@ impl BytesChunkBuffer { // quit early if there are no chunks for example in case the pipe was empty if self.chunks.is_empty() { - return Ok(()); + return Ok(0); } - let chunk = self.chunks.pop_front().unwrap(); - // calculate the offset in the first chunk and put the calculated chunk as first element in // the self.chunks collection. The calculated offset must be in the range 0 to BUFFER_SIZE // and is therefore safely convertible to a usize without losses. let offset = self.bytes.saturating_sub(self.num_print) as usize; - self.chunks - .push_front(Box::new(BytesChunk::from_chunk(&chunk, offset))); - Ok(()) + let chunk = self.pop_front().unwrap(); + let chunk = Box::new(BytesChunk::from_chunk(&chunk, offset)); + self.push_front(chunk); + + Ok(self.bytes) } - pub fn print(&self, mut writer: impl Write) -> UResult<()> { + /// Print the whole [`BytesChunkBuffer`]. + pub fn print(&self, writer: &mut impl Write) -> io::Result<()> { for chunk in &self.chunks { writer.write_all(chunk.get_buffer())?; } Ok(()) } + /// Return true if the [`BytesChunkBuffer`] has bytes stored. pub fn has_data(&self) -> bool { !self.chunks.is_empty() } + + /// Return the amount of bytes this [`BytesChunkBuffer`] has stored. + pub fn get_bytes(&self) -> u64 { + self.bytes + } + + /// Return and remove the first [`BytesChunk`] stored in this [`BytesChunkBuffer`]. + fn pop_front(&mut self) -> Option> { + let chunk = self.chunks.pop_front(); + if let Some(chunk) = chunk { + self.bytes -= chunk.bytes as u64; + Some(chunk) + } else { + None + } + } + + /// Add a [`BytesChunk`] at the start of this [`BytesChunkBuffer`]. + fn push_front(&mut self, chunk: Box) { + self.bytes += chunk.bytes as u64; + self.chunks.push_front(chunk); + } + + /// Add a [`BytesChunk`] at the end of this [`BytesChunkBuffer`]. + fn push_back(&mut self, chunk: Box) { + self.bytes += chunk.bytes as u64; + self.chunks.push_back(chunk); + } } /// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current @@ -346,6 +393,7 @@ pub struct LinesChunk { } impl LinesChunk { + /// Create a new [`LinesChunk`] with an arbitrary `u8` as `delimiter`. pub fn new(delimiter: u8) -> Self { Self { chunk: BytesChunk::new(), @@ -433,6 +481,16 @@ impl LinesChunk { self.chunk.has_data() } + /// Return true if the [`LinesChunk`] has no bytes stored. + pub fn is_empty(&self) -> bool { + !self.has_data() + } + + /// Return the amount of bytes stored in this [`LinesChunk`]. + pub fn len(&self) -> usize { + self.chunk.len() + } + /// Returns this buffer safely. See [`BytesChunk::get_buffer`] /// /// returns: &[u8] with length `self.bytes` @@ -458,7 +516,7 @@ impl LinesChunk { /// that number of bytes. This function works like the [`BytesChunk::fill`] function besides /// that this function also counts and stores the number of lines encountered while reading from /// the `filehandle`. - pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { + pub fn fill(&mut self, filehandle: &mut impl Read) -> io::Result> { match self.chunk.fill(filehandle)? { None => { self.lines = 0; @@ -514,7 +572,7 @@ impl LinesChunk { /// /// * `writer`: must implement [`Write`] /// * `offset`: An offset in number of lines. - pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> { + pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> io::Result { self.print_bytes(writer, self.calculate_bytes_offset_from(offset)) } @@ -524,9 +582,10 @@ impl LinesChunk { /// /// * `writer`: must implement [`Write`] /// * `offset`: An offset in number of bytes. - pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> { - writer.write_all(self.get_buffer_with(offset))?; - Ok(()) + pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> io::Result { + let buffer = self.get_buffer_with(offset); + writer.write_all(buffer)?; + Ok(buffer.len()) } } @@ -544,6 +603,8 @@ pub struct LinesChunkBuffer { num_print: u64, /// Stores the [`LinesChunk`] chunks: VecDeque>, + /// The total amount of bytes stored in all chunks + bytes: u64, } impl LinesChunkBuffer { @@ -554,35 +615,33 @@ impl LinesChunkBuffer { num_print, lines: 0, chunks: VecDeque::new(), + bytes: 0, } } - /// Fills this buffer with chunks and consumes the reader completely. This method ensures that - /// there are exactly as many chunks as needed to match `self.num_print` lines, so there are - /// in sum exactly `self.num_print` lines stored in all chunks. The method returns an iterator - /// over these chunks. If there are no chunks, for example because the piped stdin contained no - /// lines, or `num_print = 0` then `iterator.next` will return None. - pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { - let mut chunk = Box::new(LinesChunk::new(self.delimiter)); + /// Fills this buffer with chunks and consumes the reader completely. + /// + /// This method ensures that there are exactly as many chunks as needed to match + /// `self.num_print` lines, so there are in sum exactly `self.num_print` lines stored in all + /// chunks. + pub fn fill(&mut self, reader: &mut impl Read) -> io::Result { + if self.num_print == 0 { + return Ok(0); + } + let mut chunk = Box::new(LinesChunk::new(self.delimiter)); while (chunk.fill(reader)?).is_some() { - self.lines += chunk.lines as u64; - self.chunks.push_back(chunk); + self.push_back(chunk); let first = &self.chunks[0]; if self.lines - first.lines as u64 > self.num_print { - chunk = self.chunks.pop_front().unwrap(); - - self.lines -= chunk.lines as u64; + chunk = self.pop_front().unwrap(); } else { chunk = Box::new(LinesChunk::new(self.delimiter)); } } - if self.chunks.is_empty() { - // chunks is empty when a file is empty so quitting early here - return Ok(()); - } else { + if self.has_data() { let length = &self.chunks.len(); let last = &mut self.chunks[length - 1]; if !last.get_buffer().ends_with(&[self.delimiter]) { @@ -593,41 +652,95 @@ impl LinesChunkBuffer { // skip unnecessary chunks and save the first chunk which may hold some lines we have to // print - let chunk = loop { - // it's safe to call unwrap here because there is at least one chunk and sorting out - // more chunks than exist shouldn't be possible. - let chunk = self.chunks.pop_front().unwrap(); - - // skip is true as long there are enough lines left in the other stored chunks. - let skip = self.lines - chunk.lines as u64 > self.num_print; - if skip { - self.lines -= chunk.lines as u64; - } else { - break chunk; + while let Some(chunk) = self.pop_front() { + // this is false as long there are enough lines left in the other stored chunks. + if self.lines <= self.num_print { + // Calculate the number of lines to skip in the current chunk. The calculated value must be + // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without + // losses. + let skip_lines = + (self.lines + chunk.lines as u64).saturating_sub(self.num_print) as usize; + + let chunk = Box::new(LinesChunk::from_chunk(&chunk, skip_lines)); + self.push_front(chunk); + break; } - }; - - // Calculate the number of lines to skip in the current chunk. The calculated value must be - // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without - // losses. - let skip_lines = self.lines.saturating_sub(self.num_print) as usize; - let chunk = LinesChunk::from_chunk(&chunk, skip_lines); - self.chunks.push_front(Box::new(chunk)); + } - Ok(()) + Ok(self.bytes) } - pub fn print(&self, mut writer: impl Write) -> UResult<()> { + /// Writes the whole [`LinesChunkBuffer`] into a `writer`. + pub fn print(&self, writer: &mut impl Write) -> io::Result<()> { for chunk in &self.chunks { - chunk.print_bytes(&mut writer, 0)?; + chunk.print_bytes(writer, 0)?; } Ok(()) } + + /// Return the amount of lines this buffer has stored. + pub fn get_lines(&self) -> u64 { + self.lines + } + + /// Return true if this buffer has stored any bytes. + pub fn has_data(&self) -> bool { + !self.chunks.is_empty() + } + + /// Return and remove the first [`LinesChunk`] stored in this [`LinesChunkBuffer`]. + fn pop_front(&mut self) -> Option> { + let chunk = self.chunks.pop_front(); + if let Some(chunk) = chunk { + self.bytes -= chunk.len() as u64; + self.lines -= chunk.lines as u64; + Some(chunk) + } else { + None + } + } + + /// Add a [`LinesChunk`] at the end of this [`LinesChunkBuffer`]. + fn push_back(&mut self, chunk: Box) { + self.bytes += chunk.len() as u64; + self.lines += chunk.lines as u64; + self.chunks.push_back(chunk); + } + + /// Add a [`LinesChunk`] at the start of this [`LinesChunkBuffer`]. + fn push_front(&mut self, chunk: Box) { + self.bytes += chunk.len() as u64; + self.lines += chunk.lines as u64; + self.chunks.push_front(chunk); + } } #[cfg(test)] mod tests { - use crate::chunks::{BytesChunk, BUFFER_SIZE}; + use super::*; + use std::io::{BufWriter, Cursor}; + + fn fill_lines_chunk(chunk: &mut LinesChunk, data: &str) -> usize { + let mut cursor = Cursor::new(data); + let result = chunk.fill(&mut cursor); + assert!(result.is_ok()); + let option = result.unwrap(); + option.unwrap_or(0) + } + + fn fill_lines_chunk_buffer(buffer: &mut LinesChunkBuffer, data: &str) -> u64 { + let mut cursor = Cursor::new(data); + let result = buffer.fill(&mut cursor); + assert!(result.is_ok()); + result.unwrap() + } + + fn fill_bytes_chunk_buffer(buffer: &mut BytesChunkBuffer, data: &str) -> u64 { + let mut cursor = Cursor::new(data); + let result = buffer.fill(&mut cursor); + assert!(result.is_ok()); + result.unwrap() + } #[test] fn test_bytes_chunk_from_when_offset_is_zero() { @@ -701,4 +814,327 @@ mod tests { let new_chunk = BytesChunk::from_chunk(&chunk, 1); assert_eq!(0, new_chunk.bytes); } + + #[test] + fn test_lines_chunk_fill_when_unix_line_endings() { + let mut chunk = LinesChunk::new(b'\n'); + + let bytes = fill_lines_chunk(&mut chunk, ""); + assert_eq!(bytes, 0); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "\n"); + assert_eq!(bytes, 1); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a"); + assert_eq!(bytes, 1); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "aa"); + assert_eq!(bytes, 2); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE + 1).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), 0); + + let bytes = fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE / 2).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + let bytes = fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + let bytes = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE); + + let bytes = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE + 1).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE); + } + + #[test] + fn test_lines_chunk_fill_when_windows_line_endings() { + let mut chunk = LinesChunk::new(b'\n'); + + let bytes = fill_lines_chunk(&mut chunk, "\r\n"); + assert_eq!(bytes, 2); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\n"); + assert_eq!(bytes, 3); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\na"); + assert_eq!(bytes, 4); + assert_eq!(chunk.get_lines(), 1); + + let bytes = fill_lines_chunk(&mut chunk, "a\r\na\r\n"); + assert_eq!(bytes, 6); + assert_eq!(chunk.get_lines(), 2); + + let bytes = fill_lines_chunk(&mut chunk, "\r\n".repeat(BUFFER_SIZE / 2).as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2); + + // tests the correct amount of lines when \r\n is split across different chunks + let mut data = "\r\n".repeat(BUFFER_SIZE / 2 - 1); + data.push('a'); + data.push('\r'); + data.push('\n'); + let bytes = fill_lines_chunk(&mut chunk, data.as_str()); + assert_eq!(bytes, BUFFER_SIZE); + assert_eq!(chunk.get_lines(), BUFFER_SIZE / 2 - 1); + } + + #[test] + fn test_lines_chunk_when_print_lines_no_offset_then_correct_amount_of_bytes() { + let mut chunk = LinesChunk::new(b'\n'); + let expected = fill_lines_chunk(&mut chunk, ""); + + let mut writer = BufWriter::new(vec![]); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "\n"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "a\n"); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + let expected = fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected); + + fill_lines_chunk(&mut chunk, "a\n".repeat(BUFFER_SIZE / 2 + 1).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE); + } + + #[test] + fn test_lines_chunk_when_print_lines_with_offset_then_correct_amount_of_bytes() { + let mut chunk = LinesChunk::new(b'\n'); + fill_lines_chunk(&mut chunk, ""); + + let mut writer = BufWriter::new(vec![]); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 2); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a"); + let result = chunk.print_lines(&mut writer, 100); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\n\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 1); + + fill_lines_chunk(&mut chunk, "a\na\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 2); + + fill_lines_chunk(&mut chunk, "a\na\n"); + let result = chunk.print_lines(&mut writer, 2); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "a\naa\n"); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 3); + + fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 0); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE); + + fill_lines_chunk(&mut chunk, "a".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), BUFFER_SIZE - 1); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, BUFFER_SIZE - 1); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 1); + + fill_lines_chunk(&mut chunk, "\n".repeat(BUFFER_SIZE).as_str()); + let result = chunk.print_lines(&mut writer, BUFFER_SIZE); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), 0); + } + + #[test] + fn test_lines_chunk_buffer_fill_when_num_print_is_equal_to_size() { + let size = 0; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "\n"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "\n".repeat(size).as_str()); + assert_eq!(buffer.get_lines(), size as u64); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut data = "a".repeat(BUFFER_SIZE); + data.push('\n'); + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, data.as_str()); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE + 1; + let mut data = "a".repeat(BUFFER_SIZE - 1); + data.push('\n'); + data.push('\n'); + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, data.as_str()); + assert_eq!(buffer.get_lines(), 2); + assert_eq!(bytes, size as u64); + + let size = BUFFER_SIZE * 2; + let mut buffer = LinesChunkBuffer::new(b'\n', size as u64); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + } + + #[test] + fn test_lines_chunk_buffer_fill_when_num_print_is_not_equal_to_size() { + let size = 0; + let mut buffer = LinesChunkBuffer::new(b'\n', 1); + let bytes = fill_lines_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = LinesChunkBuffer::new(b'\n', 2); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 1); + assert_eq!(bytes, size as u64); + + let mut buffer = LinesChunkBuffer::new(b'\n', 0); + let bytes = fill_lines_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_lines(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + } + + #[test] + fn test_bytes_chunk_buffer_fill_when_num_print_is_equal_to_size() { + let size = 0; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, size as u64); + assert!(!buffer.has_data()); + + let size = 1; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 1); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE + 1; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + + let size = BUFFER_SIZE * 2; + let mut buffer = BytesChunkBuffer::new(size as u64); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a".repeat(size).as_str()); + assert_eq!(buffer.get_bytes(), size as u64); + assert_eq!(bytes, size as u64); + assert!(buffer.has_data()); + } + + #[test] + fn test_bytes_chunk_buffer_fill_when_num_print_is_not_equal_to_size() { + let mut buffer = BytesChunkBuffer::new(0); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + + let mut buffer = BytesChunkBuffer::new(1); + let bytes = fill_bytes_chunk_buffer(&mut buffer, ""); + assert_eq!(buffer.get_bytes(), 0); + assert_eq!(bytes, 0); + assert!(!buffer.has_data()); + + let mut buffer = BytesChunkBuffer::new(2); + let bytes = fill_bytes_chunk_buffer(&mut buffer, "a"); + assert_eq!(buffer.get_bytes(), 1); + assert_eq!(bytes, 1); + assert!(buffer.has_data()); + } } diff --git a/src/uu/tail/src/follow/files.rs b/src/uu/tail/src/follow/files.rs index 8686e73f418..74639243e54 100644 --- a/src/uu/tail/src/follow/files.rs +++ b/src/uu/tail/src/follow/files.rs @@ -12,7 +12,7 @@ use crate::text; use std::collections::hash_map::Keys; use std::collections::HashMap; use std::fs::{File, Metadata}; -use std::io::{stdout, BufRead, BufReader, BufWriter}; +use std::io::{stdout, BufRead, BufReader, BufWriter, Write}; use std::path::{Path, PathBuf}; use uucore::error::UResult; @@ -147,8 +147,9 @@ impl FileHandling { } let stdout = stdout(); - let writer = BufWriter::new(stdout.lock()); - chunks.print(writer)?; + let mut writer = BufWriter::new(stdout.lock()); + chunks.print(&mut writer)?; + writer.flush()?; self.last.replace(path.to_owned()); self.update_metadata(path, None); diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index e07616c6f59..749b21a212d 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -151,7 +151,7 @@ fn tail_file( && file.is_seekable(if input.is_stdin() { offset } else { 0 }) && metadata.as_ref().unwrap().get_block_size() > 0 { - bounded_tail(&mut file, settings); + bounded_tail(&mut file, settings)?; reader = BufReader::new(file); } else { reader = BufReader::new(file); @@ -287,11 +287,7 @@ fn tail_stdin( /// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap(); /// assert_eq!(i, 2); /// ``` -fn forwards_thru_file( - reader: &mut R, - num_delimiters: u64, - delimiter: u8, -) -> std::io::Result +fn forwards_thru_file(reader: &mut R, num_delimiters: u64, delimiter: u8) -> io::Result where R: Read, { @@ -320,12 +316,14 @@ where /// Iterate over bytes in the file, in reverse, until we find the /// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the /// position just after that delimiter. -fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { +fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) -> io::Result<()> { // This variable counts the number of delimiters found in the file // so far (reading from the end of the file toward the beginning). let mut counter = 0; - for (block_idx, slice) in ReverseChunks::new(file).enumerate() { + for (block_idx, slice) in ReverseChunks::new(file)?.enumerate() { + let slice = slice?; + // Iterate over each byte in the slice in reverse order. let mut iter = slice.iter().enumerate().rev(); @@ -350,12 +348,14 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { // cursor in the file is at the *beginning* of the // block, so seeking forward by `i + 1` bytes puts // us right after the found delimiter. - file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); - return; + file.seek(SeekFrom::Current((i + 1) as i64))?; + return Ok(()); } } } } + + Ok(()) } /// When tail'ing a file, we do not need to read the whole file from start to @@ -363,54 +363,54 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { /// end of the file, and then read the file "backwards" in blocks of size /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. -fn bounded_tail(file: &mut File, settings: &Settings) { +fn bounded_tail(file: &mut File, settings: &Settings) -> io::Result { debug_assert!(!settings.presume_input_pipe); // Find the position in the file to start printing from. match &settings.mode { FilterMode::Lines(Signum::Negative(count), delimiter) => { - backwards_thru_file(file, *count, *delimiter); + backwards_thru_file(file, *count, *delimiter)?; } FilterMode::Lines(Signum::Positive(count), delimiter) if count > &1 => { - let i = forwards_thru_file(file, *count - 1, *delimiter).unwrap(); - file.seek(SeekFrom::Start(i as u64)).unwrap(); + let i = forwards_thru_file(file, *count - 1, *delimiter)?; + file.seek(SeekFrom::Start(i as u64))?; } FilterMode::Lines(Signum::MinusZero, _) => { - return; + return Ok(0); } FilterMode::Bytes(Signum::Negative(count)) => { - let len = file.seek(SeekFrom::End(0)).unwrap(); - file.seek(SeekFrom::End(-((*count).min(len) as i64))) - .unwrap(); + let len = file.seek(SeekFrom::End(0))?; + file.seek(SeekFrom::End(-((*count).min(len) as i64)))?; } FilterMode::Bytes(Signum::Positive(count)) if count > &1 => { // GNU `tail` seems to index bytes and lines starting at 1, not // at 0. It seems to treat `+0` and `+1` as the same thing. - file.seek(SeekFrom::Start(*count - 1)).unwrap(); + file.seek(SeekFrom::Start(*count - 1))?; } FilterMode::Bytes(Signum::MinusZero) => { - return; + return Ok(0); } _ => {} } // Print the target section of the file. let stdout = stdout(); - let mut stdout = stdout.lock(); - std::io::copy(file, &mut stdout).unwrap(); + let mut writer = BufWriter::new(stdout.lock()); + io::copy(file, &mut writer).and_then(|a| writer.flush().map(|_| a)) } -fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UResult<()> { +fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io::Result { let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); - match &settings.mode { + let result = match &settings.mode { FilterMode::Lines(Signum::Negative(count), sep) => { let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count); - chunks.fill(reader)?; + let bytes = chunks.fill(reader)?; chunks.print(&mut writer)?; + Ok(bytes) } FilterMode::Lines(Signum::PlusZero | Signum::Positive(1), _) => { - io::copy(reader, &mut writer)?; + io::copy(reader, &mut writer) } FilterMode::Lines(Signum::Positive(count), sep) => { let mut num_skip = *count - 1; @@ -424,50 +424,60 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UR } } if chunk.has_data() { - chunk.print_lines(&mut writer, num_skip as usize)?; - io::copy(reader, &mut writer)?; + let bytes = chunk.print_lines(&mut writer, num_skip as usize)?; + io::copy(reader, &mut writer).map(|b| b + bytes as u64) + } else { + Ok(0) } } FilterMode::Bytes(Signum::Negative(count)) => { let mut chunks = chunks::BytesChunkBuffer::new(*count); - chunks.fill(reader)?; + let bytes = chunks.fill(reader)?; chunks.print(&mut writer)?; + Ok(bytes) } - FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => { - io::copy(reader, &mut writer)?; - } + FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => io::copy(reader, &mut writer), FilterMode::Bytes(Signum::Positive(count)) => { let mut num_skip = *count - 1; let mut chunk = chunks::BytesChunk::new(); - loop { + let mut sum_bytes = 0u64; + let bytes = loop { if let Some(bytes) = chunk.fill(reader)? { let bytes: u64 = bytes as u64; match bytes.cmp(&num_skip) { Ordering::Less => num_skip -= bytes, Ordering::Equal => { - break; + break None; } Ordering::Greater => { - writer.write_all(chunk.get_buffer_with(num_skip as usize))?; - break; + let buffer = chunk.get_buffer_with(num_skip as usize); + writer.write_all(buffer)?; + sum_bytes += buffer.len() as u64; + break None; } } } else { - return Ok(()); + break Some(sum_bytes); } - } + }; - io::copy(reader, &mut writer)?; + if let Some(bytes) = bytes { + Ok(bytes) + } else { + io::copy(reader, &mut writer).map(|b| b + sum_bytes) + } } - _ => {} - } - Ok(()) + _ => Ok(0), + }; + + writer.flush()?; + result } #[cfg(test)] mod tests { - use crate::forwards_thru_file; + use crate::*; use std::io::Cursor; #[test] From d2b82410a96e8acfedc3b7615d19e1104a4c43c8 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 16:19:57 +0200 Subject: [PATCH 4/6] tail/args: Fix parsing follow mode has wrong mode when -F together with --retry --- src/uu/tail/src/args.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 177ff541a12..5254d3d8a74 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -202,6 +202,9 @@ impl Settings { ..Default::default() }; + settings.retry = + matches.get_flag(options::FOLLOW_RETRY) || matches.get_flag(options::RETRY); + if let Some(source) = matches.get_one::(options::SLEEP_INT) { // Advantage of `fundu` over `Duration::(try_)from_secs_f64(source.parse().unwrap())`: // * doesn't panic on errors like `Duration::from_secs_f64` would. @@ -544,7 +547,6 @@ pub fn uu_app() -> Command { Arg::new(options::FOLLOW_RETRY) .short('F') .help("Same as --follow=name --retry") - .overrides_with_all([options::RETRY, options::FOLLOW]) .action(ArgAction::SetTrue), ) .arg( From dc7826184c92962b7d688582b7363e11579025b8 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 16:47:13 +0200 Subject: [PATCH 5/6] tail: Big refactor of error handling in all affected modules ... tail.rs: * Change and simplify the control flow to try to open the input or file first and then react appropriately on errors. Note this also fixes some misbehavior on macos when a fifo pointed to a directory and other minor related issues. * Differentiate between read and write errors to exit on write errors. Read errors in contrast are not fatal and are handled like the other `TailError` types in the `TailErrorHandler` in the newly created error.rs module. * Return `Result` from unbounded_tail and bounded_tail. The Ok result (bytes read and printed) is currently not used but can be used later in the follow module. * Fix printing of Fifos multiple times. For example `tail - - < file` printed the content of the file twice). Fifos should behave like other standard input in such cases. follow/files.rs: Add "untailable" files separately in an own method to the set of watched files. paths.rs: * Remove the resolve method and replace the workaround with the more reliable open method which also differentiates between a fifo and a pipe in case of stdin. It uses an improved version of `FileExtTail::is_seekable` to be able to do so. * Cleanup the redundant `stdin_is_bad_fd` function --- src/uu/tail/src/error.rs | 245 +++++++++++++++++++++++++ src/uu/tail/src/follow/watch.rs | 29 ++- src/uu/tail/src/paths.rs | 67 +++---- src/uu/tail/src/tail.rs | 310 ++++++++++++++------------------ 4 files changed, 437 insertions(+), 214 deletions(-) create mode 100644 src/uu/tail/src/error.rs diff --git a/src/uu/tail/src/error.rs b/src/uu/tail/src/error.rs new file mode 100644 index 00000000000..6fc3c69a721 --- /dev/null +++ b/src/uu/tail/src/error.rs @@ -0,0 +1,245 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore Uncategorized WSAEBADF stdlib untailable tailable + +use crate::paths::Input; +#[cfg(unix)] +use crate::text; +use crate::{follow::Observer, paths::MetadataExtTail}; +use std::{fs::File, io}; +use uucore::error::{set_exit_code, FromIo, UIoError, UResult, USimpleError}; + +#[cfg(windows)] +pub fn new_io_directory_error() -> io::Error { + io::Error::from_raw_os_error(336) +} + +#[cfg(not(windows))] +pub fn new_io_directory_error() -> io::Error { + io::Error::from_raw_os_error(21) +} + +#[derive(Debug)] +pub enum TailError { + Stat(io::Error), + Open(io::Error), + Read(io::Error), + Write(io::Error), +} + +/// Print and format the error message of an [`TailError`]. +/// +/// Can be used testing purposes, to store the error message instead of printing to `stderr`. +pub struct TailErrorPrinter { + display_name: String, + prefix: String, +} + +impl TailErrorPrinter { + pub fn new(display_name: &str) -> Self { + Self { + display_name: String::from(display_name), + prefix: String::from(uucore::util_name()), + } + } + + pub fn format, T: AsRef>( + &self, + reason: Option, + message: T, + raw: bool, + ) -> String { + if raw { + format!("{}: {}", self.prefix, message.as_ref()) + } else { + let reason = reason.map_or( + format!("cannot open '{}' for reading", self.display_name), + |r| format!("{} '{}'", r.as_ref(), self.display_name), + ); + format!("{}: {}: {}", self.prefix, reason, message.as_ref()) + } + } + + pub fn print, T: AsRef>(&self, reason: &Option, message: T, raw: bool) { + let string = self.format(reason.as_ref(), message.as_ref(), raw); + eprintln!("{}", string); + } + + pub fn print_message>(&self, message: T) { + self.print::(&None, message, true); + } + + pub fn print_error(&self, error: &TailError) { + let message = error + .map_err_context(|| match error { + TailError::Stat(_) => format!("cannot fstat '{}'", self.display_name), + TailError::Open(_) => format!("cannot open '{}' for reading", self.display_name), + TailError::Read(_) => format!("error reading '{}'", self.display_name), + TailError::Write(_) => "write error".to_string(), + }) + .to_string(); + + self.print_message(message); + } +} + +impl FromIo> for &TailError { + fn map_err_context(self, context: impl FnOnce() -> String) -> Box { + let io_error = match self { + TailError::Stat(error) + | TailError::Open(error) + | TailError::Read(error) + | TailError::Write(error) => { + if let Some(raw) = error.raw_os_error() { + io::Error::from_raw_os_error(raw) + } else { + error.kind().into() + } + } + }; + io_error.map_err_context(context) + } +} + +/// Handle a [`TailError`]. +/// +/// This includes setting the exit code and printing of the error message. This handler needs an +/// [`Input`] to print the correct error message. +pub struct TailErrorHandler { + input: Input, + printer: TailErrorPrinter, + follow: bool, + retry: bool, +} + +impl TailErrorHandler { + /// Construct a new [`TailErrorHandler`] + pub fn new(input: Input, follow: bool, retry: bool) -> Self { + Self { + printer: TailErrorPrinter::new(input.display_name.as_str()), + input, + follow, + retry, + } + } + + /// Construct a new [`TailErrorHandler`] from an [`Input`] and the [`Observer`]. + pub fn from(input: Input, observer: &Observer) -> Self { + Self::new(input, observer.follow.is_some(), observer.retry) + } + + /// Handle the [`TailError`]. + /// + /// Sets the exit code to `1`, prints the correct error message based on the context in which an + /// [`io::Error`] happened + pub fn handle( + &self, + error: &TailError, + file: Option, + observer: &mut Observer, + ) -> UResult<()> { + set_exit_code(1); + self.printer.print_error(error); + + // TODO: what about bad stdin?? + match error { + TailError::Write(_) => return Err(USimpleError::new(1, "")), + TailError::Open(_) if self.input.is_stdin() => {} + TailError::Open(_) => { + // TODO: register bad file should take an Option instead of a path + observer.add_bad_path( + &self.input.path().unwrap(), + &self.input.display_name, + false, + )?; + } + TailError::Stat(_) | TailError::Read(_) + if cfg!(windows) && Self::is_directory_error(error) => + { + self.handle_untailable(); + + observer.add_untailable( + &self.input.path().unwrap(), + &self.input.display_name, + false, + )?; + } + TailError::Stat(_) | TailError::Read(_) if self.input.path().is_some() => { + // unwrap is safe here because only TailError::Open doesn't produce a File + let file = file.unwrap(); + match file.metadata() { + Ok(meta) if meta.is_tailable() => { + observer.add_bad_path( + &self.input.path().unwrap(), + &self.input.display_name, + false, + )?; + } + Ok(_) | Err(_) => { + self.handle_untailable(); + + observer.add_untailable( + &self.input.path().unwrap(), + &self.input.display_name, + false, + )?; + } + } + } + // FIXME: We're here on windows if the input was stdin, because we don't have a path on + // windows for stdin. Currently, the follow module depends on having a path for all + // inputs. This should be fixed. For now, we print the error message but don't follow + // the input and effectively do nothing. + TailError::Stat(_) | TailError::Read(_) => {} + } + + Ok(()) + } + + /// Return true if the [`io::Error`] is a `IsADirectory` error kind. + pub fn is_directory_error(error: &TailError) -> bool { + match error { + TailError::Stat(error) + | TailError::Open(error) + | TailError::Read(error) + | TailError::Write(error) => { + let os_error = error.raw_os_error(); + let message = error.to_string(); + + #[cfg(unix)] + return os_error.map_or(false, |n| n == 21) + || message.contains(text::IS_A_DIRECTORY); + + // This is the closest to a unix directory error I could find here + // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- + // in the rust stdlib `library/std/src/sys/windows/mod.rs` and `library/std/src/sys/windows/c/errors.rs` + // os_error == 336 maps to ErrorKind::IsADirectory (which is not stable + // at the time of writing this with rustc 1.60.0) + #[cfg(windows)] + return os_error.map_or(false, |n| n == 336) + || message.contains("An operation is not supported on a directory"); + } + } + } + + /// Depending on the follow mode and if `--retry` was given print the error message or not. + /// + /// No exit codes are set or any check is executed to ensure that the input is `tailable` or + /// not. This method only prints the error message. + pub fn handle_untailable(&self) { + if self.follow { + let msg = if self.retry { + "" + } else { + "; giving up on this name" + }; + self.printer.print_message(format!( + "{}: cannot follow end of this type of file{}", + self.input.display_name, msg, + )); + } + } +} diff --git a/src/uu/tail/src/follow/watch.rs b/src/uu/tail/src/follow/watch.rs index 966f3912040..22aa8c7a8fe 100644 --- a/src/uu/tail/src/follow/watch.rs +++ b/src/uu/tail/src/follow/watch.rs @@ -157,13 +157,14 @@ impl Observer { Ok(()) } + // FIXME: this doesn't work on windows pub fn add_stdin( &mut self, display_name: &str, reader: Option>, update_last: bool, ) -> UResult<()> { - if self.follow == Some(FollowMode::Descriptor) { + if self.follow_descriptor() { return self.add_path( &PathBuf::from(text::DEV_STDIN), display_name, @@ -188,6 +189,19 @@ impl Observer { Ok(()) } + pub fn add_untailable( + &mut self, + path: &Path, + display_name: &str, + update_last: bool, + ) -> UResult<()> { + if self.follow_name_retry() { + return self.add_path(path, display_name, None, update_last); + } + + Ok(()) + } + pub fn start(&mut self, settings: &Settings) -> UResult<()> { if settings.follow.is_none() { return Ok(()); @@ -470,7 +484,20 @@ impl Observer { } } +/* +POSIX specification regarding tail -f +If the input file is a regular file or if the file operand specifies a FIFO, do not +terminate after the last line of the input file has been copied, but read and copy +further bytes from the input file when they become available. If no file operand is +specified and standard input is a pipe or FIFO, the -f option shall be ignored. If +the input file is not a FIFO, pipe, or regular file, it is unspecified whether or +not the -f option shall be ignored. +*/ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { + if settings.follow.is_none() || settings.has_only_stdin() { + return Ok(()); + } + if observer.files.no_files_remaining(settings) && !observer.files.only_stdin_remaining() { return Err(USimpleError::new(1, text::NO_FILES_REMAINING.to_string())); } diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs index bf2a4e4c267..9643b714fdd 100644 --- a/src/uu/tail/src/paths.rs +++ b/src/uu/tail/src/paths.rs @@ -6,20 +6,31 @@ // spell-checker:ignore tailable seekable stdlib (stdlib) use crate::text; +use same_file::Handle; use std::ffi::OsStr; use std::fs::{File, Metadata}; -use std::io::{Seek, SeekFrom}; +use std::io::{self, Seek}; #[cfg(unix)] use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::path::{Path, PathBuf}; use uucore::error::UResult; +pub enum Opened { + File(File), + Fifo(File), + Pipe(File), +} + +/// The kind of input, either a `File` or `Stdin` defining an [`Input`] . #[derive(Debug, Clone)] pub enum InputKind { File(PathBuf), Stdin, } +/// Represent an input from the command line arguments. +/// +/// Is composed of an [`InputKind`] and a display name. #[derive(Debug, Clone)] pub struct Input { kind: InputKind, @@ -54,28 +65,34 @@ impl Input { } } - pub fn resolve(&self) -> Option { + pub fn open(&self) -> io::Result { match &self.kind { - InputKind::File(path) if path != &PathBuf::from(text::DEV_STDIN) => { - path.canonicalize().ok() - } - InputKind::File(_) | InputKind::Stdin => { - if cfg!(unix) { - match PathBuf::from(text::DEV_STDIN).canonicalize().ok() { - Some(path) if path != PathBuf::from(text::FD0) => Some(path), - Some(_) | None => None, - } + InputKind::File(path) => Ok(Opened::File(File::open(path)?)), + InputKind::Stdin => { + let mut handle = Handle::stdin()?; + let file = handle.as_file_mut(); + if file.is_seekable() { + Ok(Opened::Fifo(file.try_clone()?)) } else { - None + Ok(Opened::Pipe(file.try_clone()?)) } } } } - pub fn is_tailable(&self) -> bool { + #[cfg(unix)] + pub fn path(&self) -> Option { match &self.kind { - InputKind::File(path) => path_is_tailable(path), - InputKind::Stdin => self.resolve().map_or(false, |path| path_is_tailable(&path)), + InputKind::File(path) => Some(path.to_owned()), + InputKind::Stdin => Some(PathBuf::from(text::DEV_STDIN)), + } + } + + #[cfg(windows)] + pub fn path(&self) -> Option { + match &self.kind { + InputKind::File(path) => Some(path.to_owned()), + InputKind::Stdin => None, } } } @@ -120,16 +137,13 @@ impl HeaderPrinter { } pub trait FileExtTail { #[allow(clippy::wrong_self_convention)] - fn is_seekable(&mut self, current_offset: u64) -> bool; + fn is_seekable(&mut self) -> bool; } impl FileExtTail for File { /// Test if File is seekable. - /// Set the current position offset to `current_offset`. - fn is_seekable(&mut self, current_offset: u64) -> bool { + fn is_seekable(&mut self) -> bool { self.stream_position().is_ok() - && self.seek(SeekFrom::End(0)).is_ok() - && self.seek(SeekFrom::Start(current_offset)).is_ok() } } @@ -217,16 +231,3 @@ impl PathExtTail for Path { pub fn path_is_tailable(path: &Path) -> bool { path.is_file() || path.exists() && path.metadata().map_or(false, |meta| meta.is_tailable()) } - -#[inline] -pub fn stdin_is_bad_fd() -> bool { - // FIXME : Rust's stdlib is reopening fds as /dev/null - // see also: https://github.com/uutils/coreutils/issues/2873 - // (gnu/tests/tail-2/follow-stdin.sh fails because of this) - //#[cfg(unix)] - { - //platform::stdin_is_bad_fd() - } - //#[cfg(not(unix))] - false -} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 749b21a212d..e664dda8c76 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -18,6 +18,7 @@ pub mod args; pub mod chunks; +mod error; mod follow; mod parse; mod paths; @@ -27,16 +28,14 @@ pub mod text; pub use args::uu_app; use args::{parse_args, FilterMode, Settings, Signum}; use chunks::ReverseChunks; +use error::{new_io_directory_error, TailError, TailErrorHandler}; use follow::Observer; -use paths::{FileExtTail, HeaderPrinter, Input, InputKind, MetadataExtTail}; -use same_file::Handle; +use paths::{FileExtTail, HeaderPrinter, Input, MetadataExtTail, Opened}; use std::cmp::Ordering; use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write}; -use std::path::{Path, PathBuf}; use uucore::display::Quotable; -use uucore::error::{get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError}; -use uucore::{show, show_error}; +use uucore::error::{UResult, USimpleError}; #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { @@ -61,183 +60,114 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } fn uu_tail(settings: &Settings) -> UResult<()> { - let mut printer = HeaderPrinter::new(settings.verbose, true); + let mut header_printer = HeaderPrinter::new(settings.verbose, true); let mut observer = Observer::from(settings); observer.start(settings)?; - // Do an initial tail print of each path's content. - // Add `path` and `reader` to `files` map if `--follow` is selected. - for input in &settings.inputs.clone() { - match input.kind() { - InputKind::File(path) if cfg!(not(unix)) || path != &PathBuf::from(text::DEV_STDIN) => { - tail_file(settings, &mut printer, input, path, &mut observer, 0)?; - } - // File points to /dev/stdin here - InputKind::File(_) | InputKind::Stdin => { - tail_stdin(settings, &mut printer, input, &mut observer)?; - } - } - } - - if settings.follow.is_some() { - /* - POSIX specification regarding tail -f - If the input file is a regular file or if the file operand specifies a FIFO, do not - terminate after the last line of the input file has been copied, but read and copy - further bytes from the input file when they become available. If no file operand is - specified and standard input is a pipe or FIFO, the -f option shall be ignored. If - the input file is not a FIFO, pipe, or regular file, it is unspecified whether or - not the -f option shall be ignored. - */ - if !settings.has_only_stdin() { - follow::follow(observer, settings)?; - } - } - if get_exit_code() > 0 && paths::stdin_is_bad_fd() { - show_error!("-: {}", text::BAD_FD); + for input in &settings.inputs { + tail_input(input, settings, &mut header_printer, &mut observer)?; } - Ok(()) + follow::follow(observer, settings) } -fn tail_file( +// Do an initial tail print of each path's content. +// Add `path` and `reader` to `files` map if `--follow` is selected. +fn tail_input( + input: &Input, settings: &Settings, header_printer: &mut HeaderPrinter, - input: &Input, - path: &Path, observer: &mut Observer, - offset: u64, ) -> UResult<()> { - if !path.exists() { - set_exit_code(1); - show_error!( - "cannot open '{}' for reading: {}", - input.display_name, - text::NO_SUCH_FILE - ); - observer.add_bad_path(path, input.display_name.as_str(), false)?; - } else if path.is_dir() { - set_exit_code(1); - - header_printer.print_input(input); - let err_msg = "Is a directory".to_string(); - - show_error!("error reading '{}': {}", input.display_name, err_msg); - if settings.follow.is_some() { - let msg = if settings.retry { - "" - } else { - "; giving up on this name" - }; - show_error!( - "{}: cannot follow end of this type of file{}", - input.display_name, - msg - ); + let result: Result, TailError)> = match input.open() { + Ok(Opened::File(mut file)) => match tail_file(settings, header_printer, input, &mut file) { + Ok(_) => Ok(file), + Err(error) => Err((Some(file), error)), + }, + // looks like windows doesn't handle file seeks properly when the file is a fifo so we + // use unbounded tail (in tail_stdin()) which does the right thing. + Ok(Opened::Fifo(mut file)) if !cfg!(windows) => { + match tail_file(settings, header_printer, input, &mut file) { + Ok(_) => Ok(file), + Err(error) => Err((Some(file), error)), + } } - if !(observer.follow_name_retry()) { - // skip directory if not retry - return Ok(()); + Ok(Opened::Fifo(file)) | Ok(Opened::Pipe(file)) => { + match tail_stdin(settings, header_printer, input) { + Ok(_) => Ok(file), + Err(error) => Err((Some(file), error)), + } } - observer.add_bad_path(path, input.display_name.as_str(), false)?; - } else if input.is_tailable() { - let metadata = path.metadata().ok(); - match File::open(path) { - Ok(mut file) => { - header_printer.print_input(input); - let mut reader; - if !settings.presume_input_pipe - && file.is_seekable(if input.is_stdin() { offset } else { 0 }) - && metadata.as_ref().unwrap().get_block_size() > 0 - { - bounded_tail(&mut file, settings)?; - reader = BufReader::new(file); + // Unlike unix systems, windows returns an error (Permission denied) when trying to open + // directories like regular files (i.e. with File::open). So we have to fix that to + // maintain compatible behavior with the unix version of uu_tail + Err(error) if cfg!(windows) => { + if let Some(meta) = input.path().and_then(|path| path.metadata().ok()) { + if meta.is_dir() { + header_printer.print_input(input); + Err((None, TailError::Read(new_io_directory_error()))) } else { - reader = BufReader::new(file); - unbounded_tail(&mut reader, settings)?; + Err((None, TailError::Open(error))) } - observer.add_path( - path, - input.display_name.as_str(), - Some(Box::new(reader)), - true, - )?; - } - Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { - observer.add_bad_path(path, input.display_name.as_str(), false)?; - show!(e.map_err_context(|| { - format!("cannot open '{}' for reading", input.display_name) - })); - } - Err(e) => { - observer.add_bad_path(path, input.display_name.as_str(), false)?; - return Err(e.map_err_context(|| { - format!("cannot open '{}' for reading", input.display_name) - })); + } else { + Err((None, TailError::Open(error))) } } - } else { - observer.add_bad_path(path, input.display_name.as_str(), false)?; - } + // We do not print the header if we were unable to open the file to match gnu's tail + // behavior + Err(error) => Err((None, TailError::Open(error))), + }; + match result { + Ok(file) if input.is_stdin() => { + let reader = Box::new(BufReader::new(file)); + observer.add_stdin(input.display_name.as_str(), Some(reader), true)?; + } + Ok(file) => { + let reader = Box::new(BufReader::new(file)); + observer.add_path( + // we can safely unwrap here because path is None only on windows if input is + // stdin + input.path().unwrap().as_path(), + input.display_name.as_str(), + Some(reader), + true, + )?; + } + Err((file, error)) => { + let handler = TailErrorHandler::from(input.clone(), observer); + handler.handle(&error, file, observer)?; + } + } Ok(()) } -fn tail_stdin( +fn tail_file( settings: &Settings, header_printer: &mut HeaderPrinter, input: &Input, - observer: &mut Observer, -) -> UResult<()> { - match input.resolve() { - // fifo - Some(path) => { - let mut stdin_offset = 0; - if cfg!(unix) { - // Save the current seek position/offset of a stdin redirected file. - // This is needed to pass "gnu/tests/tail-2/start-middle.sh" - if let Ok(mut stdin_handle) = Handle::stdin() { - if let Ok(offset) = stdin_handle.as_file_mut().stream_position() { - stdin_offset = offset; - } - } - } - tail_file( - settings, - header_printer, - input, - &path, - observer, - stdin_offset, - )?; - } - // pipe - None => { - header_printer.print_input(input); - if paths::stdin_is_bad_fd() { - set_exit_code(1); - show_error!( - "cannot fstat {}: {}", - text::STDIN_HEADER.quote(), - text::BAD_FD - ); - if settings.follow.is_some() { - show_error!( - "error reading {}: {}", - text::STDIN_HEADER.quote(), - text::BAD_FD - ); - } - } else { - let mut reader = BufReader::new(stdin()); - unbounded_tail(&mut reader, settings)?; - observer.add_stdin(input.display_name.as_str(), Some(Box::new(reader)), true)?; - } - } - }; + file: &mut File, +) -> Result { + header_printer.print_input(input); - Ok(()) + let meta = file.metadata().map_err(TailError::Stat)?; + + if !settings.presume_input_pipe && file.is_seekable() && meta.get_block_size() > 0 { + bounded_tail(file, settings) + } else { + let mut reader = BufReader::new(file); + unbounded_tail(&mut reader, settings) + } +} + +fn tail_stdin( + settings: &Settings, + header_printer: &mut HeaderPrinter, + input: &Input, +) -> Result { + header_printer.print_input(input); + unbounded_tail(&mut BufReader::new(stdin()), settings) } /// Find the index after the given number of instances of a given byte. @@ -358,34 +288,51 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) -> i Ok(()) } +/// A helper function to copy the whole content of the `reader` into the `writer`. +/// +/// To differentiate between read and write errors this functions initially tries to read zero bytes +/// from the reader. This distinction is necessary, since write errors lead to the abortion of the +/// whole program and read errors do not. If the initial read fails a [`TailError::Read`] is +/// returned. If [`std::io::copy`] fails it'll return a [`TailError::Write`]. +fn copy(reader: &mut impl Read, writer: &mut impl Write) -> Result { + let mut zero_buffer = vec![]; + reader + .read(zero_buffer.as_mut_slice()) + .map_err(TailError::Read)?; + io::copy(reader, writer).map_err(TailError::Write) +} + /// When tail'ing a file, we do not need to read the whole file from start to /// finish just to find the last n lines or bytes. Instead, we can seek to the /// end of the file, and then read the file "backwards" in blocks of size /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. -fn bounded_tail(file: &mut File, settings: &Settings) -> io::Result { +fn bounded_tail(file: &mut File, settings: &Settings) -> Result { debug_assert!(!settings.presume_input_pipe); // Find the position in the file to start printing from. match &settings.mode { FilterMode::Lines(Signum::Negative(count), delimiter) => { - backwards_thru_file(file, *count, *delimiter)?; + backwards_thru_file(file, *count, *delimiter).map_err(TailError::Read)?; } FilterMode::Lines(Signum::Positive(count), delimiter) if count > &1 => { - let i = forwards_thru_file(file, *count - 1, *delimiter)?; - file.seek(SeekFrom::Start(i as u64))?; + let i = forwards_thru_file(file, *count - 1, *delimiter).map_err(TailError::Read)?; + file.seek(SeekFrom::Start(i as u64)) + .map_err(TailError::Read)?; } FilterMode::Lines(Signum::MinusZero, _) => { return Ok(0); } FilterMode::Bytes(Signum::Negative(count)) => { - let len = file.seek(SeekFrom::End(0))?; - file.seek(SeekFrom::End(-((*count).min(len) as i64)))?; + let len = file.seek(SeekFrom::End(0)).map_err(TailError::Read)?; + file.seek(SeekFrom::End(-((*count).min(len) as i64))) + .map_err(TailError::Read)?; } FilterMode::Bytes(Signum::Positive(count)) if count > &1 => { // GNU `tail` seems to index bytes and lines starting at 1, not // at 0. It seems to treat `+0` and `+1` as the same thing. - file.seek(SeekFrom::Start(*count - 1))?; + file.seek(SeekFrom::Start(*count - 1)) + .map_err(TailError::Read)?; } FilterMode::Bytes(Signum::MinusZero) => { return Ok(0); @@ -396,26 +343,27 @@ fn bounded_tail(file: &mut File, settings: &Settings) -> io::Result { // Print the target section of the file. let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); - io::copy(file, &mut writer).and_then(|a| writer.flush().map(|_| a)) + copy(file, &mut writer).and_then(|a| writer.flush().map(|_| a).map_err(TailError::Write)) } -fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io::Result { +fn unbounded_tail( + reader: &mut BufReader, + settings: &Settings, +) -> Result { let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); let result = match &settings.mode { FilterMode::Lines(Signum::Negative(count), sep) => { let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count); - let bytes = chunks.fill(reader)?; - chunks.print(&mut writer)?; + let bytes = chunks.fill(reader).map_err(TailError::Read)?; + chunks.print(&mut writer).map_err(TailError::Write)?; Ok(bytes) } - FilterMode::Lines(Signum::PlusZero | Signum::Positive(1), _) => { - io::copy(reader, &mut writer) - } + FilterMode::Lines(Signum::PlusZero | Signum::Positive(1), _) => copy(reader, &mut writer), FilterMode::Lines(Signum::Positive(count), sep) => { let mut num_skip = *count - 1; let mut chunk = chunks::LinesChunk::new(*sep); - while chunk.fill(reader)?.is_some() { + while chunk.fill(reader).map_err(TailError::Read)?.is_some() { let lines = chunk.get_lines() as u64; if lines < num_skip { num_skip -= lines; @@ -424,25 +372,27 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io } } if chunk.has_data() { - let bytes = chunk.print_lines(&mut writer, num_skip as usize)?; - io::copy(reader, &mut writer).map(|b| b + bytes as u64) + let bytes = chunk + .print_lines(&mut writer, num_skip as usize) + .map_err(TailError::Write)?; + copy(reader, &mut writer).map(|b| b + bytes as u64) } else { Ok(0) } } FilterMode::Bytes(Signum::Negative(count)) => { let mut chunks = chunks::BytesChunkBuffer::new(*count); - let bytes = chunks.fill(reader)?; - chunks.print(&mut writer)?; + let bytes = chunks.fill(reader).map_err(TailError::Read)?; + chunks.print(&mut writer).map_err(TailError::Write)?; Ok(bytes) } - FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => io::copy(reader, &mut writer), + FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => copy(reader, &mut writer), FilterMode::Bytes(Signum::Positive(count)) => { let mut num_skip = *count - 1; let mut chunk = chunks::BytesChunk::new(); let mut sum_bytes = 0u64; let bytes = loop { - if let Some(bytes) = chunk.fill(reader)? { + if let Some(bytes) = chunk.fill(reader).map_err(TailError::Read)? { let bytes: u64 = bytes as u64; match bytes.cmp(&num_skip) { Ordering::Less => num_skip -= bytes, @@ -451,7 +401,7 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io } Ordering::Greater => { let buffer = chunk.get_buffer_with(num_skip as usize); - writer.write_all(buffer)?; + writer.write_all(buffer).map_err(TailError::Write)?; sum_bytes += buffer.len() as u64; break None; } @@ -464,13 +414,13 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> io if let Some(bytes) = bytes { Ok(bytes) } else { - io::copy(reader, &mut writer).map(|b| b + sum_bytes) + copy(reader, &mut writer).map(|b| b + sum_bytes) } } _ => Ok(0), }; - writer.flush()?; + writer.flush().map_err(TailError::Write)?; result } From a4de92bee0cd48b226c43f7889460039b736894b Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Sun, 9 Apr 2023 17:18:16 +0200 Subject: [PATCH 6/6] tail: Cleanup code, fix clippy warnings and add documentation --- src/uu/tail/src/args.rs | 45 +++++++++++++++++++++++++---- src/uu/tail/src/follow/files.rs | 10 ++++--- src/uu/tail/src/platform/mod.rs | 7 +---- src/uu/tail/src/platform/unix.rs | 15 ++-------- src/uu/tail/src/platform/windows.rs | 7 ++--- 5 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index 5254d3d8a74..ba02e90652e 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -5,6 +5,8 @@ // spell-checker:ignore (ToDO) kqueue Signum fundu +//! Parse the arguments passed to `tail` into a [`Settings`] struct. + use crate::paths::Input; use crate::{parse, platform, Quotable}; use clap::crate_version; @@ -41,6 +43,7 @@ pub mod options { pub static PRESUME_INPUT_PIPE: &str = "-presume-input-pipe"; // NOTE: three hyphens is correct } +/// Represent a `u64` with sign. `0` is a special value and can be negative or positive. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Signum { Negative(u64), @@ -49,8 +52,12 @@ pub enum Signum { MinusZero, } +/// The tail operation mode. Can be either `Lines` or `Bytes`. +/// +/// `Lines` for the `-n` option and `Bytes` for the `-c` option. #[derive(Debug, PartialEq, Eq)] pub enum FilterMode { + /// Mode for bytes. Bytes(Signum), /// Mode for lines delimited by delimiter as u8 @@ -116,29 +123,56 @@ impl Default for FilterMode { } } +/// The `tail` follow mode given by the `--follow` flag. +/// +/// Can bei either `Descriptor` (`--follow=descriptor`) which is the default or `Name` +/// (`--follow=name`) #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum FollowMode { Descriptor, Name, } +/// The result returned from [`Settings::verify`]. #[derive(Debug)] pub enum VerificationResult { + /// Returned if [`Settings::verify`] has not detected any misconfiguration of the command line + /// arguments Ok, + + // Returned if one of the file arguments passed to `tail` is the stdin file descriptor (`-`) and + // `--follow=name` was given. CannotFollowStdinByName, + + /// Returned if tail will not output anything because of the configuration passed to `tail`. NoOutput, } +/// Store the configuration of `tail` parsed from the command line arguments (and defaults). +/// +/// This struct is designed to store the initial values given from user provided command line +/// arguments if present or sane defaults if not. The fields of `Settings` and `Settings` itself +/// should be seen as constants. Please do not use `Settings` to store fields that need to be +/// mutable after the initialization of `Settings`. #[derive(Debug)] pub struct Settings { + /// `--follow`, `-f` and as part of `-F` pub follow: Option, + /// `--max-unchanged-stats` pub max_unchanged_stats: u32, + /// `--lines`, `-n` or `--bytes`, `-c` pub mode: FilterMode, + /// `--pid` pub pid: platform::Pid, + /// `--retry` and as part of `-F` pub retry: bool, + /// `--sleep-interval`, `-s` pub sleep_sec: Duration, + /// `--use-polling` (non standard: divergence to gnu's `tail` ) pub use_polling: bool, + /// `--verbose`, `-v` and `--quiet`, `-q` pub verbose: bool, + /// `---presume-input-pipe` pub presume_input_pipe: bool, /// `FILE(s)` positional arguments pub inputs: Vec, @@ -181,7 +215,7 @@ impl Settings { settings } - pub fn from(matches: &clap::ArgMatches) -> UResult { + pub fn from(matches: &ArgMatches) -> UResult { let mut settings: Self = Self { follow: if matches.get_flag(options::FOLLOW_RETRY) { Some(FollowMode::Name) @@ -323,10 +357,11 @@ impl Settings { } } - /// Verify [`Settings`] and try to find unsolvable misconfigurations of tail originating from - /// user provided command line arguments. In contrast to [`Settings::check_warnings`] these - /// misconfigurations usually lead to the immediate exit or abortion of the running `tail` - /// process. + /// Verify the [`Settings`] and try to find unsolvable misconfigurations of tail originating + /// from user provided command line arguments. + /// + /// In contrast to [`Settings::check_warnings`] these misconfigurations usually lead to the + /// immediate exit or abortion of the running `tail` process. pub fn verify(&self) -> VerificationResult { // Mimic GNU's tail for `tail -F` if self.inputs.iter().any(|i| i.is_stdin()) && self.follow == Some(FollowMode::Name) { diff --git a/src/uu/tail/src/follow/files.rs b/src/uu/tail/src/follow/files.rs index 74639243e54..a2f0d49601b 100644 --- a/src/uu/tail/src/follow/files.rs +++ b/src/uu/tail/src/follow/files.rs @@ -134,6 +134,7 @@ impl FileHandling { }; } + // TODO: return io::Result and handle io::Errors /// Read new data from `path` and print it to stdout pub fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult { let mut chunks = BytesChunkBuffer::new(u64::MAX); @@ -162,11 +163,11 @@ impl FileHandling { /// Decide if printing `path` needs a header based on when it was last printed pub fn needs_header(&self, path: &Path, verbose: bool) -> bool { if verbose { - if let Some(ref last) = self.last { - return !last.eq(&path); + return if let Some(ref last) = self.last { + !last.eq(&path) } else { - return true; - } + true + }; } false } @@ -174,6 +175,7 @@ impl FileHandling { /// Data structure to keep a handle on the BufReader, Metadata /// and the display_name (header_name) of files that are being followed. +/// FIXME: store File instead of a Buffered Reader pub struct PathData { pub reader: Option>, pub metadata: Option, diff --git a/src/uu/tail/src/platform/mod.rs b/src/uu/tail/src/platform/mod.rs index e5ae8b8d861..5d76c7f59c8 100644 --- a/src/uu/tail/src/platform/mod.rs +++ b/src/uu/tail/src/platform/mod.rs @@ -9,12 +9,7 @@ */ #[cfg(unix)] -pub use self::unix::{ - //stdin_is_bad_fd, stdin_is_pipe_or_fifo, supports_pid_checks, Pid, ProcessChecker, - supports_pid_checks, - Pid, - ProcessChecker, -}; +pub use self::unix::{supports_pid_checks, Pid, ProcessChecker}; #[cfg(windows)] pub use self::windows::{supports_pid_checks, Pid, ProcessChecker}; diff --git a/src/uu/tail/src/platform/unix.rs b/src/uu/tail/src/platform/unix.rs index ed34b2cf9bc..57269598ac6 100644 --- a/src/uu/tail/src/platform/unix.rs +++ b/src/uu/tail/src/platform/unix.rs @@ -16,11 +16,11 @@ use std::io::Error; pub type Pid = libc::pid_t; pub struct ProcessChecker { - pid: self::Pid, + pid: Pid, } impl ProcessChecker { - pub fn new(process_id: self::Pid) -> Self { + pub fn new(process_id: Pid) -> Self { Self { pid: process_id } } @@ -35,7 +35,7 @@ impl Drop for ProcessChecker { fn drop(&mut self) {} } -pub fn supports_pid_checks(pid: self::Pid) -> bool { +pub fn supports_pid_checks(pid: Pid) -> bool { unsafe { !(libc::kill(pid, 0) != 0 && get_errno() == libc::ENOSYS) } } @@ -43,12 +43,3 @@ pub fn supports_pid_checks(pid: self::Pid) -> bool { fn get_errno() -> i32 { Error::last_os_error().raw_os_error().unwrap() } - -//pub fn stdin_is_bad_fd() -> bool { -// FIXME: Detect a closed file descriptor, e.g.: `tail <&-` -// this is never `true`, even with `<&-` because Rust's stdlib is reopening fds as /dev/null -// see also: https://github.com/uutils/coreutils/issues/2873 -// (gnu/tests/tail-2/follow-stdin.sh fails because of this) -// unsafe { libc::fcntl(fd, libc::F_GETFD) == -1 && get_errno() == libc::EBADF } -//false -//} diff --git a/src/uu/tail/src/platform/windows.rs b/src/uu/tail/src/platform/windows.rs index 3e4cc7edc1e..d82f179299c 100644 --- a/src/uu/tail/src/platform/windows.rs +++ b/src/uu/tail/src/platform/windows.rs @@ -19,9 +19,8 @@ pub struct ProcessChecker { } impl ProcessChecker { - pub fn new(process_id: self::Pid) -> Self { - #[allow(non_snake_case)] - let FALSE: BOOL = 0; + pub fn new(process_id: Pid) -> Self { + const FALSE: BOOL = 0; let h = unsafe { OpenProcess(PROCESS_SYNCHRONIZE, FALSE, process_id) }; Self { dead: h == 0, @@ -50,6 +49,6 @@ impl Drop for ProcessChecker { } } -pub fn supports_pid_checks(_pid: self::Pid) -> bool { +pub fn supports_pid_checks(_pid: Pid) -> bool { true }