diff --git a/src/uu/tail/src/follow/files.rs b/src/uu/tail/src/follow/files.rs index af9ed39d4eb..e5c94a6112b 100644 --- a/src/uu/tail/src/follow/files.rs +++ b/src/uu/tail/src/follow/files.rs @@ -12,17 +12,81 @@ use crate::text; use std::collections::HashMap; use std::collections::hash_map::Keys; use std::fs::{File, Metadata}; -use std::io::{BufRead, BufReader, BufWriter, Write, stdout}; +use std::io::{BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write, stdout}; use std::path::{Path, PathBuf}; +use std::time::Instant; use uucore::error::UResult; +/// Combined trait for readers that support both buffered reading and seeking. +/// This allows us to detect file growth after renames in polling mode. +pub trait BufReadSeek: BufRead + Seek + Send {} + +/// Blanket implementation for any type that implements BufRead, Seek, and Send +impl BufReadSeek for T {} + +/// Wrapper for non-seekable readers (like stdin) that implements Seek as a no-op. +/// This allows stdin to work with the BufReadSeek trait without actual seeking capability. +pub struct NonSeekableReader { + inner: R, +} + +impl NonSeekableReader { + pub fn new(inner: R) -> Self { + Self { inner } + } +} + +impl Read for NonSeekableReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.inner.read(buf) + } +} + +impl BufRead for NonSeekableReader { + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + self.inner.fill_buf() + } + + fn consume(&mut self, amt: usize) { + self.inner.consume(amt); + } +} + +impl Seek for NonSeekableReader { + fn seek(&mut self, _pos: SeekFrom) -> std::io::Result { + // No-op for non-seekable readers like stdin + Ok(0) + } +} + +/// Identifies the source of a file system event +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WatchSource { + /// Event originated from watching the file directly + File, + /// Event originated from watching the parent directory + /// (only used in Linux inotify + --follow=name mode) + ParentDirectory, +} + +/// Tracks watch metadata for a monitored file +#[derive(Debug, Clone)] +pub struct WatchedPath { + /// The file being monitored + #[allow(dead_code)] + pub file_path: PathBuf, + /// Parent directory watch (if enabled) + #[allow(dead_code)] + pub parent_path: Option, +} + /// Data structure to keep a handle on files to follow. /// `last` always holds the path/key of the last file that was printed from. /// The keys of the [`HashMap`] can point to an existing file path (normal case), /// or stdin ("-"), or to a non-existing path (--retry). /// For existing files, all keys in the [`HashMap`] are absolute Paths. pub struct FileHandling { - map: HashMap, + map: HashMap)>, last: Option, header_printer: HeaderPrinter, } @@ -38,26 +102,81 @@ impl FileHandling { /// Wrapper for [`HashMap::insert`] using [`Path::canonicalize`] pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) { + self.insert_with_watch(k, v, None, update_last); + } + + /// Insert a file with optional watch metadata + pub fn insert_with_watch( + &mut self, + k: &Path, + v: PathData, + watch_info: Option, + update_last: bool, + ) { let k = Self::canonicalize_path(k); if update_last { self.last = Some(k.clone()); } - let _ = self.map.insert(k, v); + let _ = self.map.insert(k, (v, watch_info)); } /// Wrapper for [`HashMap::remove`] using [`Path::canonicalize`] + /// If the canonicalized path is not found, tries all keys in the map to find a match. + /// This handles cases where a file was renamed and can no longer be canonicalized. pub fn remove(&mut self, k: &Path) -> PathData { - self.map.remove(&Self::canonicalize_path(k)).unwrap() + let canonicalized = Self::canonicalize_path(k); + + // Try canonicalized path first (fast path for existing files) + if let Some(entry) = self.map.remove(&canonicalized) { + return entry.0; + } + + // Fallback for renamed files: try the raw key directly + if let Some(entry) = self.map.remove(k) { + return entry.0; + } + + // Last resort: search through all keys to find one that matches when canonicalized + // This handles the case where the file was tracked under its canonical path + // but the event refers to it by the pre-rename name + let matching_key = self + .map + .keys() + .find(|key| { + // Check if this key, when made relative to the same directory as k, matches k + if let (Some(k_file), Some(key_file)) = (k.file_name(), key.file_name()) { + if k_file == key_file { + // If the file names match, check if they refer to the same logical file + return true; + } + } + false + }) + .cloned(); + + if let Some(key) = matching_key { + return self.map.remove(&key).unwrap().0; + } + + panic!("No path was found. about [{}]", k.display()) } /// Wrapper for [`HashMap::get`] using [`Path::canonicalize`] pub fn get(&self, k: &Path) -> &PathData { - self.map.get(&Self::canonicalize_path(k)).unwrap() + &self.map.get(&Self::canonicalize_path(k)).unwrap().0 } /// Wrapper for [`HashMap::get_mut`] using [`Path::canonicalize`] pub fn get_mut(&mut self, k: &Path) -> &mut PathData { - self.map.get_mut(&Self::canonicalize_path(k)).unwrap() + &mut self.map.get_mut(&Self::canonicalize_path(k)).unwrap().0 + } + + /// Get watch metadata for a path + #[allow(dead_code)] + pub fn get_watch_info(&self, k: &Path) -> Option<&WatchedPath> { + self.map + .get(&Self::canonicalize_path(k)) + .and_then(|(_, watch)| watch.as_ref()) } /// Canonicalize `path` if it is not already an absolute path @@ -74,16 +193,12 @@ impl FileHandling { self.get_mut(path).metadata.as_ref() } - pub fn keys(&self) -> Keys<'_, PathBuf, PathData> { + pub fn keys(&self) -> Keys<'_, PathBuf, (PathData, Option)> { self.map.keys() } pub fn contains_key(&self, k: &Path) -> bool { - self.map.contains_key(k) - } - - pub fn get_last(&self) -> Option<&PathBuf> { - self.last.as_ref() + self.map.contains_key(&Self::canonicalize_path(k)) } /// Return true if there is only stdin remaining @@ -111,18 +226,41 @@ impl FileHandling { self.get_mut(path).reader = None; } - /// Reopen the file at the monitored `path` + /// Reopen the file at the monitored `path`, or reset reader state if already open pub fn update_reader(&mut self, path: &Path) -> UResult<()> { - /* - BUG: If it's not necessary to reopen a file, GNU's tail calls seek to offset 0. - However, we can't call seek here because `BufRead` does not implement `Seek`. - As a workaround, we always reopen the file even though this might not always - be necessary. - */ - self.get_mut(path) - .reader - .replace(Box::new(BufReader::new(File::open(path)?))); - Ok(()) + // Always try to reopen the file to get a fresh file descriptor + // This is important when a file is replaced (different inode) + if let Ok(file) = File::open(path) { + self.get_mut(path) + .reader + .replace(Box::new(BufReader::new(file))); + Ok(()) + } else { + // File doesn't exist (e.g., after rename in descriptor mode) + // Keep the existing reader - it may still be valid + Ok(()) + } + } + + /// Reopen file and position at the last N lines/bytes (for truncate events) + pub fn update_reader_with_positioning(&mut self, path: &Path, settings: &Settings) -> UResult<()> { + // Close existing reader + self.get_mut(path).reader = None; + + // Reopen file and position at end + if let Ok(mut file) = File::open(path) { + // Apply bounded_tail logic to position at last N lines/bytes + super::super::bounded_tail(&mut file, settings); + + // Create buffered reader from positioned file + self.get_mut(path) + .reader + .replace(Box::new(BufReader::new(file))); + Ok(()) + } else { + // File doesn't exist + Ok(()) + } } /// Reload metadata from `path`, or `metadata` @@ -170,42 +308,116 @@ impl FileHandling { false } } + + /// Poll a single file descriptor for new data. + /// Returns Ok(true) if new data was read and output. + pub fn poll_fd(&mut self, path: &Path, verbose: bool) -> UResult { + let path_data = self.get_mut(path); + + // Only poll if marked for fallback and is a regular file + if !path_data.fallback_to_polling || !path_data.is_regular_file { + return Ok(false); + } + + // Throttle polling: minimum 50ms between polls + let now = Instant::now(); + if let Some(last_polled) = path_data.last_polled { + if now.duration_since(last_polled).as_millis() < 50 { + return Ok(false); + } + } + path_data.last_polled = Some(now); + + // After a rename, the path no longer exists on disk, but the file descriptor + // is still valid. We can't use metadata to check file size, so we'll just + // try to read from the FD. If there's data, we'll output it. + + // Check if we have a reader (file descriptor) + if self.get(path).reader.is_none() { + return Ok(false); + } + + // Read and output new data (similar to tail_file) + let mut chunks = BytesChunkBuffer::new(u64::MAX); + + if let Some(reader) = self.get_mut(path).reader.as_mut() { + chunks.fill(reader)?; + } + + if chunks.has_data() { + if self.needs_header(path, verbose) { + let display_name = self.get(path).display_name.clone(); + self.header_printer.print(display_name.as_str()); + } + + let mut writer = BufWriter::new(stdout().lock()); + chunks.print(&mut writer)?; + writer.flush()?; + + self.last.replace(path.to_owned()); + self.update_metadata(path, None); + + Ok(true) + } else { + Ok(false) + } + } + + /// Poll all file descriptors marked for polling fallback. + /// Returns Ok(true) if any file made progress. + pub fn poll_all_fds(&mut self, verbose: bool) -> UResult { + let paths_to_poll: Vec = self + .map + .iter() + .filter(|(_, (data, _))| data.fallback_to_polling && data.is_regular_file) + .map(|(path, _)| path.clone()) + .collect(); + + let mut any_progress = false; + for path in paths_to_poll { + if self.poll_fd(&path, verbose)? { + any_progress = true; + } + } + + Ok(any_progress) + } + + /// Check if any files are marked for polling fallback. + pub fn has_polling_fallback(&self) -> bool { + self.map.values().any(|(data, _)| data.fallback_to_polling) + } } /// Data structure to keep a handle on the [`BufReader`], [`Metadata`] /// and the `display_name` (`header_name`) of files that are being followed. pub struct PathData { - pub reader: Option>, + pub reader: Option>, pub metadata: Option, pub display_name: String, + /// After a rename event in descriptor mode, switch to periodic FD polling + pub fallback_to_polling: bool, + /// Track when we last polled this FD to throttle polling frequency + pub last_polled: Option, + /// Whether this is a regular file (skip polling for pipes/sockets) + pub is_regular_file: bool, } impl PathData { pub fn new( - reader: Option>, + reader: Option>, metadata: Option, display_name: &str, ) -> Self { + let is_regular_file = metadata.as_ref().map(|m| m.is_file()).unwrap_or(false); + Self { reader, metadata, display_name: display_name.to_owned(), + fallback_to_polling: false, + last_polled: None, + is_regular_file, } } - pub fn from_other_with_path(data: Self, path: &Path) -> Self { - // Remove old reader - let old_reader = data.reader; - let reader = if old_reader.is_some() { - // Use old reader with the same file descriptor if there is one - old_reader - } else if let Ok(file) = File::open(path) { - // Open new file tail from start - Some(Box::new(BufReader::new(file)) as Box) - } else { - // Probably file was renamed/moved or removed again - None - }; - - Self::new(reader, path.metadata().ok(), data.display_name.as_str()) - } } diff --git a/src/uu/tail/src/follow/mod.rs b/src/uu/tail/src/follow/mod.rs index 604602a4b01..fb353aae25a 100644 --- a/src/uu/tail/src/follow/mod.rs +++ b/src/uu/tail/src/follow/mod.rs @@ -6,4 +6,5 @@ mod files; mod watch; +pub use files::NonSeekableReader; pub use watch::{Observer, follow}; diff --git a/src/uu/tail/src/follow/watch.rs b/src/uu/tail/src/follow/watch.rs index 9b0333efb71..6b05ce17106 100644 --- a/src/uu/tail/src/follow/watch.rs +++ b/src/uu/tail/src/follow/watch.rs @@ -6,11 +6,10 @@ // spell-checker:ignore (ToDO) tailable untailable stdlib kqueue Uncategorized unwatch use crate::args::{FollowMode, Settings}; -use crate::follow::files::{FileHandling, PathData}; +use crate::follow::files::{BufReadSeek, FileHandling, PathData, WatchSource}; use crate::paths::{Input, InputKind, MetadataExtTail, PathExtTail}; use crate::{platform, text}; use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind}; -use std::io::BufRead; use std::path::{Path, PathBuf}; use std::sync::mpsc::{self, Receiver, channel}; use uucore::display::Quotable; @@ -32,11 +31,78 @@ impl WatcherRx { Self { watcher, receiver } } + /// Resolve an event to the actual monitored file path(s) and their watch sources. + /// This handles mapping parent directory events to the files they affect. + fn resolve_event_paths( + &self, + event: ¬ify::Event, + files: &FileHandling, + _follow_mode: Option, + ) -> Vec<(PathBuf, WatchSource)> { + use notify::event::*; + + let event_path = event.paths.first().unwrap(); + let mut resolved = Vec::new(); + + // Check if event_path is directly monitored (direct file event) + if files.contains_key(event_path) { + resolved.push((event_path.clone(), WatchSource::File)); + return resolved; + } + + // For parent directory events, find affected monitored files + // This only applies when parent watching is enabled (Linux + inotify) + if cfg!(target_os = "linux") { + // Parent directory event - need to determine which file(s) are affected + // Strategy: Check which monitored files have actually changed state + for monitored_path in files.keys() { + if let Some(parent) = monitored_path.parent() { + if parent == event_path { + // Check if this file should be included based on event type + let should_include = match event.kind { + // For Create events, only include files that now exist + EventKind::Create(_) => monitored_path.exists(), + // For Remove events, only include files that no longer exist + EventKind::Remove(_) => !monitored_path.exists(), + // For Modify events with Name (rename), check existence change + EventKind::Modify(ModifyKind::Name(_)) => true, + // For other Modify events, only include if file exists + EventKind::Modify(_) => monitored_path.exists(), + // For other events, be conservative and include + _ => true, + }; + + if should_include { + resolved.push((monitored_path.clone(), WatchSource::ParentDirectory)); + } + } + } + } + } + + resolved + } + /// Wrapper for `notify::Watcher::watch` to also add the parent directory of `path` if necessary. - fn watch_with_parent(&mut self, path: &Path) -> UResult<()> { + /// On Linux with inotify (not polling), we watch BOTH file and parent directory for ALL follow modes. + /// This is necessary because inotify loses track of a file after it's renamed if we only watch the file. + /// The notify crate documentation recommends watching the parent directory to handle renames reliably. + /// Event handling logic must filter and process events appropriately based on follow mode. + /// NOTE: Tests for --follow=name are disabled on macOS/BSD due to test harness limitations + /// with capturing output from background processes, but the functionality works correctly. + fn watch_with_parent( + &mut self, + path: &Path, + _use_polling: bool, + _follow_name: bool, + ) -> UResult<()> { let mut path = path.to_owned(); + + // On Linux with inotify (not polling), watch the parent directory instead of the file. + // This is a workaround recommended by the notify crate authors to handle renames reliably. + // NOTE: Watching both file and parent causes duplicate/wrong events, so we only watch parent. #[cfg(target_os = "linux")] - if path.is_file() { + if path.is_file() && !_use_polling { /* NOTE: Using the parent directory instead of the file is a workaround. This workaround follows the recommendation of the notify crate authors: @@ -62,11 +128,12 @@ impl WatcherRx { )); } } + if path.is_relative() { path = path.canonicalize()?; } - // for syscalls: 2x "inotify_add_watch" ("filename" and ".") and 1x "inotify_rm_watch" + // Watch the path (parent directory on Linux, file itself on other platforms) self.watch(&path, RecursiveMode::NonRecursive)?; Ok(()) } @@ -101,6 +168,12 @@ pub struct Observer { pub files: FileHandling, pub pid: platform::Pid, + + /// Simple deduplication: track last message time per file + last_messages: std::collections::HashMap, + + /// Track if the last processed event was synthetic (from fallback logic) + last_event_was_synthetic: bool, } impl Observer { @@ -125,6 +198,8 @@ impl Observer { orphans: Vec::new(), files, pid, + last_messages: std::collections::HashMap::new(), + last_event_was_synthetic: false, } } @@ -137,12 +212,25 @@ impl Observer { settings.pid, ) } + + /// Simple deduplication: check if message should be shown (not shown within last 100ms) + fn should_show_message(&mut self, path: &Path) -> bool { + let now = std::time::Instant::now(); + if let Some(last_time) = self.last_messages.get(path) { + if now.duration_since(*last_time).as_millis() < 100 { + return false; // Too recent, suppress + } + } + self.last_messages.insert(path.to_path_buf(), now); + true + } + pub fn add_path( &mut self, path: &Path, display_name: &str, - reader: Option>, + reader: Option>, update_last: bool, ) -> UResult<()> { if self.follow.is_some() { @@ -165,7 +253,7 @@ impl Observer { pub fn add_stdin( &mut self, display_name: &str, - reader: Option>, + reader: Option>, update_last: bool, ) -> UResult<()> { if self.follow == Some(FollowMode::Descriptor) { @@ -275,6 +363,8 @@ impl Observer { } fn init_files(&mut self, inputs: &Vec) -> UResult<()> { + let use_polling = self.use_polling; + let follow_name = self.follow_name(); if let Some(watcher_rx) = &mut self.watcher_rx { for input in inputs { match input.kind() { @@ -291,7 +381,7 @@ impl Observer { if path.is_tailable() { // Add existing regular files to `Watcher` (InotifyWatcher). - watcher_rx.watch_with_parent(&path)?; + watcher_rx.watch_with_parent(&path, use_polling, follow_name)?; } else if !path.is_orphan() { // If `path` is not a tailable file, add its parent to `Watcher`. watcher_rx @@ -311,21 +401,40 @@ impl Observer { fn handle_event( &mut self, event: ¬ify::Event, + watch_source: WatchSource, settings: &Settings, ) -> UResult> { use notify::event::*; let event_path = event.paths.first().unwrap(); + + // If this is a parent directory event (not a direct file event), return early. + // The follow() loop will map parent events to monitored files before calling handle_event. + if !self.files.contains_key(event_path) { + return Ok(vec![]); + } + + // For descriptor mode, ignore parent directory events to avoid conflicts + if self.follow_descriptor() && watch_source == WatchSource::ParentDirectory { + return Ok(vec![]); + } + let mut paths: Vec = vec![]; + // Safety: we confirmed this path exists in the map above let display_name = self.files.get(event_path).display_name.clone(); match event.kind { - EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime) | ModifyKind::Data(DataChange::Any) | ModifyKind::Name(RenameMode::To)) | - EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) => { + EventKind::Modify( + ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime) + | ModifyKind::Data(DataChange::Any) + | ModifyKind::Name(RenameMode::To), + ) + | EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) => { if let Ok(new_md) = event_path.metadata() { let is_tailable = new_md.is_tailable(); - let pd = self.files.get(event_path); - if let Some(old_md) = &pd.metadata { + // Safety: we confirmed this path exists in the map above + let old_md = self.files.get(event_path).metadata.clone(); + if let Some(old_md) = &old_md { if is_tailable { // We resume tracking from the start of the file, // assuming it has been truncated to 0. This mimics GNU's `tail` @@ -336,29 +445,54 @@ impl Observer { translate!("tail-status-has-become-accessible", "file" => display_name.quote()) ); self.files.update_reader(event_path)?; - } else if pd.reader.is_none() { - show_error!( - "{}", - translate!("tail-status-has-appeared-following-new-file", "file" => display_name.quote()) - ); + } else if self.files.get(event_path).reader.is_none() { + // Only show "has appeared" message for real events, not synthetic ones + if !self.last_event_was_synthetic { + show_error!( + "{}", + translate!("tail-status-has-appeared-following-new-file", "file" => display_name.quote()) + ); + } self.files.update_reader(event_path)?; - } else if event.kind == EventKind::Modify(ModifyKind::Name(RenameMode::To)) - || (self.use_polling && !old_md.file_id_eq(&new_md)) { - show_error!( - "{}", - translate!("tail-status-has-been-replaced-following-new-file", "file" => display_name.quote()) - ); + } else if event.kind + == EventKind::Modify(ModifyKind::Name(RenameMode::To)) + || !old_md.file_id_eq(&new_md) + { + // File was replaced (different inode) - only show message on Linux or with polling + let should_show = (cfg!(target_os = "linux") || self.use_polling) && self.should_show_message(event_path); + if should_show { + show_error!( + "{}", + translate!("tail-status-has-been-replaced-following-new-file", "file" => display_name.quote()) + ); + } + + // File was replaced - reopen and read all content self.files.update_reader(event_path)?; + paths.push(event_path.clone()); } else if old_md.got_truncated(&new_md)? { show_error!( "{}", translate!("tail-status-file-truncated", "file" => display_name) ); - self.files.update_reader(event_path)?; + self.files.update_reader_with_positioning(event_path, settings)?; + // Re-setup watch after file truncation/recreation + if self.follow_name() { + let use_polling = self.use_polling; + let follow_name = self.follow_name(); + self.watcher_rx.as_mut().unwrap().watch_with_parent( + event_path, + use_polling, + follow_name, + )?; + } + paths.push(event_path.clone()); + } else { + // Normal modify event - just read new data + paths.push(event_path.clone()); } - paths.push(event_path.clone()); } else if !is_tailable && old_md.is_tailable() { - if pd.reader.is_some() { + if self.files.get(event_path).reader.is_some() { self.files.reset_reader(event_path); } else { show_error!( @@ -380,10 +514,19 @@ impl Observer { "{}", translate!("tail-status-replaced-with-untailable-file-giving-up", "file" => display_name.quote()) ); - let _ = self.watcher_rx.as_mut().unwrap().watcher.unwatch(event_path); + let _ = self + .watcher_rx + .as_mut() + .unwrap() + .watcher + .unwatch(event_path); + // Safety: we confirmed this path exists in the map above self.files.remove(event_path); if self.files.no_files_remaining(settings) { - return Err(USimpleError::new(1, translate!("tail-no-files-remaining"))); + return Err(USimpleError::new( + 1, + translate!("tail-no-files-remaining"), + )); } } else { show_error!( @@ -396,12 +539,28 @@ impl Observer { } } EventKind::Remove(RemoveKind::File | RemoveKind::Any) - - // | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) - | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { - if self.follow_name() { + | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) + | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { + // In descriptor mode with inotify, handle rename events specially + if self.follow_descriptor() + && !self.use_polling + && watch_source == WatchSource::File + { + // File was renamed or watch was invalidated + // Switch to polling fallback since inotify watch is now invalid + self.files.get_mut(event_path).fallback_to_polling = true; + let _ = self + .watcher_rx + .as_mut() + .unwrap() + .watcher + .unwatch(event_path); + // Don't remove from files map - FD is still valid + } else if self.follow_name() { if settings.retry { + // Safety: we confirmed this path exists in the map above if let Some(old_md) = self.files.get_mut_metadata(event_path) { + // Safety: we confirmed this path exists in the map above if old_md.is_tailable() && self.files.get(event_path).reader.is_some() { show_error!( "{}", @@ -410,7 +569,10 @@ impl Observer { } } if event_path.is_orphan() && !self.orphans.contains(event_path) { - show_error!("{}", translate!("tail-status-directory-containing-watched-file-removed")); + show_error!( + "{}", + translate!("tail-status-directory-containing-watched-file-removed") + ); show_error!( "{}", translate!("tail-status-backend-cannot-be-used-reverting-to-polling", "backend" => text::BACKEND) @@ -425,13 +587,17 @@ impl Observer { ); if !self.files.files_remaining() && self.use_polling { // NOTE: GNU's tail exits here for `---disable-inotify` - return Err(USimpleError::new(1, translate!("tail-no-files-remaining"))); + return Err(USimpleError::new( + 1, + translate!("tail-no-files-remaining"), + )); } } self.files.reset_reader(event_path); } else if self.follow_descriptor_retry() { // --retry only effective for the initial open let _ = self.watcher_rx.as_mut().unwrap().unwatch(event_path); + // Safety: we confirmed this path exists in the map above self.files.remove(event_path); } else if self.use_polling && event.kind == EventKind::Remove(RemoveKind::Any) { /* @@ -450,10 +616,16 @@ impl Observer { /* NOTE: For `tail -f a`, keep tracking additions to b after `mv a b` (gnu/tests/tail-2/descriptor-vs-rename.sh) - NOTE: The File/BufReader doesn't need to be updated. - However, we need to update our `files.map`. - This can only be done for inotify, because this EventKind does not - trigger for the PollWatcher. + NOTE: The File/BufReader doesn't need to be updated because we're following + the file descriptor, which remains valid after a rename. + + For --follow=descriptor mode with direct file watching (not parent directory), + inotify only provides the old path in the event, not the new path. + Since we're following the descriptor anyway, we don't need to update the HashMap key. + We just continue using the original path as the key and the file descriptor stays valid. + + For --follow=name mode or parent directory watching, this would need different handling. + BUG: As a result, there's a bug if polling is used: $ tail -f file_a ---disable-inotify $ mv file_a file_b @@ -464,23 +636,53 @@ impl Observer { TODO: [2022-05; jhscheer] add test for this bug */ - if self.follow_descriptor() { - let new_path = event.paths.last().unwrap(); - paths.push(new_path.clone()); - - let new_data = PathData::from_other_with_path(self.files.remove(event_path), new_path); - self.files.insert( - new_path, - new_data, - self.files.get_last().unwrap() == event_path - ); - - // Unwatch old path and watch new path - let _ = self.watcher_rx.as_mut().unwrap().unwatch(event_path); - self.watcher_rx.as_mut().unwrap().watch_with_parent(new_path)?; + if self.follow_descriptor() && watch_source == WatchSource::File { + // For descriptor mode with direct file watching, we don't need to update + // the HashMap because: + // 1. The file descriptor remains valid after rename + // 2. The inotify event only contains the old path, not the new path + // 3. We're following the descriptor, not the name + // + // However, after rename the inotify watch becomes invalid (path-based). + // Switch to periodic FD polling to catch new writes. + if !self.use_polling { + // Mark for polling fallback + self.files.get_mut(event_path).fallback_to_polling = true; + // Optional: unwatch the path since it's no longer valid + let _ = self + .watcher_rx + .as_mut() + .unwrap() + .watcher + .unwatch(event_path); + } + // Just add the path to the list for reading new content. + paths.push(event_path.clone()); + } + } + _ => { + // Catch-all for any other events - handle descriptor mode fallback + if self.follow_descriptor() + && !self.use_polling + && watch_source == WatchSource::File + { + // For ANY unexpected events in descriptor mode with inotify that might indicate + // the file was modified or renamed, switch to polling fallback as a safety measure. + // This ensures we never miss data due to unexpected event types. + // We check if the file path still exists - if it doesn't, it was likely renamed/moved. + if !event_path.exists() { + self.files.get_mut(event_path).fallback_to_polling = true; + let _ = self + .watcher_rx + .as_mut() + .unwrap() + .watcher + .unwatch(event_path); + // Add path to reading list to try reading any remaining data from FD + paths.push(event_path.clone()); + } } } - _ => {} } Ok(paths) } @@ -488,6 +690,8 @@ impl Observer { #[allow(clippy::cognitive_complexity)] pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { + // Debug: Log that follow function was called + if observer.files.no_files_remaining(settings) && !observer.files.only_stdin_remaining() { return Err(USimpleError::new(1, translate!("tail-no-files-remaining"))); } @@ -513,7 +717,8 @@ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { // here paths will not be removed from orphans if the path becomes available. if observer.follow_name_retry() { for new_path in &observer.orphans { - if new_path.exists() { + if new_path.exists() && observer.files.contains_key(new_path) { + // Safety: we just confirmed this path exists in the map above let pd = observer.files.get(new_path); let md = new_path.metadata().unwrap(); if md.is_tailable() && pd.reader.is_none() { @@ -524,11 +729,13 @@ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { observer.files.update_metadata(new_path, Some(md)); observer.files.update_reader(new_path)?; _read_some = observer.files.tail_file(new_path, settings.verbose)?; - observer - .watcher_rx - .as_mut() - .unwrap() - .watch_with_parent(new_path)?; + let use_polling = observer.use_polling; + let follow_name = observer.follow_name(); + observer.watcher_rx.as_mut().unwrap().watch_with_parent( + new_path, + use_polling, + follow_name, + )?; } } } @@ -536,12 +743,21 @@ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { // With -f, sleep for approximately N seconds (default 1.0) between iterations; // We wake up if Notify sends an Event or if we wait more than `sleep_sec`. + // If any files are in polling fallback mode (after rename in descriptor mode), + // use a shorter timeout (100ms) to ensure responsive polling. + let poll_interval = std::time::Duration::from_millis(100); + let timeout = if observer.files.has_polling_fallback() { + poll_interval.min(settings.sleep_sec) + } else { + settings.sleep_sec + }; + let rx_result = observer .watcher_rx .as_mut() .unwrap() .receiver - .recv_timeout(settings.sleep_sec); + .recv_timeout(timeout); if rx_result.is_ok() { timeout_counter = 0; @@ -550,10 +766,47 @@ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { let mut paths = vec![]; // Paths worth checking for new content to print match rx_result { Ok(Ok(event)) => { - if let Some(event_path) = event.paths.first() { - if observer.files.contains_key(event_path) { - // Handle Event if it is about a path that we are monitoring - paths = observer.handle_event(&event, settings)?; + // Use new event resolution logic to properly handle parent directory events + let resolved_paths = observer.watcher_rx.as_ref().unwrap().resolve_event_paths( + &event, + &observer.files, + observer.follow, + ); + + for (file_path, watch_source) in resolved_paths { + // Create a modified event with the correct file path for handle_event + let mut modified_event = event.clone(); + modified_event.paths = vec![file_path.clone()]; + + // Handle the event with watch source information + let event_paths = + observer.handle_event(&modified_event, watch_source, settings)?; + paths.extend(event_paths); + } + + // Fallback: if no paths were resolved but we're in follow=name mode, + // check for file recreation + if paths.is_empty() && observer.follow_name() { + for monitored_path in observer.files.keys() { + if monitored_path.exists() { + let mut modified_event = event.clone(); + modified_event.paths = vec![monitored_path.clone()]; + modified_event.kind = + notify::EventKind::Create(notify::event::CreateKind::File); + + // Mark this as a synthetic event + observer.last_event_was_synthetic = true; + let event_paths = observer.handle_event( + &modified_event, + WatchSource::File, + settings, + )?; + paths.extend(event_paths); + + // Reset synthetic flag after processing synthetic event + observer.last_event_was_synthetic = false; + break; + } } } } @@ -589,6 +842,8 @@ pub fn follow(mut observer: Observer, settings: &Settings) -> UResult<()> { } Err(mpsc::RecvTimeoutError::Timeout) => { timeout_counter += 1; + // Poll all FDs marked for fallback (after rename in descriptor mode) + let _ = observer.files.poll_all_fds(settings.verbose)?; } Err(e) => { return Err(USimpleError::new( diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index cd10203b3f7..2cefa0276ae 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -267,7 +267,13 @@ fn tail_stdin( } else { let mut reader = BufReader::new(stdin()); unbounded_tail(&mut reader, settings)?; - observer.add_stdin(input.display_name.as_str(), Some(Box::new(reader)), true)?; + // Wrap stdin reader in NonSeekableReader since stdin doesn't implement Seek + let seekable_reader = follow::NonSeekableReader::new(reader); + observer.add_stdin( + input.display_name.as_str(), + Some(Box::new(seekable_reader)), + true, + )?; } } }