From f9ccfd0ddb1ff015854db15e5033b6d4529d59a3 Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Fri, 6 Feb 2026 16:41:42 -0800 Subject: [PATCH 1/3] Fix 8 bugs from codebase review (Wave 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wire protocol Written size u32 → u64 to prevent truncation on copy_file_range/remap_file_range returns exceeding 4GB - Loopback IP exhaustion now returns error instead of silently reusing 127.0.0.2 (would cause IP conflicts) - Remove security.capability xattr fast-path that returned ENODATA for all files, hiding real capabilities - Check e2fsck exit code before resize2fs (exit >= 4 means uncorrectable filesystem errors) - slirp4netns stdout/stderr changed from Stdio::piped() to Stdio::null() to prevent pipe buffer deadlock - Check truncate exit code in create_disk_from_dir - parse_size uses checked_mul to prevent silent overflow - Delete dead code mount_vsock_with_readers in fc-agent Tested: cargo test -p fuse-pipe --lib (42 pass), cargo test -p fcvm --lib (48 pass) --- fc-agent/src/fuse/mod.rs | 17 ----------------- fuse-pipe/src/client/fuse.rs | 26 +++----------------------- fuse-pipe/src/protocol/response.rs | 2 +- fuse-pipe/src/server/passthrough.rs | 10 +++++----- src/commands/podman.rs | 9 ++++++++- src/network/slirp.rs | 4 ++-- src/state/manager.rs | 10 ++++------ src/storage/disk.rs | 17 ++++++++++++++--- 8 files changed, 37 insertions(+), 58 deletions(-) diff --git a/fc-agent/src/fuse/mod.rs b/fc-agent/src/fuse/mod.rs index 05d18344..666cdf74 100644 --- a/fc-agent/src/fuse/mod.rs +++ b/fc-agent/src/fuse/mod.rs @@ -121,20 +121,3 @@ pub fn mount_vsock(port: u32, mount_point: &str) -> anyhow::Result<()> { ); fuse_pipe::mount_vsock_with_options(HOST_CID, port, mount_point, num_readers, trace_rate) } - -/// Mount a FUSE filesystem with multiple reader threads. -/// -/// Same as `mount_vsock` but creates multiple FUSE reader threads for -/// better parallel performance. -#[allow(dead_code)] -pub fn mount_vsock_with_readers( - port: u32, - mount_point: &str, - num_readers: usize, -) -> anyhow::Result<()> { - eprintln!( - "[fc-agent] mounting FUSE volume at {} via vsock port {} ({} readers)", - mount_point, port, num_readers - ); - fuse_pipe::mount_vsock_with_readers(HOST_CID, port, mount_point, num_readers) -} diff --git a/fuse-pipe/src/client/fuse.rs b/fuse-pipe/src/client/fuse.rs index 92a27ea0..92b25a45 100644 --- a/fuse-pipe/src/client/fuse.rs +++ b/fuse-pipe/src/client/fuse.rs @@ -605,7 +605,7 @@ impl Filesystem for FuseClient { }); match response { - VolumeResponse::Written { size } => reply.written(size), + VolumeResponse::Written { size } => reply.written(size as u32), VolumeResponse::Error { errno } => reply.error(Errno::from_i32(errno)), _ => reply.error(Errno::EIO), } @@ -990,26 +990,6 @@ impl Filesystem for FuseClient { } fn getxattr(&self, req: &Request, ino: INodeNo, name: &OsStr, size: u32, reply: ReplyXattr) { - // Fast path: The kernel calls getxattr("security.capability") on every write - // to check if file capabilities need to be cleared. This is extremely common - // and almost always returns ENODATA (no capabilities set). Short-circuit this - // to avoid the expensive server round-trip (~32µs savings per write). - // - // This is safe because: - // 1. If capabilities ARE set, they're preserved (we'd need setxattr to clear) - // 2. The kernel's capability check is advisory - it clears caps on successful write - // 3. Container workloads rarely use file capabilities - // - // Can be disabled via FCVM_NO_XATTR_FASTPATH=1 for debugging. - if std::env::var("FCVM_NO_XATTR_FASTPATH").is_err() { - if let Some(name_str) = name.to_str() { - if name_str == "security.capability" { - reply.error(Errno::ENODATA); - return; - } - } - } - let response = self.send_request_sync(VolumeRequest::Getxattr { ino: ino.into(), name: name.to_string_lossy().to_string(), @@ -1198,7 +1178,7 @@ impl Filesystem for FuseClient { }); match response { - VolumeResponse::Written { size } => reply.written(size), + VolumeResponse::Written { size } => reply.written(size as u32), VolumeResponse::Error { errno } => reply.error(Errno::from_i32(errno)), _ => reply.error(Errno::EIO), } @@ -1241,7 +1221,7 @@ impl Filesystem for FuseClient { ); match response { - VolumeResponse::Written { size } => reply.written(size), + VolumeResponse::Written { size } => reply.written(size as u32), VolumeResponse::Error { errno } => reply.error(Errno::from_i32(errno)), _ => reply.error(Errno::EIO), } diff --git a/fuse-pipe/src/protocol/response.rs b/fuse-pipe/src/protocol/response.rs index ea9279b9..9079136d 100644 --- a/fuse-pipe/src/protocol/response.rs +++ b/fuse-pipe/src/protocol/response.rs @@ -31,7 +31,7 @@ pub enum VolumeResponse { Data { data: Vec }, /// Number of bytes written. - Written { size: u32 }, + Written { size: u64 }, /// File opened response. Opened { fh: u64, flags: u32 }, diff --git a/fuse-pipe/src/server/passthrough.rs b/fuse-pipe/src/server/passthrough.rs index 730bf856..597b4466 100644 --- a/fuse-pipe/src/server/passthrough.rs +++ b/fuse-pipe/src/server/passthrough.rs @@ -656,7 +656,7 @@ impl FilesystemHandler for PassthroughFs { ) { Ok(n) => { tracing::debug!(target: "passthrough", fh, written = n, "write succeeded"); - VolumeResponse::Written { size: n as u32 } + VolumeResponse::Written { size: n as u64 } } Err(e) => { tracing::debug!(target: "passthrough", fh, error = ?e, "write failed"); @@ -1149,7 +1149,7 @@ impl FilesystemHandler for PassthroughFs { ) { Ok(n) => { tracing::debug!(target: "passthrough", copied = n, "copy_file_range succeeded"); - VolumeResponse::Written { size: n as u32 } + VolumeResponse::Written { size: n as u64 } } Err(e) => { tracing::debug!(target: "passthrough", error = ?e, "copy_file_range failed"); @@ -1190,7 +1190,7 @@ impl FilesystemHandler for PassthroughFs { ) { Ok(n) => { tracing::debug!(target: "passthrough", cloned = n, "remap_file_range succeeded"); - VolumeResponse::Written { size: n as u32 } + VolumeResponse::Written { size: n as u64 } } Err(e) => { tracing::debug!(target: "passthrough", error = ?e, "remap_file_range failed"); @@ -1607,7 +1607,7 @@ mod tests { // For whole-file clone (len=0), we return the file size on success assert_eq!( size, - test_data.len() as u32, + test_data.len() as u64, "FICLONE should return file size for whole file (len=0)" ); @@ -1726,7 +1726,7 @@ mod tests { match resp { VolumeResponse::Written { size } => { eprintln!("FICLONERANGE succeeded, size={}", size); - assert_eq!(size, block_size as u32, "should clone requested size"); + assert_eq!(size, block_size as u64, "should clone requested size"); // Verify: first block of dest should equal second block of source let resp = fs.read(dst_ino, dst_fh, 0, block_size as u32, uid, gid, 0); diff --git a/src/commands/podman.rs b/src/commands/podman.rs index 155320f3..3d3319c2 100644 --- a/src/commands/podman.rs +++ b/src/commands/podman.rs @@ -479,12 +479,19 @@ async fn create_disk_from_dir( ); // Create sparse file - tokio::process::Command::new("truncate") + let truncate_status = tokio::process::Command::new("truncate") .args(["-s", &image_size.to_string(), output_path.to_str().unwrap()]) .status() .await .context("creating sparse file")?; + if !truncate_status.success() { + bail!( + "truncate failed with exit code: {:?}", + truncate_status.code() + ); + } + // Format as ext4 let mkfs = tokio::process::Command::new("mkfs.ext4") .args(["-q", "-F", output_path.to_str().unwrap()]) diff --git a/src/network/slirp.rs b/src/network/slirp.rs index d662170c..371d77dd 100644 --- a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -343,8 +343,8 @@ ip addr add {namespace_ip}/24 dev {bridge} cmd.arg(namespace_pid.to_string()) .arg(&self.slirp_device) .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); + .stdout(Stdio::null()) + .stderr(Stdio::null()); let child = cmd.spawn().context("failed to spawn slirp4netns")?; diff --git a/src/state/manager.rs b/src/state/manager.rs index 0b596ec7..80efe296 100644 --- a/src/state/manager.rs +++ b/src/state/manager.rs @@ -524,20 +524,18 @@ impl StateManager { // Note: We rely on state file cleanup (cleanup_stale_state) to handle dead processes. // We don't check if port 8080 is available because wildcard binds (0.0.0.0:8080) // would cause false negatives. Real port conflicts are detected at slirp4netns add_hostfwd time. - let ip = (|| { + let ip = (|| -> Result { for b2 in 0..=255u8 { for b3 in 2..=254u8 { // Skip 127.0.0.1 (localhost) let ip = format!("127.0.{}.{}", b2, b3); if !used_ips.contains(&ip) { - return ip; + return Ok(ip); } } } - // Fallback if all IPs are used (very unlikely - 65,000+ IPs) - tracing::warn!("all loopback IPs in use, reusing 127.0.0.2"); - "127.0.0.2".to_string() - })(); + anyhow::bail!("all loopback IPs exhausted (65,000+ VMs)") + })()?; // Update VM state with the allocated IP and SAVE WHILE HOLDING THE LOCK // This ensures no other process can allocate the same IP diff --git a/src/storage/disk.rs b/src/storage/disk.rs index 25c0e8ed..cedf2ac5 100644 --- a/src/storage/disk.rs +++ b/src/storage/disk.rs @@ -196,10 +196,20 @@ pub async fn ensure_free_space( } // Check filesystem before resize (required by resize2fs) - let _ = tokio::process::Command::new("e2fsck") + let e2fsck_output = tokio::process::Command::new("e2fsck") .args(["-f", "-y", disk_path.to_string_lossy().as_ref()]) .output() - .await; + .await + .context("running e2fsck")?; + + // e2fsck exit codes: 0=clean, 1=corrected, 2=corrected+reboot needed + // Exit code >= 4 means uncorrected errors + if e2fsck_output.status.code().unwrap_or(1) >= 4 { + bail!( + "e2fsck found uncorrectable errors: {}", + String::from_utf8_lossy(&e2fsck_output.stderr) + ); + } // Resize ext4 filesystem to fill the new space let output = tokio::process::Command::new("resize2fs") @@ -256,5 +266,6 @@ pub fn parse_size(s: &str) -> Result { .parse() .with_context(|| format!("parsing size number '{}'", num_str))?; - Ok(num * multiplier) + num.checked_mul(multiplier) + .with_context(|| format!("size overflow: {} * {}", num, multiplier)) } From 17d8d44fed891b4ab6bcba76c7e6044784f4a06f Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Fri, 6 Feb 2026 18:50:13 -0800 Subject: [PATCH 2/3] Fix 10 bugs from codebase review (Wave 2) Fixes: - UFFD server auto-exit: use CancellationToken instead of exiting when connections drop to zero (server must stay alive for new clones) - Health monitor false positive: return None (unknown) instead of Some(true) when fcvm binary not found for podman healthcheck - Health monitor no fast polling revert: reset to startup interval when VM becomes unhealthy after being healthy - Hardcoded port 80: use url.port().unwrap_or(80) for rootless health - Slirp port forwarding: bail on error response instead of warn - Bridged cleanup: collect all errors instead of stopping on first - Clone setup leak: clean up data_dir and state on failure - Thread-local groups: Drop guard ensures cleanup on panic - Shell injection: replace bash -c with direct Command args for TAP device verification - set_var unsoundness: pass max_write as parameter instead of using std::env::set_var in multi-threaded context --- fc-agent/src/fuse/mod.rs | 50 +++++++++++++++++------------ fuse-pipe/src/client/fuse.rs | 24 +++++++------- fuse-pipe/src/client/mount.rs | 8 +++-- fuse-pipe/src/server/passthrough.rs | 19 ++++++----- src/commands/common.rs | 10 +++--- src/commands/snapshot.rs | 25 +++++++++++++-- src/health.rs | 31 +++++++++++------- src/network/bridged.rs | 49 +++++++++++++++++++--------- src/network/slirp.rs | 9 +++++- src/uffd/server.rs | 14 ++++---- 10 files changed, 153 insertions(+), 86 deletions(-) diff --git a/fc-agent/src/fuse/mod.rs b/fc-agent/src/fuse/mod.rs index 666cdf74..8a75610e 100644 --- a/fc-agent/src/fuse/mod.rs +++ b/fc-agent/src/fuse/mod.rs @@ -74,29 +74,32 @@ fn get_trace_rate() -> u64 { 0 } -/// Set FCVM_FUSE_MAX_WRITE from kernel boot param if not already set. -/// This propagates the max_write limit to the fuse-pipe client which reads from env. -/// Used to limit write sizes in nested VMs to avoid vsock data corruption. -fn propagate_max_write_from_cmdline() { - // Skip if already set in environment - if std::env::var("FCVM_FUSE_MAX_WRITE").is_ok() { - return; +/// Get max_write from FCVM_FUSE_MAX_WRITE env var or kernel boot param (0 = unbounded). +/// Checks (in order): +/// 1. FCVM_FUSE_MAX_WRITE environment variable +/// 2. fuse_max_write=N kernel boot parameter (from /proc/cmdline) +/// 3. Default: 0 (unbounded) +fn get_max_write() -> u32 { + // First check environment variable + if let Some(n) = std::env::var("FCVM_FUSE_MAX_WRITE") + .ok() + .and_then(|s| s.parse().ok()) + { + return n; } - // Check kernel command line for fuse_max_write=N + // Then check kernel command line if let Ok(cmdline) = std::fs::read_to_string("/proc/cmdline") { for part in cmdline.split_whitespace() { if let Some(value) = part.strip_prefix("fuse_max_write=") { - // Set as environment variable for fuse-pipe to pick up - std::env::set_var("FCVM_FUSE_MAX_WRITE", value); - eprintln!( - "[fc-agent] set FCVM_FUSE_MAX_WRITE={} from kernel cmdline", - value - ); - return; + if let Ok(n) = value.parse() { + return n; + } } } } + + 0 } /// Mount a FUSE filesystem from host via vsock. @@ -110,14 +113,19 @@ fn propagate_max_write_from_cmdline() { /// * `port` - The vsock port where the host VolumeServer is listening /// * `mount_point` - The path where the filesystem will be mounted pub fn mount_vsock(port: u32, mount_point: &str) -> anyhow::Result<()> { - // Propagate max_write limit from kernel cmdline to env var for fuse-pipe - propagate_max_write_from_cmdline(); - let num_readers = get_num_readers(); let trace_rate = get_trace_rate(); + let max_write = get_max_write(); eprintln!( - "[fc-agent] mounting FUSE volume at {} via vsock port {} ({} readers, trace_rate={})", - mount_point, port, num_readers, trace_rate + "[fc-agent] mounting FUSE volume at {} via vsock port {} ({} readers, trace_rate={}, max_write={})", + mount_point, port, num_readers, trace_rate, max_write ); - fuse_pipe::mount_vsock_with_options(HOST_CID, port, mount_point, num_readers, trace_rate) + fuse_pipe::mount_vsock_with_options( + HOST_CID, + port, + mount_point, + num_readers, + trace_rate, + max_write, + ) } diff --git a/fuse-pipe/src/client/fuse.rs b/fuse-pipe/src/client/fuse.rs index 92b25a45..5d5a30d7 100644 --- a/fuse-pipe/src/client/fuse.rs +++ b/fuse-pipe/src/client/fuse.rs @@ -57,12 +57,14 @@ pub struct FuseClient { init_callback: Mutex>, /// Shared flag set by destroy() to signal clean shutdown to reader threads destroyed: Arc, + /// Maximum write size (0 = unbounded). Passed explicitly to avoid env var races. + max_write: u32, } impl FuseClient { /// Create a new client using shared multiplexer. pub fn new(mux: Arc) -> Self { - Self::with_destroyed_flag(mux, Arc::new(AtomicBool::new(false))) + Self::with_options(mux, Arc::new(AtomicBool::new(false)), 0) } /// Create a new client with a shared destroyed flag. @@ -70,10 +72,16 @@ impl FuseClient { /// The destroyed flag is set by `destroy()` when the filesystem is unmounted. /// Reader threads can check this flag to distinguish clean shutdown from errors. pub fn with_destroyed_flag(mux: Arc, destroyed: Arc) -> Self { + Self::with_options(mux, destroyed, 0) + } + + /// Create a new client with a shared destroyed flag and max_write limit. + pub fn with_options(mux: Arc, destroyed: Arc, max_write: u32) -> Self { Self { mux, init_callback: Mutex::new(None), destroyed, + max_write, } } @@ -87,6 +95,7 @@ impl FuseClient { mux, init_callback: Mutex::new(Some(callback)), destroyed, + max_write: 0, } } @@ -204,12 +213,6 @@ fn protocol_file_type_to_fuser(ft: u8) -> FileType { } } -/// Default max_write size for FUSE operations (0 = unbounded, use kernel default). -/// -/// For nested virtualization (L2 VMs), set FCVM_FUSE_MAX_WRITE=32768 to avoid -/// vsock data loss due to cache coherency issues in double Stage 2 translation. -const DEFAULT_FUSE_MAX_WRITE: u32 = 0; - impl Filesystem for FuseClient { fn init(&mut self, _req: &Request, config: &mut fuser::KernelConfig) -> Result<(), io::Error> { // Enable writeback cache for better write performance (kernel batches writes). @@ -252,11 +255,8 @@ impl Filesystem for FuseClient { } // Limit max_write to avoid vsock data loss under nested virtualization. - // Override with FCVM_FUSE_MAX_WRITE env var (0 = unbounded). - let max_write = std::env::var("FCVM_FUSE_MAX_WRITE") - .ok() - .and_then(|v| v.parse::().ok()) - .unwrap_or(DEFAULT_FUSE_MAX_WRITE); + // Passed explicitly via mount_vsock_with_options to avoid env var races. + let max_write = self.max_write; if max_write > 0 { if let Err(max) = config.set_max_write(max_write) { diff --git a/fuse-pipe/src/client/mount.rs b/fuse-pipe/src/client/mount.rs index 0d860cdb..728ff30f 100644 --- a/fuse-pipe/src/client/mount.rs +++ b/fuse-pipe/src/client/mount.rs @@ -424,7 +424,7 @@ fn mount_internal>( /// ``` #[cfg(target_os = "linux")] pub fn mount_vsock>(cid: u32, port: u32, mount_point: P) -> anyhow::Result<()> { - mount_vsock_with_options(cid, port, mount_point, 1, 0) + mount_vsock_with_options(cid, port, mount_point, 1, 0, 0) } /// Mount a FUSE filesystem via vsock with multiple reader threads. @@ -435,7 +435,7 @@ pub fn mount_vsock_with_readers>( mount_point: P, num_readers: usize, ) -> anyhow::Result<()> { - mount_vsock_with_options(cid, port, mount_point, num_readers, 0) + mount_vsock_with_options(cid, port, mount_point, num_readers, 0, 0) } /// Mount a FUSE filesystem via vsock with full configuration. @@ -447,6 +447,7 @@ pub fn mount_vsock_with_readers>( /// * `mount_point` - Directory where the FUSE filesystem will be mounted /// * `num_readers` - Number of FUSE reader threads (1-8 recommended) /// * `trace_rate` - Trace every Nth request (0 = disabled) +/// * `max_write` - Maximum write size in bytes (0 = unbounded, use kernel default) #[cfg(target_os = "linux")] pub fn mount_vsock_with_options>( cid: u32, @@ -454,6 +455,7 @@ pub fn mount_vsock_with_options>( mount_point: P, num_readers: usize, trace_rate: u64, + max_write: u32, ) -> anyhow::Result<()> { info!(target: "fuse-pipe::client", cid, port, num_readers, "connecting via vsock"); @@ -516,7 +518,7 @@ pub fn mount_vsock_with_options>( let mut session = None; let mut last_error = None; for attempt in 0..=SESSION_NEW_MAX_RETRIES { - let fs = FuseClient::with_destroyed_flag(Arc::clone(&mux), Arc::clone(&destroyed)); + let fs = FuseClient::with_options(Arc::clone(&mux), Arc::clone(&destroyed), max_write); match fuser::Session::new(fs, mount_point.as_ref(), &config) { Ok(s) => { if attempt > 0 { diff --git a/fuse-pipe/src/server/passthrough.rs b/fuse-pipe/src/server/passthrough.rs index 597b4466..c803c507 100644 --- a/fuse-pipe/src/server/passthrough.rs +++ b/fuse-pipe/src/server/passthrough.rs @@ -210,15 +210,18 @@ impl FilesystemHandler for PassthroughFs { *g.borrow_mut() = groups_opt; }); - // Dispatch to the default handler - let result = self.handle_request(request); - - // Clear thread-local - CURRENT_GROUPS.with(|g| { - *g.borrow_mut() = None; - }); + // Use a Drop guard to ensure thread-local is cleared even on panic + struct GroupsGuard; + impl Drop for GroupsGuard { + fn drop(&mut self) { + CURRENT_GROUPS.with(|g| { + *g.borrow_mut() = None; + }); + } + } + let _guard = GroupsGuard; - result + self.handle_request(request) } fn lookup(&self, parent: u64, name: &str, uid: u32, gid: u32, pid: u32) -> VolumeResponse { diff --git a/src/commands/common.rs b/src/commands/common.rs index 566ed3ab..9229803f 100644 --- a/src/commands/common.rs +++ b/src/commands/common.rs @@ -552,12 +552,14 @@ pub async fn restore_from_snapshot( } // Verify TAP device was created successfully - let verify_cmd = format!("ip link show {} >/dev/null 2>&1", tap_device); let verify_output = tokio::process::Command::new(&nsenter_prefix[0]) .args(&nsenter_prefix[1..]) - .arg("bash") - .arg("-c") - .arg(&verify_cmd) + .arg("ip") + .arg("link") + .arg("show") + .arg(&tap_device) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) .status() .await .context("verifying TAP device")?; diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index 506e58c1..ebb0131a 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -277,8 +277,10 @@ async fn cmd_snapshot_serve(args: SnapshotServeArgs) -> Result<()> { let mut sigterm = signal(SignalKind::terminate())?; let mut sigint = signal(SignalKind::interrupt())?; - // Run server in background task - let mut server_handle = tokio::spawn(async move { server.run().await }); + // Run server in background task with cancellation token + let server_cancel = tokio_util::sync::CancellationToken::new(); + let server_cancel_clone = server_cancel.clone(); + let mut server_handle = tokio::spawn(async move { server.run(server_cancel_clone).await }); // Clone state_manager for signal handler use let state_manager_for_signal = state_manager.clone(); @@ -300,6 +302,7 @@ async fn cmd_snapshot_serve(args: SnapshotServeArgs) -> Result<()> { _ = sigterm.recv() => { info!("received SIGTERM"); + server_cancel.cancel(); break; } _ = sigint.recv() => { @@ -308,6 +311,7 @@ async fn cmd_snapshot_serve(args: SnapshotServeArgs) -> Result<()> { // Second Ctrl-C - force shutdown info!("received second SIGINT, forcing shutdown"); println!("\nForcing shutdown..."); + server_cancel.cancel(); break; } @@ -321,6 +325,7 @@ async fn cmd_snapshot_serve(args: SnapshotServeArgs) -> Result<()> { if running_clones.is_empty() { println!("\nNo running clones, shutting down..."); + server_cancel.cancel(); break; } else { println!("\n⚠️ {} clone(s) still running!", running_clones.len()); @@ -760,6 +765,22 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { cleanup_err ); } + + // Cleanup data directory + if data_dir.exists() { + if let Err(cleanup_err) = tokio::fs::remove_dir_all(&data_dir).await { + warn!( + "failed to cleanup data_dir after setup error: {}", + cleanup_err + ); + } + } + + // Cleanup state file + if let Err(cleanup_err) = state_manager.delete_state(&vm_id).await { + warn!("failed to cleanup state after setup error: {}", cleanup_err); + } + return Err(e); } diff --git a/src/health.rs b/src/health.rs index 4c034039..5959ff54 100644 --- a/src/health.rs +++ b/src/health.rs @@ -132,17 +132,24 @@ pub fn spawn_health_monitor_full( } }; - // Switch to slower polling once healthy - if health_status == HealthStatus::Healthy && !is_healthy { - is_healthy = true; - poll_interval = HEALTH_POLL_HEALTHY_INTERVAL; - info!(target: "health-monitor", "VM healthy, switching to {:?} polling", HEALTH_POLL_HEALTHY_INTERVAL); - - // Signal startup snapshot trigger (fires only once) - if let Some(tx) = startup_tx.take() { - info!(target: "health-monitor", "signaling startup snapshot trigger"); - let _ = tx.send(()); // Ignore error if receiver dropped + // Adaptive polling: fast when unhealthy, slow when healthy + if health_status == HealthStatus::Healthy { + if !is_healthy { + is_healthy = true; + poll_interval = HEALTH_POLL_HEALTHY_INTERVAL; + info!(target: "health-monitor", "VM healthy, switching to {:?} polling", HEALTH_POLL_HEALTHY_INTERVAL); + + // Signal startup snapshot trigger (fires only once) + if let Some(tx) = startup_tx.take() { + info!(target: "health-monitor", "signaling startup snapshot trigger"); + let _ = tx.send(()); // Ignore error if receiver dropped + } } + } else if is_healthy { + // VM was healthy but is no longer — revert to fast polling + is_healthy = false; + poll_interval = HEALTH_POLL_STARTUP_INTERVAL; + warn!(target: "health-monitor", "VM no longer healthy, reverting to {:?} polling", HEALTH_POLL_STARTUP_INTERVAL); } // Stop monitoring if container has stopped @@ -253,7 +260,7 @@ async fn check_podman_healthcheck(pid: u32) -> Option { // Use fcvm exec to run podman inspect inside the VM let exe = match find_fcvm_binary() { Some(e) => e, - None => return Some(true), // Can't find fcvm binary, assume healthy + None => return None, // Can't find fcvm binary, can't determine health }; let cmd_future = tokio::process::Command::new(&exe) @@ -391,7 +398,7 @@ async fn update_health_status_once( .as_ref() .map(|ip| ip.split('/').next().unwrap_or(ip)) .unwrap_or("192.168.1.2"); - let port = 80; // Always use port 80 directly to guest + let port = url.port().unwrap_or(80); debug!(target: "health-monitor", holder_pid = holder_pid, guest_ip = %guest_ip, port = port, "HTTP health check via nsenter"); match check_http_health_nsenter(holder_pid, guest_ip, port, health_path) diff --git a/src/network/bridged.rs b/src/network/bridged.rs index 9e562cf1..27ecf6d8 100644 --- a/src/network/bridged.rs +++ b/src/network/bridged.rs @@ -1,5 +1,5 @@ use anyhow::{Context, Result}; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; use super::{ get_host_dns_servers, namespace, portmap, types::generate_mac, veth, NetworkConfig, @@ -364,39 +364,56 @@ impl NetworkManager for BridgedNetwork { async fn cleanup(&mut self) -> Result<()> { info!(vm_id = %self.vm_id, "cleaning up network namespace and resources"); + let mut errors = Vec::new(); // Step 1: Cleanup port mapping rules (if any) if !self.port_mapping_rules.is_empty() { - portmap::cleanup_port_mappings(&self.port_mapping_rules).await?; + if let Err(e) = portmap::cleanup_port_mappings(&self.port_mapping_rules).await { + warn!(vm_id = %self.vm_id, error = %e, "failed to cleanup port mappings"); + errors.push(format!("port mappings: {}", e)); + } } // Step 2: Delete host route to guest IP (for clones) - // This route was added to allow direct access to the guest IP from the host. - // Must be deleted before the veth to prevent stale routes. if self.is_clone { if let Some(ref guest_ip) = self.guest_ip { - veth::delete_host_route_to_guest(guest_ip).await?; + if let Err(e) = veth::delete_host_route_to_guest(guest_ip).await { + warn!(vm_id = %self.vm_id, error = %e, "failed to delete host route"); + errors.push(format!("host route: {}", e)); + } } } // Step 3: Delete FORWARD rule and veth pair - // Note: With In-Namespace NAT, all clone-specific rules are inside the namespace - // and get cleaned up automatically when the namespace is deleted. if let Some(ref host_veth) = self.host_veth { - // Delete FORWARD rule to avoid accumulating orphaned rules - veth::delete_veth_forward_rule(host_veth).await?; - // Then delete the veth pair (this will also remove the peer in the namespace) - veth::delete_veth_pair(host_veth).await?; + if let Err(e) = veth::delete_veth_forward_rule(host_veth).await { + warn!(vm_id = %self.vm_id, error = %e, "failed to delete forward rule"); + errors.push(format!("forward rule: {}", e)); + } + if let Err(e) = veth::delete_veth_pair(host_veth).await { + warn!(vm_id = %self.vm_id, error = %e, "failed to delete veth pair"); + errors.push(format!("veth pair: {}", e)); + } } - // Step 4: Delete network namespace (this cleans up everything inside it) - // Including all NAT rules, bridge, and veth peer + // Step 4: Delete network namespace if let Some(ref namespace_id) = self.namespace_id { - namespace::delete_namespace(namespace_id).await?; + if let Err(e) = namespace::delete_namespace(namespace_id).await { + warn!(vm_id = %self.vm_id, error = %e, "failed to delete namespace"); + errors.push(format!("namespace: {}", e)); + } } - debug!(vm_id = %self.vm_id, "network cleanup complete"); - Ok(()) + if errors.is_empty() { + debug!(vm_id = %self.vm_id, "network cleanup complete"); + Ok(()) + } else { + anyhow::bail!( + "network cleanup had {} error(s): {}", + errors.len(), + errors.join("; ") + ) + } } fn tap_device(&self) -> &str { diff --git a/src/network/slirp.rs b/src/network/slirp.rs index 371d77dd..1bd21535 100644 --- a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -441,7 +441,14 @@ ip addr add {namespace_ip}/24 dev {bridge} debug!(response = %response_line.trim(), "slirp4netns API response"); if response_line.contains("error") { - warn!(response = %response_line.trim(), "port forwarding may have failed"); + anyhow::bail!( + "port forwarding failed for {}:{} -> {}:{}: {}", + bind_addr, + mapping.host_port, + self.guest_ip, + mapping.guest_port, + response_line.trim() + ); } } diff --git a/src/uffd/server.rs b/src/uffd/server.rs index 208ec447..f497a445 100644 --- a/src/uffd/server.rs +++ b/src/uffd/server.rs @@ -84,8 +84,8 @@ impl UffdServer { &self.socket_path } - /// Run the UFFD server (blocks until all VMs disconnect) - pub async fn run(&self) -> Result<()> { + /// Run the UFFD server (blocks until cancelled via CancellationToken) + pub async fn run(&self, cancel: tokio_util::sync::CancellationToken) -> Result<()> { info!( target: "uffd", snapshot = %self.snapshot_id, @@ -165,12 +165,12 @@ impl UffdServer { } info!(target: "uffd", active_vms = vm_tasks.len(), "VM exited"); + } - // Exit when last VM disconnects - if vm_tasks.is_empty() { - info!(target: "uffd", "no active VMs remaining, shutting down server"); - break; - } + // Shut down when cancellation token is triggered (Ctrl-C / SIGTERM) + _ = cancel.cancelled() => { + info!(target: "uffd", "cancellation requested, shutting down server"); + break; } } } From 2e984f4a665c4657314cd7c0ff7cf3de350d3ac6 Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Fri, 6 Feb 2026 18:57:35 -0800 Subject: [PATCH 3/3] Fix 3 issues from Wave 1 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - e2fsck signal-kill: unwrap_or(1) → unwrap_or(8) so signal-killed process is treated as fatal error, not "errors corrected" - Slirp stderr: keep stderr piped (only read after process exits, no deadlock risk) so error diagnostics aren't lost - Remove stale FCVM_NO_XATTR_FASTPATH env var from README (fast-path was removed in Wave 1) --- README.md | 2 +- src/network/slirp.rs | 2 +- src/storage/disk.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 817814cb..ce243fd3 100644 --- a/README.md +++ b/README.md @@ -875,7 +875,7 @@ See [DESIGN.md](DESIGN.md#guest-agent) for details. | `RUST_LOG` | `warn` | Logging level (quiet by default; use `info` or `debug` for verbose) | | `FCVM_NO_SNAPSHOT` | unset | Set to `1` to disable automatic snapshot creation (same as `--no-snapshot` flag) | | `FCVM_NO_WRITEBACK_CACHE` | unset | Set to `1` to disable FUSE writeback cache (see below) | -| `FCVM_NO_XATTR_FASTPATH` | unset | Set to `1` to disable security.capability xattr fast path | + ### FUSE Writeback Cache diff --git a/src/network/slirp.rs b/src/network/slirp.rs index 1bd21535..2f9eb762 100644 --- a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -344,7 +344,7 @@ ip addr add {namespace_ip}/24 dev {bridge} .arg(&self.slirp_device) .stdin(Stdio::null()) .stdout(Stdio::null()) - .stderr(Stdio::null()); + .stderr(Stdio::piped()); let child = cmd.spawn().context("failed to spawn slirp4netns")?; diff --git a/src/storage/disk.rs b/src/storage/disk.rs index cedf2ac5..4829aaa9 100644 --- a/src/storage/disk.rs +++ b/src/storage/disk.rs @@ -204,7 +204,7 @@ pub async fn ensure_free_space( // e2fsck exit codes: 0=clean, 1=corrected, 2=corrected+reboot needed // Exit code >= 4 means uncorrected errors - if e2fsck_output.status.code().unwrap_or(1) >= 4 { + if e2fsck_output.status.code().unwrap_or(8) >= 4 { bail!( "e2fsck found uncorrectable errors: {}", String::from_utf8_lossy(&e2fsck_output.stderr)