diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index ceac31cc..0bee2aed 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -667,23 +667,6 @@ Run `make help` for full list. Key targets: └── cache/ # Downloaded cloud images ``` -### One-Time Setup (dnsmasq) - -```bash -sudo apt-get update -sudo apt-get install -y dnsmasq - -# dnsmasq for DNS forwarding to VMs (bind-dynamic listens on dynamically created TAP devices) -sudo tee /etc/dnsmasq.d/fcvm.conf > /dev/null < /dev/null < /dev/null < /dev/null < bool { } // Send message - let written = unsafe { libc::write(fd, message.as_ptr() as *const libc::c_void, message.len()) }; + let written = + unsafe { libc::write(fd, message.as_ptr() as *const libc::c_void, message.len()) }; unsafe { libc::close(fd) }; written == message.len() as isize diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..1a216558 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "1.92.0" +components = ["rustfmt", "clippy"] diff --git a/src/commands/common.rs b/src/commands/common.rs index 75701ac9..473aa837 100644 --- a/src/commands/common.rs +++ b/src/commands/common.rs @@ -21,6 +21,59 @@ pub const VSOCK_VOLUME_PORT_BASE: u32 = 5000; /// Vsock port for status channel (fc-agent notifies when container starts) pub const VSOCK_STATUS_PORT: u32 = 4999; +/// Minimum required Firecracker version for network_overrides support +const MIN_FIRECRACKER_VERSION: (u32, u32, u32) = (1, 13, 1); + +/// Find and validate Firecracker binary +/// +/// Returns the path to the Firecracker binary if it exists and meets minimum version requirements. +/// Fails with a clear error if Firecracker is not found or version is too old. +pub fn find_firecracker() -> Result { + let firecracker_bin = which::which("firecracker").context("firecracker not found in PATH")?; + + // Check version + let output = std::process::Command::new(&firecracker_bin) + .arg("--version") + .output() + .context("failed to run firecracker --version")?; + + let version_str = String::from_utf8_lossy(&output.stdout); + let version = parse_firecracker_version(&version_str)?; + + if version < MIN_FIRECRACKER_VERSION { + anyhow::bail!( + "Firecracker version {}.{}.{} is too old. Minimum required: {}.{}.{} (for network_overrides support in snapshot cloning)", + version.0, version.1, version.2, + MIN_FIRECRACKER_VERSION.0, MIN_FIRECRACKER_VERSION.1, MIN_FIRECRACKER_VERSION.2 + ); + } + + debug!( + "Found Firecracker {}.{}.{} at {:?}", + version.0, version.1, version.2, firecracker_bin + ); + + Ok(firecracker_bin) +} + +/// Parse Firecracker version from --version output +/// +/// Expected format: "Firecracker v1.14.0" or similar +fn parse_firecracker_version(output: &str) -> Result<(u32, u32, u32)> { + // Find version number pattern vX.Y.Z + let version_re = regex::Regex::new(r"v?(\d+)\.(\d+)\.(\d+)").context("invalid regex")?; + + let caps = version_re + .captures(output) + .context("could not parse Firecracker version from output")?; + + let major: u32 = caps[1].parse().context("invalid major version")?; + let minor: u32 = caps[2].parse().context("invalid minor version")?; + let patch: u32 = caps[3].parse().context("invalid patch version")?; + + Ok((major, minor, patch)) +} + /// Save VM state with complete network configuration /// /// This function ensures both baseline and clone VMs save identical network data, diff --git a/src/commands/podman.rs b/src/commands/podman.rs index 48399b1b..723be8c6 100644 --- a/src/commands/podman.rs +++ b/src/commands/podman.rs @@ -689,8 +689,7 @@ async fn run_vm_setup( holder_child = None; } - let firecracker_bin = which::which("firecracker") - .context("firecracker not found in PATH")?; + let firecracker_bin = super::common::find_firecracker()?; vm_manager .start(&firecracker_bin, None) diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index a0c56b79..61275444 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -1021,8 +1021,7 @@ async fn run_clone_setup( ); vm_manager.set_vsock_redirect(baseline_dir, data_dir.to_path_buf()); - let firecracker_bin = which::which("firecracker") - .context("firecracker not found in PATH")?; + let firecracker_bin = super::common::find_firecracker()?; vm_manager .start(&firecracker_bin, None) diff --git a/src/firecracker/api.rs b/src/firecracker/api.rs index 28e487af..6d51caa8 100644 --- a/src/firecracker/api.rs +++ b/src/firecracker/api.rs @@ -36,7 +36,10 @@ impl FirecrackerClient { let resp = self.client.request(req).await?; if resp.status() != StatusCode::NO_CONTENT && resp.status() != StatusCode::OK { - anyhow::bail!("Firecracker API error: {}", resp.status()); + let status = resp.status(); + let body_bytes = hyper::body::to_bytes(resp.into_body()).await?; + let body_str = String::from_utf8_lossy(&body_bytes); + anyhow::bail!("Firecracker API error: {} - {}", status, body_str); } Ok(()) } @@ -52,7 +55,10 @@ impl FirecrackerClient { let resp = self.client.request(req).await?; if resp.status() != StatusCode::NO_CONTENT && resp.status() != StatusCode::OK { - anyhow::bail!("Firecracker API error: {}", resp.status()); + let status = resp.status(); + let body_bytes = hyper::body::to_bytes(resp.into_body()).await?; + let body_str = String::from_utf8_lossy(&body_bytes); + anyhow::bail!("Firecracker API error: {} - {}", status, body_str); } Ok(()) } diff --git a/src/health.rs b/src/health.rs index 3f001e30..14882395 100644 --- a/src/health.rs +++ b/src/health.rs @@ -185,13 +185,21 @@ async fn update_health_status_once( let health_path = url.path(); let net = &state.config.network; - // Rootless mode with loopback_ip (preferred - simpler, no nsenter needed) - // Linux routes all of 127.0.0.0/8 to loopback without ip addr add! - if let Some(loopback_ip) = &net.loopback_ip { - let port = net.health_check_port.unwrap_or(80); - debug!(target: "health-monitor", loopback_ip = %loopback_ip, port = port, "HTTP health check via loopback"); - - match check_http_health_loopback(loopback_ip, port, health_path).await { + // Rootless mode with holder_pid: use nsenter to curl guest directly + // This bypasses the complexity of slirp4netns port forwarding + if let Some(holder_pid) = state.holder_pid { + // Extract guest IP without CIDR suffix + let guest_ip = net + .guest_ip + .as_ref() + .map(|ip| ip.split('/').next().unwrap_or(ip)) + .unwrap_or("192.168.1.2"); + let port = 80; // Always use port 80 directly to guest + debug!(target: "health-monitor", holder_pid = holder_pid, guest_ip = %guest_ip, port = port, "HTTP health check via nsenter"); + + match check_http_health_nsenter(holder_pid, guest_ip, port, health_path) + .await + { Ok(true) => { debug!(target: "health-monitor", "health check passed"); *last_failure_log = None; @@ -209,7 +217,7 @@ async fn update_health_status_once( } }; if should_log { - debug!(target: "health-monitor", error = %e, "HTTP health check failed"); + debug!(target: "health-monitor", error = %e, "HTTP health check failed (nsenter)"); *last_failure_log = Some(Instant::now()); } HealthStatus::Unhealthy @@ -276,62 +284,89 @@ pub async fn run_health_check_once( Ok(status) } -/// Check if HTTP service is responding via loopback IP (rootless mode) +/// Check if HTTP service is responding via nsenter into the network namespace (rootless mode) /// -/// For rootless VMs, we use a unique loopback IP (127.x.y.z) with port forwarding -/// through slirp4netns to reach the guest. +/// For rootless VMs, we use nsenter to enter the network namespace and curl +/// the guest directly. This bypasses the complexity of slirp4netns port forwarding. /// -/// Linux routes all of 127.0.0.0/8 to loopback without needing `ip addr add`, -/// so this works fully rootless! -async fn check_http_health_loopback( - loopback_ip: &str, +/// The holder_pid is the PID of the namespace holder process (sleep infinity). +async fn check_http_health_nsenter( + holder_pid: u32, + guest_ip: &str, port: u16, health_path: &str, ) -> Result { - let url = format!("http://{}:{}{}", loopback_ip, port, health_path); - - let client = reqwest::Client::builder() - .timeout(Duration::from_secs(1)) - .build() - .context("building reqwest client")?; + let url = format!("http://{}:{}{}", guest_ip, port, health_path); let start = Instant::now(); - match client.get(&url).send().await { - Ok(response) => { - let elapsed = start.elapsed(); - if response.status().is_success() { - debug!( - target: "health-monitor", - loopback_ip = loopback_ip, - port = port, - status = %response.status(), - elapsed_ms = elapsed.as_millis(), - "health check succeeded (rootless)" - ); - Ok(true) - } else { - anyhow::bail!( - "Health check failed with status {} via {}:{} ({}ms)", - response.status(), - loopback_ip, - port, - elapsed.as_millis() - ) - } + // Use nsenter to enter the namespace and curl the guest directly + // --preserve-credentials keeps UID/GID mapping + let output = tokio::process::Command::new("nsenter") + .args([ + "-t", + &holder_pid.to_string(), + "-U", + "-n", + "--preserve-credentials", + "--", + "curl", + "-s", + "-o", + "/dev/null", + "-w", + "%{http_code}", + "--max-time", + "1", + &url, + ]) + .output() + .await + .context("failed to run nsenter curl")?; + + let elapsed = start.elapsed(); + + if output.status.success() { + let status_code = String::from_utf8_lossy(&output.stdout); + let status_code = status_code.trim(); + + if status_code.starts_with('2') || status_code.starts_with('3') { + debug!( + target: "health-monitor", + holder_pid = holder_pid, + guest_ip = guest_ip, + port = port, + status = status_code, + elapsed_ms = elapsed.as_millis(), + "health check succeeded (nsenter)" + ); + Ok(true) + } else { + anyhow::bail!( + "Health check failed with status {} via nsenter to {}:{} ({}ms)", + status_code, + guest_ip, + port, + elapsed.as_millis() + ) } - Err(e) => { - if e.is_timeout() { - anyhow::bail!( - "Health check timed out after 1 second via {}:{}", - loopback_ip, - port - ) - } else if e.is_connect() { - anyhow::bail!("Connection refused to {}:{}", loopback_ip, port) - } else { - anyhow::bail!("Failed to connect to {}:{}: {}", loopback_ip, port, e) - } + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + if stderr.contains("timed out") || stderr.contains("Connection timed out") { + anyhow::bail!( + "Health check timed out via nsenter to {}:{}", + guest_ip, + port + ) + } else if stderr.contains("Connection refused") { + anyhow::bail!("Connection refused to {}:{} via nsenter", guest_ip, port) + } else { + anyhow::bail!( + "Failed to connect to {}:{} via nsenter: {}", + guest_ip, + port, + stderr.trim() + ) } } } diff --git a/src/network/bridged.rs b/src/network/bridged.rs index 2c357997..e979df6a 100644 --- a/src/network/bridged.rs +++ b/src/network/bridged.rs @@ -291,7 +291,7 @@ impl NetworkManager for BridgedNetwork { loopback_ip: None, health_check_port: Some(80), health_check_url: Some(format!("http://{}:80/", health_check_ip)), - dns_server: Some(host_ip), // dnsmasq with bind-dynamic listens here + dns_server: super::get_host_dns_servers().first().cloned(), }) } diff --git a/src/network/mod.rs b/src/network/mod.rs index df2bae53..1596e725 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -33,3 +33,62 @@ pub trait NetworkManager: Send + Sync { /// Get a reference to Any for downcasting fn as_any(&self) -> &dyn std::any::Any; } + +/// Read DNS servers from host system +/// +/// Parses /etc/resolv.conf to extract nameserver entries. If only localhost +/// addresses are found (indicating systemd-resolved), falls back to reading +/// /run/systemd/resolve/resolv.conf for the real upstream DNS servers. +/// +/// Returns an empty Vec if no DNS servers can be determined. +pub fn get_host_dns_servers() -> Vec { + // Try /etc/resolv.conf first + let resolv = std::fs::read_to_string("/etc/resolv.conf").unwrap_or_default(); + + let servers: Vec = resolv + .lines() + .filter_map(|line| { + let line = line.trim(); + line.strip_prefix("nameserver ") + .map(|s| s.trim().to_string()) + }) + .collect(); + + // If only localhost (systemd-resolved), try real config + if servers.iter().all(|s| s.starts_with("127.")) { + if let Ok(real) = std::fs::read_to_string("/run/systemd/resolve/resolv.conf") { + let real_servers: Vec = real + .lines() + .filter_map(|line| { + line.trim() + .strip_prefix("nameserver ") + .map(|s| s.trim().to_string()) + }) + .filter(|s| !s.starts_with("127.")) + .collect(); + if !real_servers.is_empty() { + return real_servers; + } + } + } + + servers +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_host_dns_servers() { + let servers = get_host_dns_servers(); + println!("DNS servers: {:?}", servers); + // Should find at least one non-localhost server on this system + assert!(!servers.is_empty(), "Expected to find DNS servers"); + // Should not include localhost (127.x.x.x) since we're on systemd-resolved + assert!( + servers.iter().all(|s| !s.starts_with("127.")), + "Should have filtered out localhost DNS" + ); + } +} diff --git a/src/network/slirp.rs b/src/network/slirp.rs index e6e83b67..29f18eac 100644 --- a/src/network/slirp.rs +++ b/src/network/slirp.rs @@ -11,10 +11,6 @@ use super::{types::generate_mac, NetworkConfig, NetworkManager, PortMapping}; use crate::paths; use crate::state::truncate_id; -/// slirp4netns network addressing constants -/// slirp0 device is assigned this IP for routing to slirp4netns -const SLIRP_CIDR: &str = "10.0.2.100/24"; - /// Guest network addressing (isolated per VM namespace) const GUEST_SUBNET: &str = "192.168.1.0/24"; const GUEST_IP: &str = "192.168.1.2"; @@ -54,7 +50,6 @@ pub struct SlirpNetwork { port_mappings: Vec, // Network addressing - slirp_cidr: String, // slirp0: 10.0.2.100/24, gateway 10.0.2.2 guest_subnet: String, // tap0: 192.168.x.0/24 (derived from vm_id) guest_ip: String, // Guest VM IP (192.168.x.2) namespace_ip: String, // Namespace host IP on tap0 (192.168.x.1) @@ -74,7 +69,6 @@ impl SlirpNetwork { tap_device, slirp_device: SLIRP_DEVICE_NAME.to_string(), port_mappings, - slirp_cidr: SLIRP_CIDR.to_string(), guest_subnet: GUEST_SUBNET.to_string(), guest_ip: GUEST_IP.to_string(), namespace_ip: NAMESPACE_IP.to_string(), @@ -157,16 +151,17 @@ impl SlirpNetwork { /// Build the setup script to run inside the namespace via nsenter /// - /// This script creates both TAP devices and configures networking. + /// This script creates both TAP devices and sets up iptables rules for egress. + /// Health checks use nsenter to curl the guest directly, no port forwarding needed. /// Run via: nsenter -t HOLDER_PID -U -n -- bash -c '' pub fn build_setup_script(&self) -> String { format!( r#" set -e -# Create slirp0 TAP for slirp4netns connectivity +# Create slirp0 TAP for slirp4netns (slirp4netns will attach to this) ip tuntap add {slirp_dev} mode tap -ip addr add {slirp_ip} dev {slirp_dev} +ip addr add 10.0.2.1/24 dev {slirp_dev} ip link set {slirp_dev} up # Create TAP device for Firecracker (must exist before Firecracker starts) @@ -177,24 +172,23 @@ ip link set {fc_tap} up # Set up loopback ip link set lo up -# Set default route via slirp gateway +# Enable IP forwarding (required for NAT to work) +sysctl -w net.ipv4.ip_forward=1 + +# Set default route via slirp gateway (10.0.2.2 is slirp4netns internal gateway) ip route add default via 10.0.2.2 dev {slirp_dev} -# Set up iptables MASQUERADE for traffic from guest subnet -# This NATs guest traffic (192.168.x.x) to slirp0's address (10.0.2.100) -iptables -t nat -A POSTROUTING -s {guest_subnet} -o {slirp_dev} -j MASQUERADE 2>/dev/null || true +# Allow forwarding between slirp0 and FC TAP +iptables -A FORWARD -i {slirp_dev} -o {fc_tap} -j ACCEPT 2>/dev/null || true +iptables -A FORWARD -i {fc_tap} -o {slirp_dev} -j ACCEPT 2>/dev/null || true -# Set up DNAT for inbound connections from slirp4netns -# When slirp4netns forwards traffic to 10.0.2.100, redirect it to the actual guest IP -# This enables port forwarding: host -> slirp4netns -> 10.0.2.100 -> DNAT -> guest (192.168.x.2) -iptables -t nat -A PREROUTING -d 10.0.2.100 -j DNAT --to-destination {guest_ip} 2>/dev/null || true +# Set up iptables MASQUERADE for traffic from guest subnet (egress) +iptables -t nat -A POSTROUTING -s {guest_subnet} -o {slirp_dev} -j MASQUERADE 2>/dev/null || true "#, slirp_dev = self.slirp_device, - slirp_ip = self.slirp_cidr, fc_tap = self.tap_device, ns_ip = self.namespace_ip, guest_subnet = self.guest_subnet, - guest_ip = self.guest_ip, ) } @@ -239,11 +233,12 @@ iptables -t nat -A PREROUTING -d 10.0.2.100 -j DNAT --to-destination {guest_ip} namespace_pid = namespace_pid, slirp_tap = %self.slirp_device, api_socket = %api_socket.display(), - "starting slirp4netns (attaching to existing TAP)" + "starting slirp4netns (creating TAP, no IP assignment)" ); - // Start slirp4netns WITHOUT --configure (TAP already exists and is configured) - // slirp4netns will attach to the existing TAP device + // Start slirp4netns WITHOUT --configure so it doesn't assign an IP + // This avoids the issue where DNAT doesn't work for local addresses + // The TAP is created and connected, but we handle routing ourselves let mut cmd = Command::new("slirp4netns"); cmd.arg("--ready-fd") .arg(ready_write_raw.to_string()) @@ -347,59 +342,6 @@ iptables -t nat -A PREROUTING -d 10.0.2.100 -j DNAT --to-destination {guest_ip} Ok(()) } - /// Setup health check port forward (loopback_ip:8080 → guest:80) - /// - /// Uses port 8080 on host (unprivileged) forwarding to port 80 in guest. - /// This is fully rootless - no capabilities or sudo needed. - /// Linux routes all of 127.0.0.0/8 to loopback without needing `ip addr add`. - async fn setup_health_check_forward(&self, loopback_ip: &str) -> Result<()> { - let api_socket = self - .api_socket_path - .as_ref() - .context("API socket not configured")?; - - // Forward from unprivileged port 8080 on host to port 80 in guest - // Port 8080 doesn't require CAP_NET_BIND_SERVICE - let request = serde_json::json!({ - "execute": "add_hostfwd", - "arguments": { - "proto": "tcp", - "host_addr": loopback_ip, - "host_port": 8080, - "guest_addr": "10.0.2.100", - "guest_port": 80 - } - }); - - info!( - loopback_ip = %loopback_ip, - guest_ip = %self.guest_ip, - "setting up health check port forward (8080 -> 80) - fully rootless!" - ); - - let mut stream = UnixStream::connect(api_socket) - .await - .context("connecting to slirp4netns API socket")?; - - let request_str = serde_json::to_string(&request)? + "\n"; - stream.write_all(request_str.as_bytes()).await?; - stream.shutdown().await?; - - let mut reader = BufReader::new(stream); - let mut response_line = String::new(); - reader.read_line(&mut response_line).await?; - - debug!(response = %response_line.trim(), "slirp4netns health check forward response"); - - if response_line.contains("error") { - warn!(response = %response_line.trim(), "health check port forwarding may have failed"); - } else { - info!("health check port forwarding configured successfully"); - } - - Ok(()) - } - /// Get guest IP address for kernel boot args pub fn guest_ip(&self) -> &str { &self.guest_ip @@ -454,12 +396,10 @@ impl NetworkManager for SlirpNetwork { self.start_slirp(holder_pid).await?; - // Set up health check port forward (loopback_ip:80 → guest:80) - // No ip addr add needed - Linux routes all of 127.0.0.0/8 to loopback! - if let Some(loopback_ip) = &self.loopback_ip { - self.setup_health_check_forward(loopback_ip).await?; - } + // Health checks now use nsenter to curl the guest directly + // No port forwarding needed for health checks + // User-specified port mappings still use slirp4netns port forwarding if !self.port_mappings.is_empty() { self.setup_port_forwarding().await?; } diff --git a/src/network/veth.rs b/src/network/veth.rs index 4486fab3..12763676 100644 --- a/src/network/veth.rs +++ b/src/network/veth.rs @@ -1,42 +1,9 @@ use anyhow::{Context, Result}; -use std::time::Duration; use tokio::process::Command; use tracing::{debug, info, warn}; use super::namespace::exec_in_namespace; -/// Wait for dnsmasq to bind to a specific IP address on port 53 -/// -/// dnsmasq with `bind-dynamic` detects new interfaces and binds to them, -/// but this takes time. We must wait for it to bind before VMs can use DNS. -async fn wait_for_dnsmasq_bind(ip: &str) -> Result<()> { - let check_addr = format!("{}:53", ip); - - for attempt in 0..50 { - // Check if anything is listening on this IP:53 - let output = Command::new("ss") - .args(["-uln", "sport", "=", ":53"]) - .output() - .await - .context("checking if dnsmasq is listening")?; - - let stdout = String::from_utf8_lossy(&output.stdout); - if stdout.contains(ip) { - if attempt > 0 { - debug!(ip = %ip, attempts = attempt, "dnsmasq now listening"); - } - return Ok(()); - } - - tokio::time::sleep(Duration::from_millis(20)).await; - } - - anyhow::bail!( - "dnsmasq did not bind to {} within 1 second - check dnsmasq config", - check_addr - ); -} - /// In-Namespace NAT configuration for clone egress /// /// When clones are restored from a snapshot, they all have the same guest IP @@ -201,10 +168,8 @@ pub async fn setup_host_veth(veth_name: &str, ip_with_cidr: &str) -> Result<()> } } - // Wait for dnsmasq to bind to this IP (bind-dynamic detection) - // VMs use this IP as their DNS server, so dnsmasq must be listening before VM boots - let ip = ip_with_cidr.split('/').next().unwrap_or(ip_with_cidr); - wait_for_dnsmasq_bind(ip).await?; + // DNS: VMs now use host DNS servers directly (read from /etc/resolv.conf) + // No dnsmasq needed - the host DNS servers are reachable via the veth bridge // Add FORWARD rule to allow outbound traffic from this veth let forward_rule = format!("-A FORWARD -i {} -j ACCEPT", veth_name); diff --git a/src/setup/rootfs.rs b/src/setup/rootfs.rs index 01515e02..2100f36c 100644 --- a/src/setup/rootfs.rs +++ b/src/setup/rootfs.rs @@ -112,10 +112,29 @@ pub async fn ensure_rootfs() -> Result { info!("note: first-time cloud image download may take 5-15 minutes"); info!("cached rootfs creation takes ~45 seconds"); - let result = create_ubuntu_rootfs(&rootfs_path) + // Create at temp path first, then rename when complete to avoid race conditions. + // Other processes check if rootfs_path exists, so we must not create it until + // package installation is complete. + let temp_rootfs_path = rootfs_path.with_extension("ext4.tmp"); + + // Clean up any leftover temp file from a previous failed attempt + let _ = tokio::fs::remove_file(&temp_rootfs_path).await; + + let result = create_ubuntu_rootfs(&temp_rootfs_path) .await .context("creating Ubuntu rootfs"); + // If successful, rename temp file to final path + if result.is_ok() { + tokio::fs::rename(&temp_rootfs_path, &rootfs_path) + .await + .context("renaming temp rootfs to final path")?; + info!("rootfs creation complete"); + } else { + // Clean up temp file on failure + let _ = tokio::fs::remove_file(&temp_rootfs_path).await; + } + // Release lock flock .unlock() @@ -125,8 +144,6 @@ pub async fn ensure_rootfs() -> Result { result?; - info!("rootfs creation complete"); - Ok(rootfs_path) } diff --git a/tests/test_egress.rs b/tests/test_egress.rs index 501fc37e..f067bdc2 100644 --- a/tests/test_egress.rs +++ b/tests/test_egress.rs @@ -72,7 +72,7 @@ async fn egress_fresh_test_impl(network: &str) -> Result<()> { .context("spawning VM")?; println!(" Waiting for VM to become healthy (PID: {})...", vm_pid); - common::poll_health_by_pid(vm_pid, 60).await?; + common::poll_health_by_pid(vm_pid, 180).await?; println!(" ✓ VM healthy"); // Step 2: Test egress @@ -137,7 +137,11 @@ async fn egress_clone_test_impl(network: &str) -> Result<()> { " Waiting for baseline VM to become healthy (PID: {})...", baseline_pid ); - common::poll_health_by_pid(baseline_pid, 60).await?; + // Use 300 second timeout to account for rootfs creation on first run + if let Err(e) = common::poll_health_by_pid(baseline_pid, 300).await { + common::kill_process(baseline_pid).await; + return Err(e.context("baseline VM failed to become healthy")); + } println!(" ✓ Baseline VM healthy"); // Test egress on baseline first diff --git a/tests/test_exec.rs b/tests/test_exec.rs index 8166592e..96791263 100644 --- a/tests/test_exec.rs +++ b/tests/test_exec.rs @@ -45,7 +45,7 @@ async fn exec_test_impl(network: &str) -> Result<()> { // Wait for VM to become healthy println!(" Waiting for VM to become healthy..."); - if let Err(e) = common::poll_health_by_pid(fcvm_pid, 60).await { + if let Err(e) = common::poll_health_by_pid(fcvm_pid, 180).await { common::kill_process(fcvm_pid).await; return Err(e.context("VM failed to become healthy")); } diff --git a/tests/test_sanity.rs b/tests/test_sanity.rs index 64168254..0356590f 100644 --- a/tests/test_sanity.rs +++ b/tests/test_sanity.rs @@ -43,8 +43,9 @@ async fn sanity_test_impl(network: &str) -> Result<()> { println!(" Waiting for VM to become healthy..."); // Spawn health check task - // Use 180 second timeout to account for rootfs creation on first run (~60 sec) - let health_task = tokio::spawn(common::poll_health_by_pid(fcvm_pid, 180)); + // Use 300 second timeout to account for rootfs creation on first run + // (cloud image download ~7s, virt-customize ~10-60s, extraction ~30s, packages ~60s) + let health_task = tokio::spawn(common::poll_health_by_pid(fcvm_pid, 300)); // Monitor process for unexpected exits let monitor_task: tokio::task::JoinHandle> =