diff --git a/fc-agent/src/agent.rs b/fc-agent/src/agent.rs index c9391e1b..9696ab6c 100644 --- a/fc-agent/src/agent.rs +++ b/fc-agent/src/agent.rs @@ -162,11 +162,14 @@ pub async fn run() -> Result<()> { // Store prefix globally so exec server and health checks can use it container::set_podman_cmd_prefix(cmd_prefix.clone()); - // Reset podman state to match storage.conf before the first real podman operation. - // By this point, storage setup is complete (btrfs loopback mounted, storage.conf - // written with correct driver). Reset ensures db.sql matches storage.conf even if - // concurrent health monitor `podman inspect` created stale state during setup. - container::reset_podman_state(); + // Reset root podman state to match storage.conf. The health monitor may have + // run `podman inspect` via the exec server during setup, creating a stale + // db.sql with the wrong graph driver. Only needed for root podman — user mode + // already resets in create_vm_user(), and a root reset would destroy the + // user's storage directory. + if cmd_prefix.is_empty() { + container::reset_podman_state(); + } // Prepare image based on delivery mode let image_ref = match (plan.image_mode.as_deref(), &plan.image_device) { diff --git a/fc-agent/src/container.rs b/fc-agent/src/container.rs index 89adee5e..3596a708 100644 --- a/fc-agent/src/container.rs +++ b/fc-agent/src/container.rs @@ -356,17 +356,15 @@ pub fn setup_btrfs_storage_if_available() { ); } -/// Reset podman state to match the current storage.conf. +/// Reset root podman state to match the current storage.conf. /// -/// Must be called after storage.conf is written (by btrfs/overlay setup) and -/// immediately before the first real podman operation (pull/load/run). +/// Fixes "database graph driver does not match" errors caused by the health +/// monitor running `podman inspect` via exec before storage setup completes, +/// creating db.sql with an empty or wrong driver.
/// - /// This fixes "database graph driver does not match" errors caused by: /// 1. Stale db.sql from rootfs build (apt post-install creates it with driver="") /// 2. Concurrent health monitor `podman inspect` recreating db.sql during setup /// - /// `podman system reset --force` atomically drops and recreates all podman state /// to match the current storage.conf, eliminating any driver mismatch. +/// Only call for root podman (empty cmd_prefix). User-mode podman already +/// resets in create_vm_user(). A root reset would destroy the user's btrfs +/// storage subdirectory at /var/lib/containers/storage/user-{uid}. pub fn reset_podman_state() { match std::process::Command::new("podman") .args(["system", "reset", "--force"]) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 915f2fa2..4d4b78c7 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1042,9 +1042,10 @@ pub async fn ensure_nested_image() -> anyhow::Result<()> { ensure_nested_container("localhost/nested-test", "Containerfile.nested").await } -/// Build a container image for nested testing. +/// Build a container image for nested testing, serialized via file lock. /// -/// Always runs podman build - relies on podman's layer caching for speed. +/// Uses an exclusive file lock to prevent concurrent builds from racing on +/// overlay unmount. Redundant builds are fast due to podman's layer caching. /// If the container extends localhost/nested-test, call ensure_nested_image() first. /// /// # Arguments @@ -1054,6 +1055,19 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a let fcvm_path = find_fcvm_binary()?; let fcvm_dir = fcvm_path.parent().unwrap(); + // Serialize concurrent builds with a file lock. Multiple nextest processes + // may call this simultaneously; without locking, concurrent `podman build` + // races on overlay unmount and corrupts the build cache (x64-specific).
+ let lock_name = image_name.replace('/', "-"); + let lock_path = format!("/tmp/fcvm-build-{}.lock", lock_name); + let lock_file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(false) + .open(&lock_path) + .context("creating build lock file")?; + lock_file.lock_exclusive().context("acquiring build lock")?; + // Copy binaries to build context (needed for nested-test base) if image_name == "localhost/nested-test" { let profile = fcvm::setup::get_kernel_profile("nested")? @@ -1080,6 +1094,7 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a .context("running podman build")?; if !output.status.success() { + drop(lock_file); let stderr = String::from_utf8_lossy(&output.stderr); anyhow::bail!("Failed to build {}: {}", image_name, stderr); } @@ -1130,6 +1145,7 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a println!("✓ {} built", image_name); } + drop(lock_file); Ok(()) }