Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions fc-agent/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,14 @@ pub async fn run() -> Result<()> {
// Store prefix globally so exec server and health checks can use it
container::set_podman_cmd_prefix(cmd_prefix.clone());

// Reset podman state to match storage.conf before the first real podman operation.
// By this point, storage setup is complete (btrfs loopback mounted, storage.conf
// written with correct driver). Reset ensures db.sql matches storage.conf even if
// concurrent health monitor `podman inspect` created stale state during setup.
container::reset_podman_state();
// Reset root podman state to match storage.conf. The health monitor may have
// run `podman inspect` via the exec server during setup, creating a stale
// db.sql with the wrong graph driver. Only needed for root podman — user mode
// already resets in create_vm_user(), and a root reset would destroy the
// user's storage directory.
if cmd_prefix.is_empty() {
container::reset_podman_state();
}

// Prepare image based on delivery mode
let image_ref = match (plan.image_mode.as_deref(), &plan.image_device) {
Expand Down
16 changes: 7 additions & 9 deletions fc-agent/src/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,17 +356,15 @@ pub fn setup_btrfs_storage_if_available() {
);
}

/// Reset podman state to match the current storage.conf.
/// Reset root podman state to match the current storage.conf.
///
/// Must be called after storage.conf is written (by btrfs/overlay setup) and
/// immediately before the first real podman operation (pull/load/run).
/// Fixes "database graph driver does not match" errors caused by the health
/// monitor running `podman inspect` via exec before storage setup completes,
/// creating db.sql with an empty or wrong driver.
///
/// This fixes "database graph driver does not match" errors caused by:
/// 1. Stale db.sql from rootfs build (apt post-install creates it with driver="")
/// 2. Concurrent health monitor `podman inspect` recreating db.sql during setup
///
/// `podman system reset --force` atomically drops and recreates all podman state
/// to match the current storage.conf, eliminating any driver mismatch.
/// Only call for root podman (empty cmd_prefix). User-mode podman already
/// resets in create_vm_user(). A root reset would destroy the user's btrfs
/// storage subdirectory at /var/lib/containers/storage/user-{uid}.
pub fn reset_podman_state() {
match std::process::Command::new("podman")
.args(["system", "reset", "--force"])
Expand Down
20 changes: 18 additions & 2 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1042,9 +1042,10 @@ pub async fn ensure_nested_image() -> anyhow::Result<()> {
ensure_nested_container("localhost/nested-test", "Containerfile.nested").await
}

/// Build a container image for nested testing.
/// Build a container image for nested testing, serialized via file lock.
///
/// Always runs podman build - relies on podman's layer caching for speed.
/// Uses an exclusive file lock to prevent concurrent builds from racing on
/// overlay unmount. Redundant builds are fast due to podman's layer caching.
/// If the container extends localhost/nested-test, call ensure_nested_image() first.
///
/// # Arguments
Expand All @@ -1054,6 +1055,19 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a
let fcvm_path = find_fcvm_binary()?;
let fcvm_dir = fcvm_path.parent().unwrap();

// Serialize concurrent builds with a file lock. Multiple nextest processes
// may call this simultaneously; without locking, concurrent `podman build`
// races on overlay unmount and corrupts the build cache (x64-specific).
let lock_name = image_name.replace('/', "-");
let lock_path = format!("/tmp/fcvm-build-{}.lock", lock_name);
let lock_file = std::fs::OpenOptions::new()
.create(true)
.write(true)
.truncate(false)
.open(&lock_path)
.context("creating build lock file")?;
lock_file.lock_exclusive().context("acquiring build lock")?;

// Copy binaries to build context (needed for nested-test base)
if image_name == "localhost/nested-test" {
let profile = fcvm::setup::get_kernel_profile("nested")?
Expand All @@ -1080,6 +1094,7 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a
.context("running podman build")?;

if !output.status.success() {
drop(lock_file);
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Failed to build {}: {}", image_name, stderr);
}
Expand Down Expand Up @@ -1130,6 +1145,7 @@ pub async fn ensure_nested_container(image_name: &str, containerfile: &str) -> a
println!("✓ {} built", image_name);
}

drop(lock_file);
Ok(())
}

Expand Down