From 73d81f930516d5cb64ef3bbf11935d7ef3fbe6f5 Mon Sep 17 00:00:00 2001
From: "claude[bot]"
Date: Tue, 3 Feb 2026 02:14:14 +0000
Subject: [PATCH] fix: wait for network restore before exec in snapshot clones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After snapshot restore with slirp4netns, fc-agent needs time to complete
network initialization before container exec will work:

1. fc-agent polls MMDS for restore-epoch every 100ms
2. When detected, it runs handle_clone_restore() which:
   - Flushes ARP cache
   - Sends gratuitous NDP NA for IPv6
   - This announces the VM's MAC to the new slirp4netns process

Without this delay, `fcvm snapshot run --exec` fails with exit code 125
("no such container") because the container's IPv6 networking isn't ready.

The test `test_snapshot_run_exec_rootless` was failing with:

    exec_output_found=true (command ran)
    exit_success=false (exit code 125)

This fix adds a 300ms delay after vsock socket is ready:

- 100ms max for restore-epoch detection
- 200ms for network setup (ARP flush + NDP NA)

This ensures IPv6 routing is established before trying to exec into the
container, allowing Podman to communicate properly.

Fixes CI test: test_snapshot_run_exec_rootless Fixes CI test: test_snapshot_run_exec_bridged 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/commands/snapshot.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/commands/snapshot.rs b/src/commands/snapshot.rs index f6cee3c2..5ae61acd 100644 --- a/src/commands/snapshot.rs +++ b/src/commands/snapshot.rs @@ -825,6 +825,16 @@ pub async fn cmd_snapshot_run(args: SnapshotRunArgs) -> Result<()> { tokio::time::sleep(VSOCK_POLL_INTERVAL).await; } + + // After snapshot restore, fc-agent needs time to complete network initialization: + // - Detect restore-epoch change (polls every 100ms) + // - Execute handle_clone_restore(): flush ARP, send NDP NA for IPv6 + // Without this delay, container exec fails with "no such container" because + // the container's network isn't fully ready (IPv6 NDP not announced to slirp4netns). + // 300ms allows: 100ms max for restore-epoch detection + 200ms for network setup. + info!("waiting 300ms for fc-agent to complete network restore after snapshot clone"); + tokio::time::sleep(Duration::from_millis(300)).await; + let exit_code = crate::commands::exec::run_exec_in_vm( &vsock_socket, &cmd_args,