Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 4 additions & 20 deletions .claude/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -667,23 +667,6 @@ Run `make help` for full list. Key targets:
└── cache/ # Downloaded cloud images
```

### One-Time Setup (dnsmasq)

```bash
sudo apt-get update
sudo apt-get install -y dnsmasq

# dnsmasq for DNS forwarding to VMs (bind-dynamic listens on dynamically created TAP devices)
sudo tee /etc/dnsmasq.d/fcvm.conf > /dev/null <<EOF
bind-dynamic
server=8.8.8.8
server=8.8.4.4
no-resolv
cache-size=1000
EOF
sudo systemctl restart dnsmasq
```

## Key Learnings

### Serial Console
Expand All @@ -709,9 +692,10 @@ ip addr add 172.16.29.1/24 dev tap-vm-c93e8 # Guest is 172.16.29.2
- On other clouds: use bare-metal or hosts with nested virtualization

### DNS Resolution in VMs
- Problem: Container image pulls failing with DNS timeout
- Root cause: VMs configured to use 8.8.8.8 but NAT wasn't forwarding DNS properly
- Fix: Install dnsmasq on host with `bind-dynamic` to listen on TAP devices
- VMs use the host's DNS servers directly (read from `/etc/resolv.conf`)
- For systemd-resolved hosts, falls back to `/run/systemd/resolve/resolv.conf`
- Traffic flows: Guest → NAT → Host's DNS servers
- No dnsmasq required

### Pipe Buffer Deadlock in Tests (CRITICAL)

Expand Down
17 changes: 17 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Dependabot configuration (schema version 2).
version: 2
updates:
# Cargo dependencies declared at the repository root, checked weekly.
- package-ecosystem: "cargo"
directory: "/"
schedule:
interval: "weekly"
# Cap on simultaneously open Dependabot pull requests for this ecosystem.
open-pull-requests-limit: 5
groups:
# Collect minor and patch bumps into a single grouped update.
minor-and-patch:
update-types:
- "minor"
- "patch"

# GitHub Actions referenced by the workflows, checked weekly.
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
41 changes: 4 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -255,19 +255,6 @@ jobs:
ls -la /dev/nbd* | head -5
- name: Setup network namespace directory
run: sudo mkdir -p /var/run/netns
- name: Setup dnsmasq for VM DNS
run: |
sudo apt-get update
sudo apt-get install -y dnsmasq
sudo tee /etc/dnsmasq.d/fcvm.conf > /dev/null <<EOF
bind-dynamic
server=8.8.8.8
server=8.8.4.4
no-resolv
cache-size=1000
EOF
sudo systemctl restart dnsmasq
sudo systemctl status dnsmasq
- name: Setup iptables for VM networking
run: |
# BuildJet runners have FORWARD chain set to DROP by default
Expand All @@ -285,6 +272,8 @@ jobs:
test-vm-exec:
name: VM Exec
runs-on: buildjet-32vcpu-ubuntu-2204
needs: test-vm-sanity # Sequential: flock doesn't work across podman containers sharing /dev/nbd0
if: always() # Run even if previous job failed (rootfs will be cached after first success)
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -305,18 +294,6 @@ jobs:
run: sudo modprobe nbd max_part=8
- name: Setup network namespace directory
run: sudo mkdir -p /var/run/netns
- name: Setup dnsmasq for VM DNS
run: |
sudo apt-get update
sudo apt-get install -y dnsmasq
sudo tee /etc/dnsmasq.d/fcvm.conf > /dev/null <<EOF
bind-dynamic
server=8.8.8.8
server=8.8.4.4
no-resolv
cache-size=1000
EOF
sudo systemctl restart dnsmasq
- name: Setup iptables for VM networking
run: |
sudo iptables -P FORWARD ACCEPT
Expand All @@ -332,6 +309,8 @@ jobs:
test-vm-egress:
name: VM Egress
runs-on: buildjet-32vcpu-ubuntu-2204
needs: test-vm-exec # Sequential: flock doesn't work across podman containers sharing /dev/nbd0
if: always() # Run even if previous job failed (rootfs will be cached after first success)
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -352,18 +331,6 @@ jobs:
run: sudo modprobe nbd max_part=8
- name: Setup network namespace directory
run: sudo mkdir -p /var/run/netns
- name: Setup dnsmasq for VM DNS
run: |
sudo apt-get update
sudo apt-get install -y dnsmasq
sudo tee /etc/dnsmasq.d/fcvm.conf > /dev/null <<EOF
bind-dynamic
server=8.8.8.8
server=8.8.4.4
no-resolv
cache-size=1000
EOF
sudo systemctl restart dnsmasq
- name: Setup iptables for VM networking
run: |
sudo iptables -P FORWARD ACCEPT
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ shell-words = "1"
fuse-pipe = { path = "fuse-pipe", default-features = false }
url = "2"
tokio-util = "0.7"
regex = "1.12.2"

[dev-dependencies]
serial_test = "3"
Expand Down
7 changes: 4 additions & 3 deletions Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*

# Download and install Firecracker (architecture-aware)
# v1.14.0 adds network_overrides support for snapshot cloning
ARG ARCH=aarch64
RUN curl -L -o /tmp/firecracker.tgz \
https://github.com/firecracker-microvm/firecracker/releases/download/v1.10.1/firecracker-v1.10.1-${ARCH}.tgz \
https://github.com/firecracker-microvm/firecracker/releases/download/v1.14.0/firecracker-v1.14.0-${ARCH}.tgz \
&& tar -xzf /tmp/firecracker.tgz -C /tmp \
&& mv /tmp/release-v1.10.1-${ARCH}/firecracker-v1.10.1-${ARCH} /usr/local/bin/firecracker \
&& mv /tmp/release-v1.14.0-${ARCH}/firecracker-v1.14.0-${ARCH} /usr/local/bin/firecracker \
&& chmod +x /usr/local/bin/firecracker \
&& rm -rf /tmp/firecracker.tgz /tmp/release-v1.10.1-${ARCH}
&& rm -rf /tmp/firecracker.tgz /tmp/release-v1.14.0-${ARCH}

# Build and install pjdfstest (tests expect it at /tmp/pjdfstest-check/)
RUN git clone --depth 1 https://github.com/pjd/pjdfstest /tmp/pjdfstest-check \
Expand Down
3 changes: 2 additions & 1 deletion fc-agent/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,8 @@ fn send_status_to_host(message: &[u8]) -> bool {
}

// Send message
let written = unsafe { libc::write(fd, message.as_ptr() as *const libc::c_void, message.len()) };
let written =
unsafe { libc::write(fd, message.as_ptr() as *const libc::c_void, message.len()) };
unsafe { libc::close(fd) };

written == message.len() as isize
Expand Down
3 changes: 3 additions & 0 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Pins the Rust toolchain release used to build this repository.
[toolchain]
channel = "1.92.0"
# Additional components requested alongside the compiler (formatter and linter).
components = ["rustfmt", "clippy"]
53 changes: 53 additions & 0 deletions src/commands/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,59 @@ pub const VSOCK_VOLUME_PORT_BASE: u32 = 5000;
/// Vsock port for status channel (fc-agent notifies when container starts)
pub const VSOCK_STATUS_PORT: u32 = 4999;

/// Minimum required Firecracker version for network_overrides support
///
/// Stored as `(major, minor, patch)`; `find_firecracker` compares the parsed
/// version against this tuple, so ordering is lexicographic by component.
// NOTE(review): the Containerfile comment says v1.14.0 adds network_overrides
// support, while this minimum is 1.13.1 — confirm which release introduced it.
const MIN_FIRECRACKER_VERSION: (u32, u32, u32) = (1, 13, 1);

/// Find and validate the Firecracker binary.
///
/// Looks up `firecracker` on `PATH`, runs it with `--version`, parses the
/// reported version and checks it against [`MIN_FIRECRACKER_VERSION`].
///
/// # Returns
///
/// The path to the Firecracker binary when it exists and is new enough.
///
/// # Errors
///
/// Fails with a descriptive error when:
/// - `firecracker` is not found in `PATH`,
/// - the binary cannot be executed or `--version` exits unsuccessfully,
/// - the version cannot be parsed from its stdout,
/// - the version is older than [`MIN_FIRECRACKER_VERSION`].
pub fn find_firecracker() -> Result<std::path::PathBuf> {
    let firecracker_bin = which::which("firecracker").context("firecracker not found in PATH")?;

    // Ask the binary itself for its version.
    let output = std::process::Command::new(&firecracker_bin)
        .arg("--version")
        .output()
        .context("failed to run firecracker --version")?;

    // A binary that cannot run (wrong architecture, missing libraries, ...) would
    // otherwise surface as a confusing "could not parse" error below, so report
    // the real failure together with whatever it printed on stderr.
    if !output.status.success() {
        anyhow::bail!(
            "firecracker --version failed ({}) for {:?}: {}",
            output.status,
            firecracker_bin,
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }

    let version_str = String::from_utf8_lossy(&output.stdout);
    let version = parse_firecracker_version(&version_str)?;

    // Tuples compare lexicographically: major first, then minor, then patch.
    if version < MIN_FIRECRACKER_VERSION {
        anyhow::bail!(
            "Firecracker version {}.{}.{} is too old. Minimum required: {}.{}.{} (for network_overrides support in snapshot cloning)",
            version.0, version.1, version.2,
            MIN_FIRECRACKER_VERSION.0, MIN_FIRECRACKER_VERSION.1, MIN_FIRECRACKER_VERSION.2
        );
    }

    debug!(
        "Found Firecracker {}.{}.{} at {:?}",
        version.0, version.1, version.2, firecracker_bin
    );

    Ok(firecracker_bin)
}

/// Extract a `(major, minor, patch)` triple from `firecracker --version` output.
///
/// The first `X.Y.Z` sequence in `output` (optionally prefixed with `v`) is
/// taken as the version, e.g. "Firecracker v1.14.0" yields `(1, 14, 0)`.
///
/// # Errors
///
/// Returns an error when no `X.Y.Z` pattern is present in `output`, or when a
/// matched component does not fit in a `u32`.
fn parse_firecracker_version(output: &str) -> Result<(u32, u32, u32)> {
    let pattern = regex::Regex::new(r"v?(\d+)\.(\d+)\.(\d+)").context("invalid regex")?;

    let groups = pattern
        .captures(output)
        .context("could not parse Firecracker version from output")?;

    // Convert one capture group to a number, labelling any failure with `what`.
    let component = |index: usize, what: &'static str| -> Result<u32> {
        groups[index].parse::<u32>().context(what)
    };

    Ok((
        component(1, "invalid major version")?,
        component(2, "invalid minor version")?,
        component(3, "invalid patch version")?,
    ))
}

/// Save VM state with complete network configuration
///
/// This function ensures both baseline and clone VMs save identical network data,
Expand Down
3 changes: 1 addition & 2 deletions src/commands/podman.rs
Original file line number Diff line number Diff line change
Expand Up @@ -689,8 +689,7 @@ async fn run_vm_setup(
holder_child = None;
}

let firecracker_bin = which::which("firecracker")
.context("firecracker not found in PATH")?;
let firecracker_bin = super::common::find_firecracker()?;

vm_manager
.start(&firecracker_bin, None)
Expand Down
3 changes: 1 addition & 2 deletions src/commands/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1021,8 +1021,7 @@ async fn run_clone_setup(
);
vm_manager.set_vsock_redirect(baseline_dir, data_dir.to_path_buf());

let firecracker_bin = which::which("firecracker")
.context("firecracker not found in PATH")?;
let firecracker_bin = super::common::find_firecracker()?;

vm_manager
.start(&firecracker_bin, None)
Expand Down
10 changes: 8 additions & 2 deletions src/firecracker/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ impl FirecrackerClient {

let resp = self.client.request(req).await?;
if resp.status() != StatusCode::NO_CONTENT && resp.status() != StatusCode::OK {
anyhow::bail!("Firecracker API error: {}", resp.status());
let status = resp.status();
let body_bytes = hyper::body::to_bytes(resp.into_body()).await?;
let body_str = String::from_utf8_lossy(&body_bytes);
anyhow::bail!("Firecracker API error: {} - {}", status, body_str);
}
Ok(())
}
Expand All @@ -52,7 +55,10 @@ impl FirecrackerClient {

let resp = self.client.request(req).await?;
if resp.status() != StatusCode::NO_CONTENT && resp.status() != StatusCode::OK {
anyhow::bail!("Firecracker API error: {}", resp.status());
let status = resp.status();
let body_bytes = hyper::body::to_bytes(resp.into_body()).await?;
let body_str = String::from_utf8_lossy(&body_bytes);
anyhow::bail!("Firecracker API error: {} - {}", status, body_str);
}
Ok(())
}
Expand Down
Loading