From eb0703cca692f765ac9361fec5e89fa5101d08fb Mon Sep 17 00:00:00 2001 From: "claude[bot]" Date: Mon, 2 Mar 2026 08:48:58 +0000 Subject: [PATCH] fix: remove dead nsenter branch and update stale comments/docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After PR #541 switched rootless baselines to use pre_exec setns, the nsenter code path in VmManager::start() became unreachable (user_namespace_path is always set when holder_pid is set). Remove the dead branch and update all stale comments and DESIGN.md to reflect that ALL rootless VMs now use pre_exec setns. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- DESIGN.md | 2 +- src/firecracker/vm.rs | 73 ++++++++++++++----------------------------- 2 files changed, 25 insertions(+), 50 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index e4b2815d..bfd9ec38 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -469,7 +469,7 @@ pasta <──────────────────┼── pasta0 2. Pre-pasta setup via nsenter: create Firecracker TAP device only 3. Start pasta attached to holder's namespace (creates pasta0 TAP) 4. Post-pasta setup via nsenter: create bridge, attach pasta0 + tap-fc, add namespace IP -5. Run Firecracker via nsenter: `nsenter -t HOLDER_PID -U -n -- firecracker ...` +5. Run Firecracker via pre_exec setns (enters user+net namespace, then sets PR_SET_PDEATHSIG) 6. Health checks via nsenter: `nsenter -t HOLDER_PID -U -n -- curl 10.0.2.100:80` **Pre-Pasta Setup Script** (Phase 2, executed via nsenter): diff --git a/src/firecracker/vm.rs b/src/firecracker/vm.rs index 9de15e88..0fcaa72d 100644 --- a/src/firecracker/vm.rs +++ b/src/firecracker/vm.rs @@ -36,9 +36,9 @@ pub struct VmManager { socket_path: PathBuf, log_path: Option, namespace_id: Option, - holder_pid: Option, // namespace holder PID for rootless mode (use nsenter to run FC) - user_namespace_path: Option, // User namespace path for rootless clones (enter via setns in pre_exec) - net_namespace_path: Option, // Net namespace path for rootless clones (enter via setns in pre_exec) + holder_pid: Option, // namespace holder PID for rootless mode (health checks + cleanup) + user_namespace_path: Option, // User namespace path for rootless VMs (enter via setns in pre_exec) + net_namespace_path: Option, // Net namespace path for rootless VMs (enter via setns in pre_exec) mount_redirects: Option<(Vec, PathBuf)>, // (baseline_dirs, clone_dir) for mount namespace isolation process: Option, client: Option, @@ -76,35 +76,33 @@ impl VmManager { /// Set namespace holder PID for rootless networking /// - /// When set, Firecracker will be launched inside an existing user+net namespace - /// via nsenter. The holder process (created by `unshare --user --net -- sleep infinity`) - /// keeps the namespace alive while Firecracker runs. - /// - /// UID/GID mappings are written externally by `setup_namespace_mappings()`, which - /// maps the current user (UID 1000) to UID 0 inside the namespace. Combined with - /// `--preserve-credentials` when using nsenter, no chmod is needed on sockets or - /// files - the user has access both inside and outside. + /// The holder process (created by `unshare --user --net -- sleep infinity`) + /// keeps the namespace alive while Firecracker runs. The holder PID is used + /// for health checks (nsenter curl) and cleanup, not for launching Firecracker + /// (which uses pre_exec setns via set_user_namespace_path/set_net_namespace_path). pub fn set_holder_pid(&mut self, pid: u32) { self.holder_pid = Some(pid); } - /// Set user namespace path for rootless clones + /// Set user namespace path for rootless VMs (baselines + clones) /// - /// When set along with mount_redirects, pre_exec will enter this user namespace - /// first (via setns) before doing mount operations. This gives CAP_SYS_ADMIN - /// inside the user namespace, allowing unshare(CLONE_NEWNS) to succeed. + /// When set, pre_exec will enter this user namespace (via setns) before + /// launching Firecracker. This is used for ALL rootless VMs because nsenter's + /// internal setns(CLONE_NEWUSER) clears PR_SET_PDEATHSIG (kernel zeros + /// task->pdeath_signal on credential changes). The pre_exec path sets + /// pdeathsig AFTER entering the namespace, ensuring Firecracker is killed + /// when fcvm dies. /// - /// Use this instead of set_holder_pid when mount namespace isolation is needed, - /// since nsenter wrapper runs AFTER pre_exec. + /// For clones with mount_redirects, pre_exec also enters the user namespace + /// first to get CAP_SYS_ADMIN for mount operations. pub fn set_user_namespace_path(&mut self, path: PathBuf) { self.user_namespace_path = Some(path); } - /// Set network namespace path for rootless clones + /// Set network namespace path for rootless VMs (baselines + clones) /// /// When set, pre_exec will enter this network namespace (via setns) after - /// completing mount operations. Use with set_user_namespace_path for - /// rootless clones that need mount namespace isolation. + /// entering the user namespace and completing any mount operations. pub fn set_net_namespace_path(&mut self, path: PathBuf) { self.net_namespace_path = Some(path); } @@ -147,42 +145,19 @@ impl VmManager { // Build command based on mode: // 1. user_namespace_path set: direct Firecracker (namespaces entered via pre_exec setns) - // 2. holder_pid set (no user_namespace_path): use nsenter to enter existing namespace (rootless baseline) - // 3. neither: direct Firecracker (privileged/bridged mode) - // - // For rootless clones with mount_redirects, we MUST use pre_exec setns instead of nsenter, - // because pre_exec runs BEFORE nsenter would enter the namespace, and we need CAP_SYS_ADMIN - // from the user namespace to do mount operations. + // Used for ALL rootless VMs (baselines + clones) because nsenter's internal + // setns(CLONE_NEWUSER) clears PR_SET_PDEATHSIG (kernel zeros task->pdeath_signal + // on credential changes). The pre_exec path sets pdeathsig AFTER setns. + // For clones with mount_redirects, pre_exec also handles mount namespace isolation. + // 2. neither: direct Firecracker (privileged/bridged mode) let mut cmd = if self.user_namespace_path.is_some() { // Use direct Firecracker - namespaces will be entered via setns in pre_exec. - // Used for ALL rootless VMs (baselines + clones) because nsenter's internal - // setns(CLONE_NEWUSER) clears PR_SET_PDEATHSIG, leaving Firecracker orphaned - // if fcvm is SIGKILL'd. The pre_exec path sets pdeathsig AFTER setns. info!(target: "vm", vm_id = %self.vm_id, "using pre_exec setns for rootless VM"); let mut c = Command::new(firecracker_bin); c.arg("--api-sock").arg(&self.socket_path); c - } else if let Some(holder_pid) = self.holder_pid { - // Use nsenter to enter user+network namespace with preserved credentials - // --preserve-credentials keeps UID, GID, and supplementary groups (including kvm) - // This allows KVM access while being in the isolated network namespace - // NOTE: This path is for baseline VMs that don't need mount namespace isolation - info!(target: "vm", vm_id = %self.vm_id, holder_pid = holder_pid, "using nsenter for rootless networking"); - let mut c = Command::new("nsenter"); - c.args([ - "-t", - &holder_pid.to_string(), - "-U", - "-n", - "--preserve-credentials", - "--", - ]); - c.arg(firecracker_bin) - .arg("--api-sock") - .arg(&self.socket_path); - c } else { - // Direct Firecracker invocation (privileged mode) + // Direct Firecracker invocation (privileged/bridged mode) let mut c = Command::new(firecracker_bin); c.arg("--api-sock").arg(&self.socket_path); c