From 8c492710d54bf2efcd15184a698c14b6bed9bc63 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Tue, 17 Mar 2026 10:56:41 -0700 Subject: [PATCH 01/11] fix: gate sandbox registration on readiness and surface creation failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sandbox create command was piped through awk to deduplicate log lines. In bash, the exit status of a pipeline is the status of the last command (awk, always 0), so creation failures were silently swallowed. NemoClaw then registered a phantom sandbox in ~/.nemoclaw/sandboxes.json that caused "sandbox not found" on every subsequent connect/status call. This is the root cause of the WSL2 + Docker Desktop failures reported in #140 and #152 — sandbox creation fails due to Docker networking issues, but onboarding completes as if it succeeded. Three changes: 1. Remove the awk pipe so the real exit code flows through to run() 2. Poll openshell sandbox list for Ready state before registering (matches the gateway health check pattern at lines 121-132) 3. Move build-context cleanup before the exit-code check so temp files are always cleaned up, even on failure Signed-off-by: Brian Taylor --- bin/lib/onboard.js | 53 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 23f19b01a8..ce1559ec4b 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -434,9 +434,51 @@ async function createSandbox(gpu) { if (process.env.NVIDIA_API_KEY) { envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`); } - // set -o pipefail ensures the openshell exit code propagates through the awk pipe. - // Without it, awk's exit code (always 0) would mask a failed sandbox create. - run(`set -o pipefail; openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1 | awk '/Sandbox allocated/{if(!seen){print;seen=1}next}1'`); + + // Run without piping through awk — the pipe masked non-zero exit codes + // from openshell because bash returns the status of the last pipeline + // command (awk, always 0) unless pipefail is set. Removing the pipe + // lets the real exit code flow through to run(). + const createResult = run( + `openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1`, + { ignoreError: true } + ); + + // Clean up build context regardless of outcome + run(`rm -rf "${buildCtx}"`, { ignoreError: true }); + + if (createResult.status !== 0) { + console.error(""); + console.error(` Sandbox creation failed (exit ${createResult.status}).`); + console.error(" Try: openshell sandbox list # check gateway state"); + console.error(" Try: nemoclaw onboard # retry from scratch"); + process.exit(createResult.status || 1); + } + + // Wait for sandbox to reach Ready state in k3s before registering. + // On WSL2 + Docker Desktop the pod can take longer to initialize; + // without this gate, NemoClaw registers a phantom sandbox that + // causes "sandbox not found" on every subsequent connect/status call. + console.log(" Waiting for sandbox to become ready..."); + let ready = false; + for (let i = 0; i < 30; i++) { + const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true }); + const clean = list.replace(/\x1b\[[0-9;]*m/g, ""); + if (clean.split("\n").some((l) => l.includes(sandboxName) && l.includes("Ready"))) { + ready = true; + break; + } + if (i === 29) break; + require("child_process").spawnSync("sleep", ["2"]); + } + + if (!ready) { + console.error(""); + console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); + console.error(" Check: openshell sandbox list"); + console.error(` Logs: openshell sandbox logs ${sandboxName}`); + process.exit(1); + } // Release any stale forward on port 18789 before claiming it for the new sandbox. // A previous onboard run may have left the port forwarded to a different sandbox, @@ -445,10 +487,7 @@ async function createSandbox(gpu) { // Forward dashboard port to the new sandbox run(`openshell forward start --background 18789 "${sandboxName}"`, { ignoreError: true }); - // Clean up build context - run(`rm -rf "${buildCtx}"`, { ignoreError: true }); - - // Register in registry + // Register only after confirmed ready — prevents phantom entries registry.registerSandbox({ name: sandboxName, gpuEnabled: !!gpu, From 4fd2c6eb934e24aecd70b81f47b2bfeae2dec2de Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Tue, 17 Mar 2026 11:21:15 -0700 Subject: [PATCH 02/11] fix: use word-boundary match for Ready status and fix timeout includes("Ready") falsely matched "NotReady" because "Ready" is a substring. Use a word-boundary regex with a NotReady exclusion so sandboxes stuck in error states are not registered as healthy. Also remove the off-by-one break at i=29 so the loop sleeps the full 60s before timing out. Signed-off-by: Brian Taylor --- bin/lib/onboard.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index ce1559ec4b..851803df78 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -464,11 +464,12 @@ async function createSandbox(gpu) { for (let i = 0; i < 30; i++) { const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true }); const clean = list.replace(/\x1b\[[0-9;]*m/g, ""); - if (clean.split("\n").some((l) => l.includes(sandboxName) && l.includes("Ready"))) { + // Match "Ready" as a whole word — l.includes("Ready") would also match + // "NotReady", defeating the entire readiness gate. + if (clean.split("\n").some((l) => l.includes(sandboxName) && /\bReady\b/.test(l) && !/NotReady/.test(l))) { ready = true; break; } - if (i === 29) break; require("child_process").spawnSync("sleep", ["2"]); } From 68b8e5e07ed576bc7b4efad3bca4afa718db7226 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Tue, 17 Mar 2026 11:48:42 -0700 Subject: [PATCH 03/11] fix: exact-match sandbox name in readiness check Use column-based matching (split on whitespace, check cols[0]) instead of substring includes(). Prevents false positives when one sandbox name is a prefix of another (e.g. "my" matching "my-assistant"). Signed-off-by: Brian Taylor --- bin/lib/onboard.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 851803df78..ec764b5f5a 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -464,9 +464,13 @@ async function createSandbox(gpu) { for (let i = 0; i < 30; i++) { const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true }); const clean = list.replace(/\x1b\[[0-9;]*m/g, ""); - // Match "Ready" as a whole word — l.includes("Ready") would also match - // "NotReady", defeating the entire readiness gate. - if (clean.split("\n").some((l) => l.includes(sandboxName) && /\bReady\b/.test(l) && !/NotReady/.test(l))) { + // Exact-match sandbox name in the first column and "Ready" as a whole + // word. includes() would false-positive on substring names ("my" in + // "my-assistant") and on "NotReady" containing "Ready". + if (clean.split("\n").some((l) => { + const cols = l.trim().split(/\s+/); + return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady"); + })) { ready = true; break; } From 7a01e35d20e4217602b258fab645e8a0ebc9dae5 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Tue, 17 Mar 2026 13:23:19 -0700 Subject: [PATCH 04/11] test: add readiness gate parsing tests for sandbox creation Signed-off-by: Brian Taylor --- test/onboard-readiness.test.js | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 test/onboard-readiness.test.js diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js new file mode 100644 index 0000000000..a5d1c9b6c1 --- /dev/null +++ b/test/onboard-readiness.test.js @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const { describe, it } = require("node:test"); +const assert = require("node:assert/strict"); + +// Test the readiness parsing logic extracted for testability +// These validate the exact matching algorithm from onboard.js createSandbox() + +function isSandboxReady(output, sandboxName) { + const clean = output.replace(/\x1b\[[0-9;]*m/g, ""); + return clean.split("\n").some((l) => { + const cols = l.trim().split(/\s+/); + return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady"); + }); +} + +describe("sandbox readiness parsing", () => { + it("detects Ready sandbox", () => { + assert.ok(isSandboxReady("my-assistant Ready 2m ago", "my-assistant")); + }); + + it("rejects NotReady sandbox", () => { + assert.ok(!isSandboxReady("my-assistant NotReady init failed", "my-assistant")); + }); + + it("rejects empty output", () => { + assert.ok(!isSandboxReady("No sandboxes found.", "my-assistant")); + assert.ok(!isSandboxReady("", "my-assistant")); + }); + + it("strips ANSI escape codes before matching", () => { + assert.ok(isSandboxReady( + "\x1b[1mmy-assistant\x1b[0m \x1b[32mReady\x1b[0m 2m ago", + "my-assistant" + )); + }); + + it("rejects ANSI-wrapped NotReady", () => { + assert.ok(!isSandboxReady( + "\x1b[1mmy-assistant\x1b[0m \x1b[31mNotReady\x1b[0m crash", + "my-assistant" + )); + }); + + it("exact-matches sandbox name in first column", () => { + // "my" should NOT match "my-assistant" + assert.ok(!isSandboxReady("my-assistant Ready 2m ago", "my")); + }); + + it("does not match sandbox name in non-first column", () => { + assert.ok(!isSandboxReady("other-box Ready owned-by-my-assistant", "my-assistant")); + }); + + it("handles multiple sandboxes in output", () => { + const output = [ + "NAME STATUS AGE", + "dev-box NotReady 5m ago", + "my-assistant Ready 2m ago", + "staging Ready 10m ago", + ].join("\n"); + assert.ok(isSandboxReady(output, "my-assistant")); + assert.ok(!isSandboxReady(output, "dev-box")); // NotReady + assert.ok(isSandboxReady(output, "staging")); + assert.ok(!isSandboxReady(output, "prod")); // not present + }); + + it("handles Ready sandbox with extra status columns", () => { + assert.ok(isSandboxReady("my-assistant Ready Running 2m ago 1/1", "my-assistant")); + }); + + it("rejects when output only contains name in a URL or path", () => { + assert.ok(!isSandboxReady("Connecting to my-assistant.openshell.internal Ready", "my-assistant")); + // "my-assistant.openshell.internal" is cols[0], not "my-assistant" + }); + + it("handles tab-separated output", () => { + assert.ok(isSandboxReady("my-assistant\tReady\t2m ago", "my-assistant")); + }); +}); From 9f08cedbc9f2150a6adccaf43829008ede1c8ede Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Wed, 18 Mar 2026 13:55:41 -0700 Subject: [PATCH 05/11] fix: guard against truncated sandbox names on WSL (fixes #21) On WSL, hyphenated sandbox names like "my-assistant" can be truncated to "m" during shell argument parsing, causing "sandbox not found" failures when applying policy presets. - Add RFC 1123 validation in applyPreset() to catch truncated names early with a clear error message - Quote sandbox name in error output (was unquoted on line 356) - Add 6 WSL-specific regression tests covering hyphenated names, multi-hyphen names, truncation detection, and command quoting --- bin/lib/onboard.js | 2 +- bin/lib/policies.js | 9 ++++++ test/onboard-readiness.test.js | 56 ++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index ec764b5f5a..f77156f1d5 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -481,7 +481,7 @@ async function createSandbox(gpu) { console.error(""); console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); console.error(" Check: openshell sandbox list"); - console.error(` Logs: openshell sandbox logs ${sandboxName}`); + console.error(` Logs: openshell sandbox logs "${sandboxName}"`); process.exit(1); } diff --git a/bin/lib/policies.js b/bin/lib/policies.js index 7263b7d07f..e0c1ab910f 100644 --- a/bin/lib/policies.js +++ b/bin/lib/policies.js @@ -84,6 +84,15 @@ function buildPolicyGetCommand(sandboxName) { } function applyPreset(sandboxName, presetName) { + // Guard against truncated sandbox names (issue #21: WSL truncates hyphenated + // names during argument parsing, e.g. "my-assistant" → "m") + if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) { + throw new Error( + `Invalid or truncated sandbox name: '${sandboxName}'. ` + + `Names must be lowercase alphanumeric with optional hyphens.` + ); + } + const presetContent = loadPreset(presetName); if (!presetContent) { console.error(` Cannot load preset: ${presetName}`); diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js index a5d1c9b6c1..252c8dfd3f 100644 --- a/test/onboard-readiness.test.js +++ b/test/onboard-readiness.test.js @@ -4,6 +4,8 @@ const { describe, it } = require("node:test"); const assert = require("node:assert/strict"); +const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies"); + // Test the readiness parsing logic extracted for testability // These validate the exact matching algorithm from onboard.js createSandbox() @@ -78,3 +80,57 @@ describe("sandbox readiness parsing", () => { assert.ok(isSandboxReady("my-assistant\tReady\t2m ago", "my-assistant")); }); }); + +// Regression tests for issue #21: WSL truncates hyphenated sandbox names +// during shell argument parsing (e.g. "my-assistant" → "m"). +describe("WSL sandbox name handling (issue #21)", () => { + it("buildPolicySetCommand preserves hyphenated sandbox name", () => { + const cmd = buildPolicySetCommand("/tmp/policy.yaml", "my-assistant"); + assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`); + assert.ok(!cmd.includes(' my-assistant '), "Name must be quoted, not bare"); + }); + + it("buildPolicyGetCommand preserves hyphenated sandbox name", () => { + const cmd = buildPolicyGetCommand("my-assistant"); + assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`); + }); + + it("buildPolicySetCommand preserves multi-hyphen names", () => { + const cmd = buildPolicySetCommand("/tmp/p.yaml", "my-dev-assistant-v2"); + assert.ok(cmd.includes('"my-dev-assistant-v2"')); + }); + + it("buildPolicySetCommand preserves single-char name", () => { + // If WSL truncates "my-assistant" to "m", the single-char name should + // still be quoted and passed through unchanged + const cmd = buildPolicySetCommand("/tmp/p.yaml", "m"); + assert.ok(cmd.includes('"m"')); + }); + + it("applyPreset rejects truncated/invalid sandbox name", () => { + const policies = require("../bin/lib/policies"); + // Empty name + assert.throws( + () => policies.applyPreset("", "npm"), + /Invalid or truncated sandbox name/ + ); + // Name with uppercase (not valid per RFC 1123) + assert.throws( + () => policies.applyPreset("My-Assistant", "npm"), + /Invalid or truncated sandbox name/ + ); + // Name starting with hyphen + assert.throws( + () => policies.applyPreset("-broken", "npm"), + /Invalid or truncated sandbox name/ + ); + }); + + it("readiness check uses exact match preventing truncated name false-positive", () => { + // If "my-assistant" was truncated to "m", the readiness check should + // NOT match a sandbox named "my-assistant" when searching for "m" + assert.ok(!isSandboxReady("my-assistant Ready 2m ago", "m")); + assert.ok(!isSandboxReady("my-assistant Ready 2m ago", "my")); + assert.ok(!isSandboxReady("my-assistant Ready 2m ago", "my-")); + }); +}); From 73573722ca4f4a6ec0660fb68781c434a1b16c7a Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Wed, 18 Mar 2026 14:00:49 -0700 Subject: [PATCH 06/11] fix: clean up orphaned sandbox on readiness timeout When the sandbox is created but never reaches Ready within 60s, delete it before exiting so the next onboard retry with the same name doesn't fail on "sandbox already exists". --- bin/lib/onboard.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index f77156f1d5..b07059fa6b 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -478,10 +478,14 @@ async function createSandbox(gpu) { } if (!ready) { + // Clean up the orphaned sandbox so the next onboard retry with the same + // name doesn't fail on "sandbox already exists". + run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); console.error(""); console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); - console.error(" Check: openshell sandbox list"); + console.error(" The orphaned sandbox has been removed — you can safely retry."); console.error(` Logs: openshell sandbox logs "${sandboxName}"`); + console.error(" Retry: nemoclaw onboard"); process.exit(1); } From 8d4a23b12be0556c97c5e0b7988f6c9d628900c1 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Wed, 18 Mar 2026 14:05:37 -0700 Subject: [PATCH 07/11] chore: remove issue references from code comments --- bin/lib/policies.js | 4 ++-- test/onboard-readiness.test.js | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/lib/policies.js b/bin/lib/policies.js index e0c1ab910f..a059ff9651 100644 --- a/bin/lib/policies.js +++ b/bin/lib/policies.js @@ -84,8 +84,8 @@ function buildPolicyGetCommand(sandboxName) { } function applyPreset(sandboxName, presetName) { - // Guard against truncated sandbox names (issue #21: WSL truncates hyphenated - // names during argument parsing, e.g. "my-assistant" → "m") + // Guard against truncated sandbox names — WSL can truncate hyphenated + // names during argument parsing, e.g. "my-assistant" → "m" if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) { throw new Error( `Invalid or truncated sandbox name: '${sandboxName}'. ` + diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js index 252c8dfd3f..d5a06d436a 100644 --- a/test/onboard-readiness.test.js +++ b/test/onboard-readiness.test.js @@ -81,9 +81,9 @@ describe("sandbox readiness parsing", () => { }); }); -// Regression tests for issue #21: WSL truncates hyphenated sandbox names -// during shell argument parsing (e.g. "my-assistant" → "m"). -describe("WSL sandbox name handling (issue #21)", () => { +// Regression tests: WSL truncates hyphenated sandbox names during shell +// argument parsing (e.g. "my-assistant" → "m"). +describe("WSL sandbox name handling", () => { it("buildPolicySetCommand preserves hyphenated sandbox name", () => { const cmd = buildPolicySetCommand("/tmp/policy.yaml", "my-assistant"); assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`); From 6d617befaac3871f4e6a906ce1466fb8f1bf9ac2 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Wed, 18 Mar 2026 14:11:22 -0700 Subject: [PATCH 08/11] fix: enforce RFC 1123 63-char limit in sandbox name validation --- bin/lib/policies.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/lib/policies.js b/bin/lib/policies.js index a059ff9651..80034ee164 100644 --- a/bin/lib/policies.js +++ b/bin/lib/policies.js @@ -86,10 +86,11 @@ function buildPolicyGetCommand(sandboxName) { function applyPreset(sandboxName, presetName) { // Guard against truncated sandbox names — WSL can truncate hyphenated // names during argument parsing, e.g. "my-assistant" → "m" - if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) { + const isRfc1123Label = /^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName); + if (!sandboxName || sandboxName.length > 63 || !isRfc1123Label) { throw new Error( `Invalid or truncated sandbox name: '${sandboxName}'. ` + - `Names must be lowercase alphanumeric with optional hyphens.` + `Names must be 1-63 chars, lowercase alphanumeric, with optional internal hyphens.` ); } From 5d3b6d0d6f31faf6dc503a3607d74f4393a53734 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Wed, 18 Mar 2026 18:34:18 -0700 Subject: [PATCH 09/11] fix: extract isSandboxReady as shared function and branch cleanup messaging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract readiness parser from inline code to exported isSandboxReady() so tests validate the production code, not a duplicated copy - Branch cleanup messaging on delete result — report manual cleanup command if the orphan delete fails --- bin/lib/onboard.js | 33 +++++++++++++++++++++------------ test/onboard-readiness.test.js | 12 +----------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index b07059fa6b..793e3e8587 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -57,6 +57,18 @@ async function promptOrDefault(question, envVar, defaultValue) { // ── Helpers ────────────────────────────────────────────────────── +/** + * Check if a sandbox is in Ready state from `openshell sandbox list` output. + * Strips ANSI codes and exact-matches the sandbox name in the first column. + */ +function isSandboxReady(output, sandboxName) { + const clean = output.replace(/\x1b\[[0-9;]*m/g, ""); + return clean.split("\n").some((l) => { + const cols = l.trim().split(/\s+/); + return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady"); + }); +} + function step(n, total, msg) { console.log(""); console.log(` [${n}/${total}] ${msg}`); @@ -463,14 +475,7 @@ async function createSandbox(gpu) { let ready = false; for (let i = 0; i < 30; i++) { const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true }); - const clean = list.replace(/\x1b\[[0-9;]*m/g, ""); - // Exact-match sandbox name in the first column and "Ready" as a whole - // word. includes() would false-positive on substring names ("my" in - // "my-assistant") and on "NotReady" containing "Ready". - if (clean.split("\n").some((l) => { - const cols = l.trim().split(/\s+/); - return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady"); - })) { + if (isSandboxReady(list, sandboxName)) { ready = true; break; } @@ -480,11 +485,15 @@ async function createSandbox(gpu) { if (!ready) { // Clean up the orphaned sandbox so the next onboard retry with the same // name doesn't fail on "sandbox already exists". - run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); + const delResult = run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); console.error(""); console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); - console.error(" The orphaned sandbox has been removed — you can safely retry."); - console.error(` Logs: openshell sandbox logs "${sandboxName}"`); + if (delResult.status === 0) { + console.error(" The orphaned sandbox has been removed — you can safely retry."); + } else { + console.error(` Could not remove the orphaned sandbox. Manual cleanup:`); + console.error(` openshell sandbox delete "${sandboxName}"`); + } console.error(" Retry: nemoclaw onboard"); process.exit(1); } @@ -933,4 +942,4 @@ async function onboard(opts = {}) { printDashboard(sandboxName, model, provider); } -module.exports = { buildSandboxConfigSyncScript, onboard, setupNim }; +module.exports = { buildSandboxConfigSyncScript, isSandboxReady, onboard, setupNim }; diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js index d5a06d436a..fe20354c98 100644 --- a/test/onboard-readiness.test.js +++ b/test/onboard-readiness.test.js @@ -5,17 +5,7 @@ const { describe, it } = require("node:test"); const assert = require("node:assert/strict"); const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies"); - -// Test the readiness parsing logic extracted for testability -// These validate the exact matching algorithm from onboard.js createSandbox() - -function isSandboxReady(output, sandboxName) { - const clean = output.replace(/\x1b\[[0-9;]*m/g, ""); - return clean.split("\n").some((l) => { - const cols = l.trim().split(/\s+/); - return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady"); - }); -} +const { isSandboxReady } = require("../bin/lib/onboard"); describe("sandbox readiness parsing", () => { it("detects Ready sandbox", () => { From 6a1da48eb9b44e2ad0894bb5f0f737750ff3c762 Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Thu, 19 Mar 2026 01:51:06 -0700 Subject: [PATCH 10/11] fix: clean up stale gateway and port forward before preflight check A previous onboard session may leave the OpenShell gateway container and port forward running, causing port 8080/18789 conflicts on the next invocation. Detect a NemoClaw-owned gateway before the port availability check and tear it down automatically. Closes #397 --- bin/lib/onboard.js | 24 +++++++++++++++- test/onboard-readiness.test.js | 51 +++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 793e3e8587..060b5ba212 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -69,6 +69,16 @@ function isSandboxReady(output, sandboxName) { }); } +/** + * Determine whether stale NemoClaw gateway output indicates a previous + * session that should be cleaned up before the port preflight check. + * @param {string} gwInfoOutput - Raw output from `openshell gateway info -g nemoclaw`. + * @returns {boolean} + */ +function hasStaleGateway(gwInfoOutput) { + return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes("nemoclaw"); +} + function step(n, total, msg) { console.log(""); console.log(` [${n}/${total}] ${msg}`); @@ -288,6 +298,18 @@ async function preflight() { } console.log(` ✓ openshell CLI: ${runCapture("openshell --version 2>/dev/null || echo unknown", { ignoreError: true })}`); + // Clean up stale NemoClaw session before checking ports. + // A previous onboard run may have left the gateway container and port + // forward running. If a NemoClaw-owned gateway is still present, tear + // it down so the port check below doesn't fail on our own leftovers. + const gwInfo = runCapture("openshell gateway info -g nemoclaw 2>/dev/null", { ignoreError: true }); + if (hasStaleGateway(gwInfo)) { + console.log(" Cleaning up previous NemoClaw session..."); + run("openshell forward stop 18789 2>/dev/null || true", { ignoreError: true }); + run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); + console.log(" ✓ Previous session cleaned up"); + } + // Required ports — gateway (8080) and dashboard (18789) const requiredPorts = [ { port: 8080, label: "OpenShell gateway" }, @@ -942,4 +964,4 @@ async function onboard(opts = {}) { printDashboard(sandboxName, model, provider); } -module.exports = { buildSandboxConfigSyncScript, isSandboxReady, onboard, setupNim }; +module.exports = { buildSandboxConfigSyncScript, hasStaleGateway, isSandboxReady, onboard, setupNim }; diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js index fe20354c98..4eda74a570 100644 --- a/test/onboard-readiness.test.js +++ b/test/onboard-readiness.test.js @@ -5,7 +5,7 @@ const { describe, it } = require("node:test"); const assert = require("node:assert/strict"); const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies"); -const { isSandboxReady } = require("../bin/lib/onboard"); +const { hasStaleGateway, isSandboxReady } = require("../bin/lib/onboard"); describe("sandbox readiness parsing", () => { it("detects Ready sandbox", () => { @@ -124,3 +124,52 @@ describe("WSL sandbox name handling", () => { assert.ok(!isSandboxReady("my-assistant Ready 2m ago", "my-")); }); }); + +// Regression tests for issue #397: stale gateway detection before port checks. +// A previous onboard session may leave the gateway container and port forward +// running, causing port-conflict failures on the next onboard invocation. +describe("stale gateway detection", () => { + it("detects active nemoclaw gateway from real output", () => { + // Actual output from `openshell gateway info -g nemoclaw` (ANSI stripped) + const output = [ + "Gateway Info", + "", + " Gateway: nemoclaw", + " Gateway endpoint: https://127.0.0.1:8080", + ].join("\n"); + assert.ok(hasStaleGateway(output)); + }); + + it("detects gateway from ANSI-colored output", () => { + const output = + "\x1b[1m\x1b[36mGateway Info\x1b[39m\x1b[0m\n\n" + + " \x1b[2mGateway:\x1b[0m nemoclaw\n" + + " \x1b[2mGateway endpoint:\x1b[0m https://127.0.0.1:8080"; + assert.ok(hasStaleGateway(output)); + }); + + it("returns false for empty string (no gateway running)", () => { + assert.ok(!hasStaleGateway("")); + }); + + it("returns false for null/undefined", () => { + assert.ok(!hasStaleGateway(null)); + assert.ok(!hasStaleGateway(undefined)); + }); + + it("returns false for error output without gateway name", () => { + assert.ok(!hasStaleGateway("Error: no gateway found")); + assert.ok(!hasStaleGateway("connection refused")); + }); + + it("returns false for a different gateway name", () => { + // If someone ran a non-nemoclaw gateway, we should not touch it + const output = [ + "Gateway Info", + "", + " Gateway: my-other-gateway", + " Gateway endpoint: https://127.0.0.1:8080", + ].join("\n"); + assert.ok(!hasStaleGateway(output)); + }); +}); From c43c66a0386fb0522b1f5a37c4d38b6de00f40ca Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Thu, 19 Mar 2026 03:07:32 -0700 Subject: [PATCH 11/11] test: add double-onboard e2e test for stale state recovery Runs `nemoclaw onboard` three times without cleanup between runs, verifying that each subsequent onboard recovers automatically from stale gateway, port forward, and registry entries left behind. Regression test for #397. --- test/e2e/test-double-onboard.sh | 256 ++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100755 test/e2e/test-double-onboard.sh diff --git a/test/e2e/test-double-onboard.sh b/test/e2e/test-double-onboard.sh new file mode 100755 index 0000000000..7ebdddcd5a --- /dev/null +++ b/test/e2e/test-double-onboard.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# Double onboard: verify that consecutive `nemoclaw onboard` runs recover +# automatically from stale state (gateway, port forward, registry entries) +# left behind by a previous run. +# +# Regression test for issues #21, #22, #140, #152, #397. +# +# Key insight: running onboard without NVIDIA_API_KEY in non-interactive +# mode causes process.exit(1) at step 4, but steps 1-3 (preflight, +# gateway, sandbox) complete first — naturally simulating an unclean exit. +# +# Prerequisites: +# - Docker running +# - openshell CLI installed +# - nemoclaw CLI installed +# - NVIDIA_API_KEY must NOT be set +# +# Usage: +# unset NVIDIA_API_KEY +# bash test/e2e/test-double-onboard.sh + +set -uo pipefail + +PASS=0 +FAIL=0 +SKIP=0 +TOTAL=0 + +pass() { ((PASS++)); ((TOTAL++)); printf '\033[32m PASS: %s\033[0m\n' "$1"; } +fail() { ((FAIL++)); ((TOTAL++)); printf '\033[31m FAIL: %s\033[0m\n' "$1"; } +skip() { ((SKIP++)); ((TOTAL++)); printf '\033[33m SKIP: %s\033[0m\n' "$1"; } +section() { echo ""; printf '\033[1;36m=== %s ===\033[0m\n' "$1"; } +info() { printf '\033[1;34m [info]\033[0m %s\n' "$1"; } + +SANDBOX_A="e2e-double-a" +SANDBOX_B="e2e-double-b" +REGISTRY="$HOME/.nemoclaw/sandboxes.json" + +# ══════════════════════════════════════════════════════════════════ +# Phase 0: Pre-cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 0: Pre-cleanup" +info "Destroying any leftover test sandboxes/gateway from previous runs..." +# Use nemoclaw destroy (not just openshell sandbox delete) to also clean +# the nemoclaw registry at ~/.nemoclaw/sandboxes.json. Stale registry +# entries from a previous run would cause Phase 2 to exit with +# "Sandbox already exists" before the test even starts. +if command -v nemoclaw > /dev/null 2>&1; then + nemoclaw "$SANDBOX_A" destroy 2>/dev/null || true + nemoclaw "$SANDBOX_B" destroy 2>/dev/null || true +fi +openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true +openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true +pass "Pre-cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Phase 1: Prerequisites +# ══════════════════════════════════════════════════════════════════ +section "Phase 1: Prerequisites" + +if docker info > /dev/null 2>&1; then + pass "Docker is running" +else + fail "Docker is not running — cannot continue" + exit 1 +fi + +if command -v openshell > /dev/null 2>&1; then + pass "openshell CLI installed" +else + fail "openshell CLI not found — cannot continue" + exit 1 +fi + +if command -v nemoclaw > /dev/null 2>&1; then + pass "nemoclaw CLI installed" +else + fail "nemoclaw CLI not found — cannot continue" + exit 1 +fi + +if [ -n "${NVIDIA_API_KEY:-}" ]; then + fail "NVIDIA_API_KEY is set — this test requires it UNSET (unset NVIDIA_API_KEY)" + exit 1 +else + pass "NVIDIA_API_KEY is not set (required for controlled step-4 exit)" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 2: First onboard (e2e-double-a) — leaves stale state +# ══════════════════════════════════════════════════════════════════ +section "Phase 2: First onboard ($SANDBOX_A)" +info "Running nemoclaw onboard — expect exit 1 (no API key)..." + +# Write to temp file to avoid openshell FD inheritance blocking $() +ONBOARD_LOG="$(mktemp)" +NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \ + NEMOCLAW_POLICY_MODE=skip \ + nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1 +exit1=$? +output1="$(cat "$ONBOARD_LOG")" +rm -f "$ONBOARD_LOG" + +if [ $exit1 -eq 1 ]; then + pass "First onboard exited 1 (step 4 failed as expected)" +else + fail "First onboard exited $exit1 (expected 1)" +fi + +echo "$output1" | grep -q "Sandbox '${SANDBOX_A}' created" \ + && pass "Sandbox '$SANDBOX_A' created (step 3 completed)" \ + || fail "Sandbox creation not confirmed in output" + +# Verify stale state was left behind +openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw" \ + && pass "Gateway is still running (stale state)" \ + || fail "Gateway is not running after first onboard" + +openshell sandbox get "$SANDBOX_A" > /dev/null 2>&1 \ + && pass "Sandbox '$SANDBOX_A' exists in openshell" \ + || fail "Sandbox '$SANDBOX_A' not found in openshell" + +[ -f "$REGISTRY" ] && grep -q "$SANDBOX_A" "$REGISTRY" \ + && pass "Registry contains '$SANDBOX_A'" \ + || fail "Registry does not contain '$SANDBOX_A'" + +info "Stale state confirmed — NOT cleaning up before next onboard" + +# ══════════════════════════════════════════════════════════════════ +# Phase 3: Second onboard — SAME name (e2e-double-a) +# ══════════════════════════════════════════════════════════════════ +section "Phase 3: Second onboard ($SANDBOX_A — same name, stale state)" +info "Running nemoclaw onboard with NEMOCLAW_RECREATE_SANDBOX=1..." + +ONBOARD_LOG="$(mktemp)" +NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \ + NEMOCLAW_RECREATE_SANDBOX=1 \ + NEMOCLAW_POLICY_MODE=skip \ + nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1 +exit2=$? +output2="$(cat "$ONBOARD_LOG")" +rm -f "$ONBOARD_LOG" + +# Step 4 still fails (no API key), but steps 1-3 should succeed +if [ $exit2 -eq 1 ]; then + pass "Second onboard exited 1 (step 4 failed as expected)" +else + fail "Second onboard exited $exit2 (expected 1)" +fi + +echo "$output2" | grep -q "Cleaning up previous NemoClaw session" \ + && pass "Stale session cleanup fired on second onboard" \ + || fail "Stale session cleanup did NOT fire (regression: #397)" + +echo "$output2" | grep -q "Port 8080 is not available" \ + && fail "Port 8080 conflict detected (regression: #21)" \ + || pass "No port 8080 conflict" + +echo "$output2" | grep -q "Port 18789 is not available" \ + && fail "Port 18789 conflict detected" \ + || pass "No port 18789 conflict" + +echo "$output2" | grep -q "Sandbox '${SANDBOX_A}' created" \ + && pass "Sandbox '$SANDBOX_A' recreated" \ + || fail "Sandbox '$SANDBOX_A' was not recreated" + +openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw" \ + && pass "Gateway running after second onboard" \ + || fail "Gateway not running after second onboard" + +# ══════════════════════════════════════════════════════════════════ +# Phase 4: Third onboard — DIFFERENT name (e2e-double-b) +# ══════════════════════════════════════════════════════════════════ +section "Phase 4: Third onboard ($SANDBOX_B — different name, stale state)" +info "Running nemoclaw onboard with new sandbox name..." + +ONBOARD_LOG="$(mktemp)" +NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_B" \ + NEMOCLAW_POLICY_MODE=skip \ + nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1 +exit3=$? +output3="$(cat "$ONBOARD_LOG")" +rm -f "$ONBOARD_LOG" + +if [ $exit3 -eq 1 ]; then + pass "Third onboard exited 1 (step 4 failed as expected)" +else + fail "Third onboard exited $exit3 (expected 1)" +fi + +echo "$output3" | grep -q "Cleaning up previous NemoClaw session" \ + && pass "Stale session cleanup fired on third onboard" \ + || fail "Stale session cleanup did NOT fire on third onboard" + +echo "$output3" | grep -q "Port 8080 is not available" \ + && fail "Port 8080 conflict on third onboard (regression)" \ + || pass "No port 8080 conflict on third onboard" + +echo "$output3" | grep -q "Port 18789 is not available" \ + && fail "Port 18789 conflict on third onboard" \ + || pass "No port 18789 conflict on third onboard" + +echo "$output3" | grep -q "Sandbox '${SANDBOX_B}' created" \ + && pass "Sandbox '$SANDBOX_B' created" \ + || fail "Sandbox '$SANDBOX_B' was not created" + +# ══════════════════════════════════════════════════════════════════ +# Phase 5: Final cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 5: Final cleanup" + +nemoclaw "$SANDBOX_A" destroy 2>/dev/null || true +nemoclaw "$SANDBOX_B" destroy 2>/dev/null || true +openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true +openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true + +openshell sandbox get "$SANDBOX_A" > /dev/null 2>&1 \ + && fail "Sandbox '$SANDBOX_A' still exists after cleanup" \ + || pass "Sandbox '$SANDBOX_A' cleaned up" + +openshell sandbox get "$SANDBOX_B" > /dev/null 2>&1 \ + && fail "Sandbox '$SANDBOX_B' still exists after cleanup" \ + || pass "Sandbox '$SANDBOX_B' cleaned up" + +[ -f "$REGISTRY" ] && grep -q "$SANDBOX_A\|$SANDBOX_B" "$REGISTRY" \ + && fail "Registry still contains test sandbox entries" \ + || pass "Registry cleaned up" + +pass "Final cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Summary +# ══════════════════════════════════════════════════════════════════ +echo "" +echo "========================================" +echo " Double Onboard E2E Results:" +echo " Passed: $PASS" +echo " Failed: $FAIL" +echo " Skipped: $SKIP" +echo " Total: $TOTAL" +echo "========================================" + +if [ "$FAIL" -eq 0 ]; then + printf '\n\033[1;32m Double onboard PASSED — stale state recovery verified.\033[0m\n' + exit 0 +else + printf '\n\033[1;31m %d test(s) failed.\033[0m\n' "$FAIL" + exit 1 +fi