From 8c492710d54bf2efcd15184a698c14b6bed9bc63 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Tue, 17 Mar 2026 10:56:41 -0700
Subject: [PATCH 01/11] fix: gate sandbox registration on readiness and surface
 creation failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sandbox create command was piped through awk to deduplicate log
lines. In bash, the exit status of a pipeline is the status of the last
command (awk, always 0), so creation failures were silently swallowed.
NemoClaw then registered a phantom sandbox in ~/.nemoclaw/sandboxes.json
that caused "sandbox not found" on every subsequent connect/status call.

This is the root cause of the WSL2 + Docker Desktop failures reported
in #140 and #152 — sandbox creation fails due to Docker networking
issues, but onboarding completes as if it succeeded.

Three changes:
1. Remove the awk pipe so the real exit code flows through to run()
2. Poll openshell sandbox list for Ready state before registering
   (matches the gateway health check pattern at lines 121-132)
3. Move build-context cleanup before the exit-code check so temp
   files are always cleaned up, even on failure

Signed-off-by: Brian Taylor <brian.taylor818@gmail.com>
---
 bin/lib/onboard.js | 53 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 7 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 23f19b01a8..ce1559ec4b 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -434,9 +434,51 @@ async function createSandbox(gpu) {
   if (process.env.NVIDIA_API_KEY) {
     envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`);
   }
-  // set -o pipefail ensures the openshell exit code propagates through the awk pipe.
-  // Without it, awk's exit code (always 0) would mask a failed sandbox create.
-  run(`set -o pipefail; openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1 | awk '/Sandbox allocated/{if(!seen){print;seen=1}next}1'`);
+
+  // Run without piping through awk — the pipe masked non-zero exit codes
+  // from openshell because bash returns the status of the last pipeline
+  // command (awk, always 0) unless pipefail is set. Removing the pipe
+  // lets the real exit code flow through to run().
+  const createResult = run(
+    `openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1`,
+    { ignoreError: true }
+  );
+
+  // Clean up build context regardless of outcome
+  run(`rm -rf "${buildCtx}"`, { ignoreError: true });
+
+  if (createResult.status !== 0) {
+    console.error("");
+    console.error(`  Sandbox creation failed (exit ${createResult.status}).`);
+    console.error("  Try:  openshell sandbox list        # check gateway state");
+    console.error("  Try:  nemoclaw onboard              # retry from scratch");
+    process.exit(createResult.status || 1);
+  }
+
+  // Wait for sandbox to reach Ready state in k3s before registering.
+  // On WSL2 + Docker Desktop the pod can take longer to initialize;
+  // without this gate, NemoClaw registers a phantom sandbox that
+  // causes "sandbox not found" on every subsequent connect/status call.
+  console.log("  Waiting for sandbox to become ready...");
+  let ready = false;
+  for (let i = 0; i < 30; i++) {
+    const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true });
+    const clean = list.replace(/\x1b\[[0-9;]*m/g, "");
+    if (clean.split("\n").some((l) => l.includes(sandboxName) && l.includes("Ready"))) {
+      ready = true;
+      break;
+    }
+    if (i === 29) break;
+    require("child_process").spawnSync("sleep", ["2"]);
+  }
+
+  if (!ready) {
+    console.error("");
+    console.error(`  Sandbox '${sandboxName}' was created but did not become ready within 60s.`);
+    console.error("  Check: openshell sandbox list");
+    console.error(`  Logs:  openshell sandbox logs ${sandboxName}`);
+    process.exit(1);
+  }
 
   // Release any stale forward on port 18789 before claiming it for the new sandbox.
   // A previous onboard run may have left the port forwarded to a different sandbox,
@@ -445,10 +487,7 @@ async function createSandbox(gpu) {
   // Forward dashboard port to the new sandbox
   run(`openshell forward start --background 18789 "${sandboxName}"`, { ignoreError: true });
 
-  // Clean up build context
-  run(`rm -rf "${buildCtx}"`, { ignoreError: true });
-
-  // Register in registry
+  // Register only after confirmed ready — prevents phantom entries
   registry.registerSandbox({
     name: sandboxName,
     gpuEnabled: !!gpu,

From 4fd2c6eb934e24aecd70b81f47b2bfeae2dec2de Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Tue, 17 Mar 2026 11:21:15 -0700
Subject: [PATCH 02/11] fix: use word-boundary match for Ready status and fix
 timeout

includes("Ready") falsely matched "NotReady" because "Ready" is a
substring. Use a word-boundary regex with a NotReady exclusion so
sandboxes stuck in error states are not registered as healthy.

Also remove the off-by-one break at i=29 so the loop sleeps the
full 60s before timing out.

Signed-off-by: Brian Taylor <brian.taylor818@gmail.com>
---
 bin/lib/onboard.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index ce1559ec4b..851803df78 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -464,11 +464,12 @@ async function createSandbox(gpu) {
   for (let i = 0; i < 30; i++) {
     const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true });
     const clean = list.replace(/\x1b\[[0-9;]*m/g, "");
-    if (clean.split("\n").some((l) => l.includes(sandboxName) && l.includes("Ready"))) {
+    // Match "Ready" as a whole word — l.includes("Ready") would also match
+    // "NotReady", defeating the entire readiness gate.
+    if (clean.split("\n").some((l) => l.includes(sandboxName) && /\bReady\b/.test(l) && !/NotReady/.test(l))) {
       ready = true;
       break;
     }
-    if (i === 29) break;
     require("child_process").spawnSync("sleep", ["2"]);
   }
 

From 68b8e5e07ed576bc7b4efad3bca4afa718db7226 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Tue, 17 Mar 2026 11:48:42 -0700
Subject: [PATCH 03/11] fix: exact-match sandbox name in readiness check

Use column-based matching (split on whitespace, check cols[0])
instead of substring includes(). Prevents false positives when
one sandbox name is a prefix of another (e.g. "my" matching
"my-assistant").

Signed-off-by: Brian Taylor <brian.taylor818@gmail.com>
---
 bin/lib/onboard.js | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 851803df78..ec764b5f5a 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -464,9 +464,13 @@ async function createSandbox(gpu) {
   for (let i = 0; i < 30; i++) {
     const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true });
     const clean = list.replace(/\x1b\[[0-9;]*m/g, "");
-    // Match "Ready" as a whole word — l.includes("Ready") would also match
-    // "NotReady", defeating the entire readiness gate.
-    if (clean.split("\n").some((l) => l.includes(sandboxName) && /\bReady\b/.test(l) && !/NotReady/.test(l))) {
+    // Exact-match sandbox name in the first column and "Ready" as a whole
+    // word. includes() would false-positive on substring names ("my" in
+    // "my-assistant") and on "NotReady" containing "Ready".
+    if (clean.split("\n").some((l) => {
+      const cols = l.trim().split(/\s+/);
+      return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
+    })) {
       ready = true;
       break;
     }

From 7a01e35d20e4217602b258fab645e8a0ebc9dae5 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Tue, 17 Mar 2026 13:23:19 -0700
Subject: [PATCH 04/11] test: add readiness gate parsing tests for sandbox
 creation

Signed-off-by: Brian Taylor <brian.taylor818@gmail.com>
---
 test/onboard-readiness.test.js | 80 ++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 test/onboard-readiness.test.js

diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js
new file mode 100644
index 0000000000..a5d1c9b6c1
--- /dev/null
+++ b/test/onboard-readiness.test.js
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+
+// Test the readiness parsing logic extracted for testability
+// These validate the exact matching algorithm from onboard.js createSandbox()
+
+function isSandboxReady(output, sandboxName) {
+  const clean = output.replace(/\x1b\[[0-9;]*m/g, "");
+  return clean.split("\n").some((l) => {
+    const cols = l.trim().split(/\s+/);
+    return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
+  });
+}
+
+describe("sandbox readiness parsing", () => {
+  it("detects Ready sandbox", () => {
+    assert.ok(isSandboxReady("my-assistant   Ready   2m ago", "my-assistant"));
+  });
+
+  it("rejects NotReady sandbox", () => {
+    assert.ok(!isSandboxReady("my-assistant   NotReady   init failed", "my-assistant"));
+  });
+
+  it("rejects empty output", () => {
+    assert.ok(!isSandboxReady("No sandboxes found.", "my-assistant"));
+    assert.ok(!isSandboxReady("", "my-assistant"));
+  });
+
+  it("strips ANSI escape codes before matching", () => {
+    assert.ok(isSandboxReady(
+      "\x1b[1mmy-assistant\x1b[0m   \x1b[32mReady\x1b[0m   2m ago",
+      "my-assistant"
+    ));
+  });
+
+  it("rejects ANSI-wrapped NotReady", () => {
+    assert.ok(!isSandboxReady(
+      "\x1b[1mmy-assistant\x1b[0m   \x1b[31mNotReady\x1b[0m   crash",
+      "my-assistant"
+    ));
+  });
+
+  it("exact-matches sandbox name in first column", () => {
+    // "my" should NOT match "my-assistant"
+    assert.ok(!isSandboxReady("my-assistant   Ready   2m ago", "my"));
+  });
+
+  it("does not match sandbox name in non-first column", () => {
+    assert.ok(!isSandboxReady("other-box   Ready   owned-by-my-assistant", "my-assistant"));
+  });
+
+  it("handles multiple sandboxes in output", () => {
+    const output = [
+      "NAME           STATUS     AGE",
+      "dev-box        NotReady   5m ago",
+      "my-assistant   Ready      2m ago",
+      "staging        Ready      10m ago",
+    ].join("\n");
+    assert.ok(isSandboxReady(output, "my-assistant"));
+    assert.ok(!isSandboxReady(output, "dev-box")); // NotReady
+    assert.ok(isSandboxReady(output, "staging"));
+    assert.ok(!isSandboxReady(output, "prod")); // not present
+  });
+
+  it("handles Ready sandbox with extra status columns", () => {
+    assert.ok(isSandboxReady("my-assistant   Ready   Running   2m ago   1/1", "my-assistant"));
+  });
+
+  it("rejects when output only contains name in a URL or path", () => {
+    assert.ok(!isSandboxReady("Connecting to my-assistant.openshell.internal Ready", "my-assistant"));
+    // "my-assistant.openshell.internal" is cols[0], not "my-assistant"
+  });
+
+  it("handles tab-separated output", () => {
+    assert.ok(isSandboxReady("my-assistant\tReady\t2m ago", "my-assistant"));
+  });
+});

From 9f08cedbc9f2150a6adccaf43829008ede1c8ede Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Wed, 18 Mar 2026 13:55:41 -0700
Subject: [PATCH 05/11] fix: guard against truncated sandbox names on WSL
 (fixes #21)

On WSL, hyphenated sandbox names like "my-assistant" can be truncated
to "m" during shell argument parsing, causing "sandbox not found"
failures when applying policy presets.

- Add RFC 1123 validation in applyPreset() to catch truncated names
  early with a clear error message
- Quote sandbox name in error output (was unquoted on line 356)
- Add 6 WSL-specific regression tests covering hyphenated names,
  multi-hyphen names, truncation detection, and command quoting
---
 bin/lib/onboard.js             |  2 +-
 bin/lib/policies.js            |  9 ++++++
 test/onboard-readiness.test.js | 56 ++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index ec764b5f5a..f77156f1d5 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -481,7 +481,7 @@ async function createSandbox(gpu) {
     console.error("");
     console.error(`  Sandbox '${sandboxName}' was created but did not become ready within 60s.`);
     console.error("  Check: openshell sandbox list");
-    console.error(`  Logs:  openshell sandbox logs ${sandboxName}`);
+    console.error(`  Logs:  openshell sandbox logs "${sandboxName}"`);
     process.exit(1);
   }
 
diff --git a/bin/lib/policies.js b/bin/lib/policies.js
index 7263b7d07f..e0c1ab910f 100644
--- a/bin/lib/policies.js
+++ b/bin/lib/policies.js
@@ -84,6 +84,15 @@ function buildPolicyGetCommand(sandboxName) {
 }
 
 function applyPreset(sandboxName, presetName) {
+  // Guard against truncated sandbox names (issue #21: WSL truncates hyphenated
+  // names during argument parsing, e.g. "my-assistant" → "m")
+  if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) {
+    throw new Error(
+      `Invalid or truncated sandbox name: '${sandboxName}'. ` +
+      `Names must be lowercase alphanumeric with optional hyphens.`
+    );
+  }
+
   const presetContent = loadPreset(presetName);
   if (!presetContent) {
     console.error(`  Cannot load preset: ${presetName}`);
diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js
index a5d1c9b6c1..252c8dfd3f 100644
--- a/test/onboard-readiness.test.js
+++ b/test/onboard-readiness.test.js
@@ -4,6 +4,8 @@
 const { describe, it } = require("node:test");
 const assert = require("node:assert/strict");
 
+const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies");
+
 // Test the readiness parsing logic extracted for testability
 // These validate the exact matching algorithm from onboard.js createSandbox()
 
@@ -78,3 +80,57 @@ describe("sandbox readiness parsing", () => {
     assert.ok(isSandboxReady("my-assistant\tReady\t2m ago", "my-assistant"));
   });
 });
+
+// Regression tests for issue #21: WSL truncates hyphenated sandbox names
+// during shell argument parsing (e.g. "my-assistant" → "m").
+describe("WSL sandbox name handling (issue #21)", () => {
+  it("buildPolicySetCommand preserves hyphenated sandbox name", () => {
+    const cmd = buildPolicySetCommand("/tmp/policy.yaml", "my-assistant");
+    assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`);
+    assert.ok(!cmd.includes(' my-assistant '), "Name must be quoted, not bare");
+  });
+
+  it("buildPolicyGetCommand preserves hyphenated sandbox name", () => {
+    const cmd = buildPolicyGetCommand("my-assistant");
+    assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`);
+  });
+
+  it("buildPolicySetCommand preserves multi-hyphen names", () => {
+    const cmd = buildPolicySetCommand("/tmp/p.yaml", "my-dev-assistant-v2");
+    assert.ok(cmd.includes('"my-dev-assistant-v2"'));
+  });
+
+  it("buildPolicySetCommand preserves single-char name", () => {
+    // If WSL truncates "my-assistant" to "m", the single-char name should
+    // still be quoted and passed through unchanged
+    const cmd = buildPolicySetCommand("/tmp/p.yaml", "m");
+    assert.ok(cmd.includes('"m"'));
+  });
+
+  it("applyPreset rejects truncated/invalid sandbox name", () => {
+    const policies = require("../bin/lib/policies");
+    // Empty name
+    assert.throws(
+      () => policies.applyPreset("", "npm"),
+      /Invalid or truncated sandbox name/
+    );
+    // Name with uppercase (not valid per RFC 1123)
+    assert.throws(
+      () => policies.applyPreset("My-Assistant", "npm"),
+      /Invalid or truncated sandbox name/
+    );
+    // Name starting with hyphen
+    assert.throws(
+      () => policies.applyPreset("-broken", "npm"),
+      /Invalid or truncated sandbox name/
+    );
+  });
+
+  it("readiness check uses exact match preventing truncated name false-positive", () => {
+    // If "my-assistant" was truncated to "m", the readiness check should
+    // NOT match a sandbox named "my-assistant" when searching for "m"
+    assert.ok(!isSandboxReady("my-assistant   Ready   2m ago", "m"));
+    assert.ok(!isSandboxReady("my-assistant   Ready   2m ago", "my"));
+    assert.ok(!isSandboxReady("my-assistant   Ready   2m ago", "my-"));
+  });
+});

From 73573722ca4f4a6ec0660fb68781c434a1b16c7a Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Wed, 18 Mar 2026 14:00:49 -0700
Subject: [PATCH 06/11] fix: clean up orphaned sandbox on readiness timeout

When the sandbox is created but never reaches Ready within 60s,
delete it before exiting so the next onboard retry with the same
name doesn't fail on "sandbox already exists".
---
 bin/lib/onboard.js | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index f77156f1d5..b07059fa6b 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -478,10 +478,14 @@ async function createSandbox(gpu) {
   }
 
   if (!ready) {
+    // Clean up the orphaned sandbox so the next onboard retry with the same
+    // name doesn't fail on "sandbox already exists".
+    run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true });
     console.error("");
     console.error(`  Sandbox '${sandboxName}' was created but did not become ready within 60s.`);
-    console.error("  Check: openshell sandbox list");
+    console.error("  The orphaned sandbox has been removed — you can safely retry.");
     console.error(`  Logs:  openshell sandbox logs "${sandboxName}"`);
+    console.error("  Retry: nemoclaw onboard");
     process.exit(1);
   }
 

From 8d4a23b12be0556c97c5e0b7988f6c9d628900c1 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Wed, 18 Mar 2026 14:05:37 -0700
Subject: [PATCH 07/11] chore: remove issue references from code comments

---
 bin/lib/policies.js            | 4 ++--
 test/onboard-readiness.test.js | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bin/lib/policies.js b/bin/lib/policies.js
index e0c1ab910f..a059ff9651 100644
--- a/bin/lib/policies.js
+++ b/bin/lib/policies.js
@@ -84,8 +84,8 @@ function buildPolicyGetCommand(sandboxName) {
 }
 
 function applyPreset(sandboxName, presetName) {
-  // Guard against truncated sandbox names (issue #21: WSL truncates hyphenated
-  // names during argument parsing, e.g. "my-assistant" → "m")
+  // Guard against truncated sandbox names — WSL can truncate hyphenated
+  // names during argument parsing, e.g. "my-assistant" → "m"
   if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) {
     throw new Error(
       `Invalid or truncated sandbox name: '${sandboxName}'. ` +
diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js
index 252c8dfd3f..d5a06d436a 100644
--- a/test/onboard-readiness.test.js
+++ b/test/onboard-readiness.test.js
@@ -81,9 +81,9 @@ describe("sandbox readiness parsing", () => {
   });
 });
 
-// Regression tests for issue #21: WSL truncates hyphenated sandbox names
-// during shell argument parsing (e.g. "my-assistant" → "m").
-describe("WSL sandbox name handling (issue #21)", () => {
+// Regression tests: WSL truncates hyphenated sandbox names during shell
+// argument parsing (e.g. "my-assistant" → "m").
+describe("WSL sandbox name handling", () => {
   it("buildPolicySetCommand preserves hyphenated sandbox name", () => {
     const cmd = buildPolicySetCommand("/tmp/policy.yaml", "my-assistant");
     assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`);

From 6d617befaac3871f4e6a906ce1466fb8f1bf9ac2 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Wed, 18 Mar 2026 14:11:22 -0700
Subject: [PATCH 08/11] fix: enforce RFC 1123 63-char limit in sandbox name
 validation

---
 bin/lib/policies.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bin/lib/policies.js b/bin/lib/policies.js
index a059ff9651..80034ee164 100644
--- a/bin/lib/policies.js
+++ b/bin/lib/policies.js
@@ -86,10 +86,11 @@ function buildPolicyGetCommand(sandboxName) {
 function applyPreset(sandboxName, presetName) {
   // Guard against truncated sandbox names — WSL can truncate hyphenated
   // names during argument parsing, e.g. "my-assistant" → "m"
-  if (!sandboxName || !/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) {
+  const isRfc1123Label = /^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName);
+  if (!sandboxName || sandboxName.length > 63 || !isRfc1123Label) {
     throw new Error(
       `Invalid or truncated sandbox name: '${sandboxName}'. ` +
-      `Names must be lowercase alphanumeric with optional hyphens.`
+      `Names must be 1-63 chars, lowercase alphanumeric, with optional internal hyphens.`
     );
   }
 

From 5d3b6d0d6f31faf6dc503a3607d74f4393a53734 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Wed, 18 Mar 2026 18:34:18 -0700
Subject: [PATCH 09/11] fix: extract isSandboxReady as shared function and
 branch cleanup messaging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extract readiness parser from inline code to exported isSandboxReady()
  so tests validate the production code, not a duplicated copy
- Branch cleanup messaging on delete result — report manual cleanup
  command if the orphan delete fails
---
 bin/lib/onboard.js             | 33 +++++++++++++++++++++------------
 test/onboard-readiness.test.js | 12 +-----------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index b07059fa6b..793e3e8587 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -57,6 +57,18 @@ async function promptOrDefault(question, envVar, defaultValue) {
 
 // ── Helpers ──────────────────────────────────────────────────────
 
+/**
+ * Check if a sandbox is in Ready state from `openshell sandbox list` output.
+ * Strips ANSI codes and exact-matches the sandbox name in the first column.
+ */
+function isSandboxReady(output, sandboxName) {
+  const clean = output.replace(/\x1b\[[0-9;]*m/g, "");
+  return clean.split("\n").some((l) => {
+    const cols = l.trim().split(/\s+/);
+    return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
+  });
+}
+
 function step(n, total, msg) {
   console.log("");
   console.log(`  [${n}/${total}] ${msg}`);
@@ -463,14 +475,7 @@ async function createSandbox(gpu) {
   let ready = false;
   for (let i = 0; i < 30; i++) {
     const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true });
-    const clean = list.replace(/\x1b\[[0-9;]*m/g, "");
-    // Exact-match sandbox name in the first column and "Ready" as a whole
-    // word. includes() would false-positive on substring names ("my" in
-    // "my-assistant") and on "NotReady" containing "Ready".
-    if (clean.split("\n").some((l) => {
-      const cols = l.trim().split(/\s+/);
-      return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
-    })) {
+    if (isSandboxReady(list, sandboxName)) {
       ready = true;
       break;
     }
@@ -480,11 +485,15 @@ async function createSandbox(gpu) {
   if (!ready) {
     // Clean up the orphaned sandbox so the next onboard retry with the same
     // name doesn't fail on "sandbox already exists".
-    run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true });
+    const delResult = run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true });
     console.error("");
     console.error(`  Sandbox '${sandboxName}' was created but did not become ready within 60s.`);
-    console.error("  The orphaned sandbox has been removed — you can safely retry.");
-    console.error(`  Logs:  openshell sandbox logs "${sandboxName}"`);
+    if (delResult.status === 0) {
+      console.error("  The orphaned sandbox has been removed — you can safely retry.");
+    } else {
+      console.error(`  Could not remove the orphaned sandbox. Manual cleanup:`);
+      console.error(`    openshell sandbox delete "${sandboxName}"`);
+    }
     console.error("  Retry: nemoclaw onboard");
     process.exit(1);
   }
@@ -933,4 +942,4 @@ async function onboard(opts = {}) {
   printDashboard(sandboxName, model, provider);
 }
 
-module.exports = { buildSandboxConfigSyncScript, onboard, setupNim };
+module.exports = { buildSandboxConfigSyncScript, isSandboxReady, onboard, setupNim };
diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js
index d5a06d436a..fe20354c98 100644
--- a/test/onboard-readiness.test.js
+++ b/test/onboard-readiness.test.js
@@ -5,17 +5,7 @@ const { describe, it } = require("node:test");
 const assert = require("node:assert/strict");
 
 const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies");
-
-// Test the readiness parsing logic extracted for testability
-// These validate the exact matching algorithm from onboard.js createSandbox()
-
-function isSandboxReady(output, sandboxName) {
-  const clean = output.replace(/\x1b\[[0-9;]*m/g, "");
-  return clean.split("\n").some((l) => {
-    const cols = l.trim().split(/\s+/);
-    return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
-  });
-}
+const { isSandboxReady } = require("../bin/lib/onboard");
 
 describe("sandbox readiness parsing", () => {
   it("detects Ready sandbox", () => {

From 6a1da48eb9b44e2ad0894bb5f0f737750ff3c762 Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Thu, 19 Mar 2026 01:51:06 -0700
Subject: [PATCH 10/11] fix: clean up stale gateway and port forward before
 preflight check

A previous onboard session may leave the OpenShell gateway container
and port forward running, causing port 8080/18789 conflicts on the
next invocation. Detect a NemoClaw-owned gateway before the port
availability check and tear it down automatically.

Closes #397
---
 bin/lib/onboard.js             | 24 +++++++++++++++-
 test/onboard-readiness.test.js | 51 +++++++++++++++++++++++++++++++++-
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 793e3e8587..060b5ba212 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -69,6 +69,16 @@ function isSandboxReady(output, sandboxName) {
   });
 }
 
+/**
+ * Determine whether stale NemoClaw gateway output indicates a previous
+ * session that should be cleaned up before the port preflight check.
+ * @param {string} gwInfoOutput - Raw output from `openshell gateway info -g nemoclaw`.
+ * @returns {boolean}
+ */
+function hasStaleGateway(gwInfoOutput) {
+  return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes("nemoclaw");
+}
+
 function step(n, total, msg) {
   console.log("");
   console.log(`  [${n}/${total}] ${msg}`);
@@ -288,6 +298,18 @@ async function preflight() {
   }
   console.log(`  ✓ openshell CLI: ${runCapture("openshell --version 2>/dev/null || echo unknown", { ignoreError: true })}`);
 
+  // Clean up stale NemoClaw session before checking ports.
+  // A previous onboard run may have left the gateway container and port
+  // forward running.  If a NemoClaw-owned gateway is still present, tear
+  // it down so the port check below doesn't fail on our own leftovers.
+  const gwInfo = runCapture("openshell gateway info -g nemoclaw 2>/dev/null", { ignoreError: true });
+  if (hasStaleGateway(gwInfo)) {
+    console.log("  Cleaning up previous NemoClaw session...");
+    run("openshell forward stop 18789 2>/dev/null || true", { ignoreError: true });
+    run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true });
+    console.log("  ✓ Previous session cleaned up");
+  }
+
   // Required ports — gateway (8080) and dashboard (18789)
   const requiredPorts = [
     { port: 8080, label: "OpenShell gateway" },
@@ -942,4 +964,4 @@ async function onboard(opts = {}) {
   printDashboard(sandboxName, model, provider);
 }
 
-module.exports = { buildSandboxConfigSyncScript, isSandboxReady, onboard, setupNim };
+module.exports = { buildSandboxConfigSyncScript, hasStaleGateway, isSandboxReady, onboard, setupNim };
diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js
index fe20354c98..4eda74a570 100644
--- a/test/onboard-readiness.test.js
+++ b/test/onboard-readiness.test.js
@@ -5,7 +5,7 @@ const { describe, it } = require("node:test");
 const assert = require("node:assert/strict");
 
 const { buildPolicySetCommand, buildPolicyGetCommand } = require("../bin/lib/policies");
-const { isSandboxReady } = require("../bin/lib/onboard");
+const { hasStaleGateway, isSandboxReady } = require("../bin/lib/onboard");
 
 describe("sandbox readiness parsing", () => {
   it("detects Ready sandbox", () => {
@@ -124,3 +124,52 @@ describe("WSL sandbox name handling", () => {
     assert.ok(!isSandboxReady("my-assistant   Ready   2m ago", "my-"));
   });
 });
+
+// Regression tests for issue #397: stale gateway detection before port checks.
+// A previous onboard session may leave the gateway container and port forward
+// running, causing port-conflict failures on the next onboard invocation.
+describe("stale gateway detection", () => {
+  it("detects active nemoclaw gateway from real output", () => {
+    // Actual output from `openshell gateway info -g nemoclaw` (ANSI stripped)
+    const output = [
+      "Gateway Info",
+      "",
+      "  Gateway: nemoclaw",
+      "  Gateway endpoint: https://127.0.0.1:8080",
+    ].join("\n");
+    assert.ok(hasStaleGateway(output));
+  });
+
+  it("detects gateway from ANSI-colored output", () => {
+    const output =
+      "\x1b[1m\x1b[36mGateway Info\x1b[39m\x1b[0m\n\n" +
+      "  \x1b[2mGateway:\x1b[0m nemoclaw\n" +
+      "  \x1b[2mGateway endpoint:\x1b[0m https://127.0.0.1:8080";
+    assert.ok(hasStaleGateway(output));
+  });
+
+  it("returns false for empty string (no gateway running)", () => {
+    assert.ok(!hasStaleGateway(""));
+  });
+
+  it("returns false for null/undefined", () => {
+    assert.ok(!hasStaleGateway(null));
+    assert.ok(!hasStaleGateway(undefined));
+  });
+
+  it("returns false for error output without gateway name", () => {
+    assert.ok(!hasStaleGateway("Error: no gateway found"));
+    assert.ok(!hasStaleGateway("connection refused"));
+  });
+
+  it("returns false for a different gateway name", () => {
+    // If someone ran a non-nemoclaw gateway, we should not touch it
+    const output = [
+      "Gateway Info",
+      "",
+      "  Gateway: my-other-gateway",
+      "  Gateway endpoint: https://127.0.0.1:8080",
+    ].join("\n");
+    assert.ok(!hasStaleGateway(output));
+  });
+});

From c43c66a0386fb0522b1f5a37c4d38b6de00f40ca Mon Sep 17 00:00:00 2001
From: Brian Taylor <brian.taylor818@gmail.com>
Date: Thu, 19 Mar 2026 03:07:32 -0700
Subject: [PATCH 11/11] test: add double-onboard e2e test for stale state
 recovery

Runs `nemoclaw onboard` three times without cleanup between runs,
verifying that each subsequent onboard recovers automatically from
stale gateway, port forward, and registry entries left behind.

Regression test for #397.
---
 test/e2e/test-double-onboard.sh | 256 ++++++++++++++++++++++++++++++++
 1 file changed, 256 insertions(+)
 create mode 100755 test/e2e/test-double-onboard.sh

diff --git a/test/e2e/test-double-onboard.sh b/test/e2e/test-double-onboard.sh
new file mode 100755
index 0000000000..7ebdddcd5a
--- /dev/null
+++ b/test/e2e/test-double-onboard.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# Double onboard: verify that consecutive `nemoclaw onboard` runs recover
+# automatically from stale state (gateway, port forward, registry entries)
+# left behind by a previous run.
+#
+# Regression test for issues #21, #22, #140, #152, #397.
+#
+# Key insight: running onboard without NVIDIA_API_KEY in non-interactive
+# mode causes process.exit(1) at step 4, but steps 1-3 (preflight,
+# gateway, sandbox) complete first — naturally simulating an unclean exit.
+#
+# Prerequisites:
+#   - Docker running
+#   - openshell CLI installed
+#   - nemoclaw CLI installed
+#   - NVIDIA_API_KEY must NOT be set
+#
+# Usage:
+#   unset NVIDIA_API_KEY
+#   bash test/e2e/test-double-onboard.sh
+
+set -uo pipefail
+
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+
+pass() { ((PASS++)); ((TOTAL++)); printf '\033[32m  PASS: %s\033[0m\n' "$1"; }
+fail() { ((FAIL++)); ((TOTAL++)); printf '\033[31m  FAIL: %s\033[0m\n' "$1"; }
+skip() { ((SKIP++)); ((TOTAL++)); printf '\033[33m  SKIP: %s\033[0m\n' "$1"; }
+section() { echo ""; printf '\033[1;36m=== %s ===\033[0m\n' "$1"; }
+info()  { printf '\033[1;34m  [info]\033[0m %s\n' "$1"; }
+
+SANDBOX_A="e2e-double-a"
+SANDBOX_B="e2e-double-b"
+REGISTRY="$HOME/.nemoclaw/sandboxes.json"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 0: Pre-cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 0: Pre-cleanup"
+info "Destroying any leftover test sandboxes/gateway from previous runs..."
+# Use nemoclaw destroy (not just openshell sandbox delete) to also clean
+# the nemoclaw registry at ~/.nemoclaw/sandboxes.json.  Stale registry
+# entries from a previous run would cause Phase 2 to exit with
+# "Sandbox already exists" before the test even starts.
+if command -v nemoclaw > /dev/null 2>&1; then
+  nemoclaw "$SANDBOX_A" destroy 2>/dev/null || true
+  nemoclaw "$SANDBOX_B" destroy 2>/dev/null || true
+fi
+openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+pass "Pre-cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 1: Prerequisites
+# ══════════════════════════════════════════════════════════════════
+section "Phase 1: Prerequisites"
+
+if docker info > /dev/null 2>&1; then
+  pass "Docker is running"
+else
+  fail "Docker is not running — cannot continue"
+  exit 1
+fi
+
+if command -v openshell > /dev/null 2>&1; then
+  pass "openshell CLI installed"
+else
+  fail "openshell CLI not found — cannot continue"
+  exit 1
+fi
+
+if command -v nemoclaw > /dev/null 2>&1; then
+  pass "nemoclaw CLI installed"
+else
+  fail "nemoclaw CLI not found — cannot continue"
+  exit 1
+fi
+
+if [ -n "${NVIDIA_API_KEY:-}" ]; then
+  fail "NVIDIA_API_KEY is set — this test requires it UNSET (unset NVIDIA_API_KEY)"
+  exit 1
+else
+  pass "NVIDIA_API_KEY is not set (required for controlled step-4 exit)"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 2: First onboard (e2e-double-a) — leaves stale state
+# ══════════════════════════════════════════════════════════════════
+section "Phase 2: First onboard ($SANDBOX_A)"
+info "Running nemoclaw onboard — expect exit 1 (no API key)..."
+
+# Write to temp file to avoid openshell FD inheritance blocking $()
+ONBOARD_LOG="$(mktemp)"
+NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \
+  NEMOCLAW_POLICY_MODE=skip \
+  nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1
+exit1=$?
+output1="$(cat "$ONBOARD_LOG")"
+rm -f "$ONBOARD_LOG"
+
+if [ $exit1 -eq 1 ]; then
+  pass "First onboard exited 1 (step 4 failed as expected)"
+else
+  fail "First onboard exited $exit1 (expected 1)"
+fi
+
+echo "$output1" | grep -q "Sandbox '${SANDBOX_A}' created" \
+  && pass "Sandbox '$SANDBOX_A' created (step 3 completed)" \
+  || fail "Sandbox creation not confirmed in output"
+
+# Verify stale state was left behind
+openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw" \
+  && pass "Gateway is still running (stale state)" \
+  || fail "Gateway is not running after first onboard"
+
+openshell sandbox get "$SANDBOX_A" > /dev/null 2>&1 \
+  && pass "Sandbox '$SANDBOX_A' exists in openshell" \
+  || fail "Sandbox '$SANDBOX_A' not found in openshell"
+
+[ -f "$REGISTRY" ] && grep -q "$SANDBOX_A" "$REGISTRY" \
+  && pass "Registry contains '$SANDBOX_A'" \
+  || fail "Registry does not contain '$SANDBOX_A'"
+
+info "Stale state confirmed — NOT cleaning up before next onboard"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 3: Second onboard — SAME name (e2e-double-a)
+# ══════════════════════════════════════════════════════════════════
+section "Phase 3: Second onboard ($SANDBOX_A — same name, stale state)"
+info "Running nemoclaw onboard with NEMOCLAW_RECREATE_SANDBOX=1..."
+
+ONBOARD_LOG="$(mktemp)"
+NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \
+  NEMOCLAW_RECREATE_SANDBOX=1 \
+  NEMOCLAW_POLICY_MODE=skip \
+  nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1
+exit2=$?
+output2="$(cat "$ONBOARD_LOG")"
+rm -f "$ONBOARD_LOG"
+
+# Step 4 still fails (no API key), but steps 1-3 should succeed
+if [ $exit2 -eq 1 ]; then
+  pass "Second onboard exited 1 (step 4 failed as expected)"
+else
+  fail "Second onboard exited $exit2 (expected 1)"
+fi
+
+echo "$output2" | grep -q "Cleaning up previous NemoClaw session" \
+  && pass "Stale session cleanup fired on second onboard" \
+  || fail "Stale session cleanup did NOT fire (regression: #397)"
+
+echo "$output2" | grep -q "Port 8080 is not available" \
+  && fail "Port 8080 conflict detected (regression: #21)" \
+  || pass "No port 8080 conflict"
+
+echo "$output2" | grep -q "Port 18789 is not available" \
+  && fail "Port 18789 conflict detected" \
+  || pass "No port 18789 conflict"
+
+echo "$output2" | grep -q "Sandbox '${SANDBOX_A}' created" \
+  && pass "Sandbox '$SANDBOX_A' recreated" \
+  || fail "Sandbox '$SANDBOX_A' was not recreated"
+
+openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw" \
+  && pass "Gateway running after second onboard" \
+  || fail "Gateway not running after second onboard"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 4: Third onboard — DIFFERENT name (e2e-double-b)
+# ══════════════════════════════════════════════════════════════════
+section "Phase 4: Third onboard ($SANDBOX_B — different name, stale state)"
+info "Running nemoclaw onboard with new sandbox name..."
+
+ONBOARD_LOG="$(mktemp)"
+NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_B" \
+  NEMOCLAW_POLICY_MODE=skip \
+  nemoclaw onboard --non-interactive > "$ONBOARD_LOG" 2>&1
+exit3=$?
+output3="$(cat "$ONBOARD_LOG")"
+rm -f "$ONBOARD_LOG"
+
+if [ $exit3 -eq 1 ]; then
+  pass "Third onboard exited 1 (step 4 failed as expected)"
+else
+  fail "Third onboard exited $exit3 (expected 1)"
+fi
+
+echo "$output3" | grep -q "Cleaning up previous NemoClaw session" \
+  && pass "Stale session cleanup fired on third onboard" \
+  || fail "Stale session cleanup did NOT fire on third onboard"
+
+echo "$output3" | grep -q "Port 8080 is not available" \
+  && fail "Port 8080 conflict on third onboard (regression)" \
+  || pass "No port 8080 conflict on third onboard"
+
+echo "$output3" | grep -q "Port 18789 is not available" \
+  && fail "Port 18789 conflict on third onboard" \
+  || pass "No port 18789 conflict on third onboard"
+
+echo "$output3" | grep -q "Sandbox '${SANDBOX_B}' created" \
+  && pass "Sandbox '$SANDBOX_B' created" \
+  || fail "Sandbox '$SANDBOX_B' was not created"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 5: Final cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 5: Final cleanup"
+
+nemoclaw "$SANDBOX_A" destroy 2>/dev/null || true
+nemoclaw "$SANDBOX_B" destroy 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+
+openshell sandbox get "$SANDBOX_A" > /dev/null 2>&1 \
+  && fail "Sandbox '$SANDBOX_A' still exists after cleanup" \
+  || pass "Sandbox '$SANDBOX_A' cleaned up"
+
+openshell sandbox get "$SANDBOX_B" > /dev/null 2>&1 \
+  && fail "Sandbox '$SANDBOX_B' still exists after cleanup" \
+  || pass "Sandbox '$SANDBOX_B' cleaned up"
+
+[ -f "$REGISTRY" ] && grep -q "$SANDBOX_A\|$SANDBOX_B" "$REGISTRY" \
+  && fail "Registry still contains test sandbox entries" \
+  || pass "Registry cleaned up"
+
+pass "Final cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Summary
+# ══════════════════════════════════════════════════════════════════
+echo ""
+echo "========================================"
+echo "  Double Onboard E2E Results:"
+echo "    Passed:  $PASS"
+echo "    Failed:  $FAIL"
+echo "    Skipped: $SKIP"
+echo "    Total:   $TOTAL"
+echo "========================================"
+
+if [ "$FAIL" -eq 0 ]; then
+  printf '\n\033[1;32m  Double onboard PASSED — stale state recovery verified.\033[0m\n'
+  exit 0
+else
+  printf '\n\033[1;31m  %d test(s) failed.\033[0m\n' "$FAIL"
+  exit 1
+fi