Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 87 additions & 8 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,28 @@ async function promptOrDefault(question, envVar, defaultValue) {

// ── Helpers ──────────────────────────────────────────────────────

/**
* Check if a sandbox is in Ready state from `openshell sandbox list` output.
* Strips ANSI codes and exact-matches the sandbox name in the first column.
*/
function isSandboxReady(output, sandboxName) {
const clean = output.replace(/\x1b\[[0-9;]*m/g, "");
return clean.split("\n").some((l) => {
const cols = l.trim().split(/\s+/);
return cols[0] === sandboxName && cols.includes("Ready") && !cols.includes("NotReady");
});
}

/**
* Determine whether stale NemoClaw gateway output indicates a previous
* session that should be cleaned up before the port preflight check.
* @param {string} gwInfoOutput - Raw output from `openshell gateway info -g nemoclaw`.
* @returns {boolean}
*/
function hasStaleGateway(gwInfoOutput) {
return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes("nemoclaw");
}

function step(n, total, msg) {
console.log("");
console.log(` [${n}/${total}] ${msg}`);
Expand Down Expand Up @@ -276,6 +298,18 @@ async function preflight() {
}
console.log(` ✓ openshell CLI: ${runCapture("openshell --version 2>/dev/null || echo unknown", { ignoreError: true })}`);

// Clean up stale NemoClaw session before checking ports.
// A previous onboard run may have left the gateway container and port
// forward running. If a NemoClaw-owned gateway is still present, tear
// it down so the port check below doesn't fail on our own leftovers.
const gwInfo = runCapture("openshell gateway info -g nemoclaw 2>/dev/null", { ignoreError: true });
if (hasStaleGateway(gwInfo)) {
console.log(" Cleaning up previous NemoClaw session...");
run("openshell forward stop 18789 2>/dev/null || true", { ignoreError: true });
run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true });
console.log(" ✓ Previous session cleaned up");
}

// Required ports — gateway (8080) and dashboard (18789)
const requiredPorts = [
{ port: 8080, label: "OpenShell gateway" },
Expand Down Expand Up @@ -434,9 +468,57 @@ async function createSandbox(gpu) {
if (process.env.NVIDIA_API_KEY) {
envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`);
}
// set -o pipefail ensures the openshell exit code propagates through the awk pipe.
// Without it, awk's exit code (always 0) would mask a failed sandbox create.
run(`set -o pipefail; openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1 | awk '/Sandbox allocated/{if(!seen){print;seen=1}next}1'`);

// Run without piping through awk — the pipe masked non-zero exit codes
// from openshell because bash returns the status of the last pipeline
// command (awk, always 0) unless pipefail is set. Removing the pipe
// lets the real exit code flow through to run().
const createResult = run(
`openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1`,
{ ignoreError: true }
);

// Clean up build context regardless of outcome
run(`rm -rf "${buildCtx}"`, { ignoreError: true });

if (createResult.status !== 0) {
console.error("");
console.error(` Sandbox creation failed (exit ${createResult.status}).`);
console.error(" Try: openshell sandbox list # check gateway state");
console.error(" Try: nemoclaw onboard # retry from scratch");
process.exit(createResult.status || 1);
}

// Wait for sandbox to reach Ready state in k3s before registering.
// On WSL2 + Docker Desktop the pod can take longer to initialize;
// without this gate, NemoClaw registers a phantom sandbox that
// causes "sandbox not found" on every subsequent connect/status call.
console.log(" Waiting for sandbox to become ready...");
let ready = false;
for (let i = 0; i < 30; i++) {
const list = runCapture("openshell sandbox list 2>&1", { ignoreError: true });
if (isSandboxReady(list, sandboxName)) {
ready = true;
break;
}
require("child_process").spawnSync("sleep", ["2"]);
}

if (!ready) {
// Clean up the orphaned sandbox so the next onboard retry with the same
// name doesn't fail on "sandbox already exists".
const delResult = run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true });
console.error("");
console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`);
if (delResult.status === 0) {
console.error(" The orphaned sandbox has been removed — you can safely retry.");
} else {
console.error(` Could not remove the orphaned sandbox. Manual cleanup:`);
console.error(` openshell sandbox delete "${sandboxName}"`);
}
console.error(" Retry: nemoclaw onboard");
process.exit(1);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

// Release any stale forward on port 18789 before claiming it for the new sandbox.
// A previous onboard run may have left the port forwarded to a different sandbox,
Expand All @@ -445,10 +527,7 @@ async function createSandbox(gpu) {
// Forward dashboard port to the new sandbox
run(`openshell forward start --background 18789 "${sandboxName}"`, { ignoreError: true });

// Clean up build context
run(`rm -rf "${buildCtx}"`, { ignoreError: true });

// Register in registry
// Register only after confirmed ready — prevents phantom entries
registry.registerSandbox({
name: sandboxName,
gpuEnabled: !!gpu,
Expand Down Expand Up @@ -885,4 +964,4 @@ async function onboard(opts = {}) {
printDashboard(sandboxName, model, provider);
}

module.exports = { buildSandboxConfigSyncScript, onboard, setupNim };
module.exports = { buildSandboxConfigSyncScript, hasStaleGateway, isSandboxReady, onboard, setupNim };
10 changes: 10 additions & 0 deletions bin/lib/policies.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ function buildPolicyGetCommand(sandboxName) {
}

function applyPreset(sandboxName, presetName) {
// Guard against truncated sandbox names — WSL can truncate hyphenated
// names during argument parsing, e.g. "my-assistant" → "m"
const isRfc1123Label = /^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName);
if (!sandboxName || sandboxName.length > 63 || !isRfc1123Label) {
throw new Error(
`Invalid or truncated sandbox name: '${sandboxName}'. ` +
`Names must be 1-63 chars, lowercase alphanumeric, with optional internal hyphens.`
);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

const presetContent = loadPreset(presetName);
if (!presetContent) {
console.error(` Cannot load preset: ${presetName}`);
Expand Down
Loading
Loading