diff --git a/ci/platform-matrix.json b/ci/platform-matrix.json index 59debfe090..797e0a1a4f 100644 --- a/ci/platform-matrix.json +++ b/ci/platform-matrix.json @@ -44,6 +44,14 @@ "_prd_note": "PRD tracks x86 and ARM (WOA) separately; treating as one entry until ARM is validated independently.", "notes": "Requires WSL2 with Docker Desktop backend." }, + { + "name": "Jetson (Orin Nano, Orin NX, AGX Orin, Xavier)", + "runtimes": ["Docker"], + "status": "tested", + "prd_priority": "P1", + "ci_tested": false, + "notes": "Run `sudo nemoclaw setup-jetson` before onboarding. See [commands reference](../reference/commands.md#nemoclaw-setup-jetson)." + }, { "name": "DGX Station", "runtimes": ["Docker"], diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md index 2858a0dbd1..86f92d3985 100644 --- a/docs/get-started/quickstart.md +++ b/docs/get-started/quickstart.md @@ -70,6 +70,7 @@ Availability is not limited to these entries, but untested configurations may ha | macOS (Apple Silicon) | Colima, Docker Desktop | Tested with limitations | Install Xcode Command Line Tools (`xcode-select --install`) and start the runtime before running the installer. | | DGX Spark | Docker | Tested | Use the standard installer and `nemoclaw onboard`. | | Windows WSL2 | Docker Desktop (WSL backend) | Tested with limitations | Requires WSL2 with Docker Desktop backend. | +| Jetson (Orin Nano, Orin NX, AGX Orin, Xavier) | Docker | Tested | Run `sudo nemoclaw setup-jetson` before onboarding. See [commands reference](../reference/commands.md#nemoclaw-setup-jetson). | ## Install NemoClaw and Onboard OpenClaw Agent diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 44960dfc6d..6de649fe91 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -275,6 +275,16 @@ This command remains as a compatibility alias to `nemoclaw onboard`. 
$ nemoclaw setup-spark ``` +### `nemoclaw setup-jetson` + +Set up NemoClaw on NVIDIA Jetson devices (Orin Nano, Orin NX, AGX Orin, Xavier). +This command configures the NVIDIA container runtime for Docker and loads the kernel modules (`br_netfilter`, `xt_comment`) that k3s networking needs on Jetson's Tegra kernel; the iptables-legacy gateway image patch is applied automatically by `nemoclaw onboard`. +Run with `sudo` on the Jetson host. + +```console +$ sudo nemoclaw setup-jetson +``` + ### `nemoclaw debug` Collect diagnostics for bug reports. diff --git a/scripts/setup-jetson.sh b/scripts/setup-jetson.sh index 523d5ba8d7..746f8474bf 100755 --- a/scripts/setup-jetson.sh +++ b/scripts/setup-jetson.sh @@ -1,84 +1,236 @@ #!/usr/bin/env bash # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# NemoClaw setup for NVIDIA Jetson devices (Orin Nano, Orin NX, AGX Orin, Thor). +# +# Jetson devices use unified memory and a Tegra kernel that lacks nf_tables +# chain modules (nft_chain_filter, nft_chain_nat, etc.). The OpenShell gateway +# runs k3s inside a Docker container, and k3s's network policy controller +# uses iptables in nf_tables mode by default, which panics on Tegra kernels. +# +# This script prepares the Jetson host so that `nemoclaw onboard` succeeds: +# 1. Verifies Jetson platform +# 2. Ensures NVIDIA Container Runtime is Docker's default runtime (default-runtime=nvidia) +# 3. Loads required kernel modules (br_netfilter, xt_comment) +# 4. Restarts the Docker daemon if its configuration changed +# +# The iptables-legacy patch for the gateway container image is handled +# automatically by `nemoclaw onboard` when it detects a Jetson GPU. 
+# +# Usage: +# sudo nemoclaw setup-jetson +# # or directly: +# sudo bash scripts/setup-jetson.sh set -euo pipefail -SUDO=() -((EUID != 0)) && SUDO=(sudo) +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +MIN_NODE_VERSION="22.16.0" -info() { - printf "[INFO] %s\n" "$*" -} - -error() { - printf "[ERROR] %s\n" "$*" >&2 +info() { echo -e "${GREEN}>>>${NC} $1"; } +warn() { echo -e "${YELLOW}>>>${NC} $1"; } +fail() { + echo -e "${RED}>>>${NC} $1" exit 1 } -get_jetpack_version() { - local release_line release revision l4t_version - - release_line="$(head -n1 /etc/nv_tegra_release 2>/dev/null || true)" - [[ -n "$release_line" ]] || return 0 - - release="$(printf '%s\n' "$release_line" | sed -n 's/^# R\([0-9][0-9]*\) (release).*/\1/p')" - revision="$(printf '%s\n' "$release_line" | sed -n 's/^.*REVISION: \([0-9][0-9]*\)\..*$/\1/p')" - l4t_version="${release}.${revision}" - - case "$l4t_version" in - 36.*) - printf "%s" "jp6" - ;; - 38.*) - printf "%s" "jp7" - ;; - *) - info "Jetson detected (L4T $l4t_version) but version is not recognized — skipping host setup" - ;; - esac +version_gte() { + # Returns 0 (true) if $1 >= $2 — portable, no sort -V (BSD compat) + local IFS=. + local -a a b + read -r -a a <<<"$1" + read -r -a b <<<"$2" + for i in 0 1 2; do + local ai=${a[$i]:-0} bi=${b[$i]:-0} + if ((ai > bi)); then return 0; fi + if ((ai < bi)); then return 1; fi + done + return 0 } -configure_jetson_host() { - local jetpack_version="$1" +# ── Pre-flight checks ───────────────────────────────────────────── - if ((EUID != 0)); then - info "Jetson host configuration requires sudo. You may be prompted for your password." - "${SUDO[@]}" true >/dev/null || error "Sudo is required to apply Jetson host configuration." - fi +if [ "$(uname -s)" != "Linux" ]; then + fail "This script is for NVIDIA Jetson (Linux). Use 'nemoclaw setup' for macOS." +fi + +if [ "$(uname -m)" != "aarch64" ]; then + fail "Jetson devices are aarch64. This system is $(uname -m)." 
+fi - case "$jetpack_version" in - jp6) - "${SUDO[@]}" update-alternatives --set iptables /usr/sbin/iptables-legacy - "${SUDO[@]}" sed -i '/"iptables": false,/d; /"bridge": "none"/d; s/"default-runtime": "nvidia",/"default-runtime": "nvidia"/' /etc/docker/daemon.json - ;; - jp7) - # JP7 (Thor) does not need iptables or Docker daemon.json changes. - ;; - *) - error "Unsupported Jetson version: $jetpack_version" - ;; - esac - - "${SUDO[@]}" modprobe br_netfilter - "${SUDO[@]}" sysctl -w net.bridge.bridge-nf-call-iptables=1 >/dev/null - - # Persist across reboots - echo "br_netfilter" | "${SUDO[@]}" tee /etc/modules-load.d/nemoclaw.conf >/dev/null - echo "net.bridge.bridge-nf-call-iptables=1" | "${SUDO[@]}" tee /etc/sysctl.d/99-nemoclaw.conf >/dev/null - - if [[ "$jetpack_version" == "jp6" ]]; then - "${SUDO[@]}" systemctl restart docker +if [ "$(id -u)" -ne 0 ]; then + fail "Must run as root: sudo nemoclaw setup-jetson" +fi + +# Verify Jetson platform +JETSON_MODEL="" +if [ -f /proc/device-tree/model ]; then + JETSON_MODEL=$(tr -d '\0' /dev/null || echo "") + if ! echo "$GPU_NAME" | grep -qiE "orin|thor"; then + fail "This does not appear to be a Jetson device. Use 'nemoclaw onboard' directly." fi -} + # Exclude discrete GPUs that happen to contain matching strings + if echo "$GPU_NAME" | grep -qiE "geforce|rtx|quadro"; then + fail "Discrete GPU detected ('$GPU_NAME'). This script is for Jetson only." + fi + JETSON_MODEL="${GPU_NAME}" +fi + +info "Detected Jetson platform: ${JETSON_MODEL}" + +# Detect the real user (not root) for docker group add +REAL_USER="${SUDO_USER:-$(logname 2>/dev/null || echo "")}" + +command -v docker >/dev/null || fail "Docker not found. Install docker.io: sudo apt-get install -y docker.io" +command -v python3 >/dev/null || fail "python3 not found. Install with: sudo apt-get install -y python3-minimal" +command -v node >/dev/null || fail "Node.js not found. NemoClaw requires Node.js >= ${MIN_NODE_VERSION}. 
Install Node.js before running 'nemoclaw onboard'." + +NODE_VERSION_RAW="$(node --version 2>/dev/null || true)" +NODE_VERSION="${NODE_VERSION_RAW#v}" +if ! echo "$NODE_VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then + fail "Could not parse Node.js version from '${NODE_VERSION_RAW}'. NemoClaw requires Node.js >= ${MIN_NODE_VERSION}." +fi +if ! version_gte "$NODE_VERSION" "$MIN_NODE_VERSION"; then + fail "Node.js ${NODE_VERSION_RAW} is too old. NemoClaw requires Node.js >= ${MIN_NODE_VERSION}." +fi +info "Node.js ${NODE_VERSION_RAW} OK" + +# ── 1. Docker group ─────────────────────────────────────────────── + +if [ -n "$REAL_USER" ]; then + if id -nG "$REAL_USER" | grep -qw docker; then + info "User '$REAL_USER' already in docker group" + else + info "Adding '$REAL_USER' to docker group..." + usermod -aG docker "$REAL_USER" + info "Added. Group will take effect on next login (or use 'newgrp docker')." + fi +fi + +# ── 2. NVIDIA Container Runtime ────────────────────────────────── +# +# Jetson JetPack pre-installs nvidia-container-runtime but Docker may +# not be configured to use it as the default runtime. -main() { - local jetpack_version - jetpack_version="$(get_jetpack_version)" - [[ -n "$jetpack_version" ]] || exit 0 +DAEMON_JSON="/etc/docker/daemon.json" +NEEDS_RESTART=false + +configure_nvidia_runtime() { + if ! command -v nvidia-container-runtime >/dev/null 2>&1; then + warn "nvidia-container-runtime not found. GPU passthrough may not work." 
+ warn "Install with: sudo apt-get install -y nvidia-container-toolkit" + return + fi - info "Jetson detected ($jetpack_version) — applying required host configuration" - configure_jetson_host "$jetpack_version" + if [ -f "$DAEMON_JSON" ]; then + # Check if nvidia runtime is already configured + if python3 -c " +import json, sys +try: + d = json.load(open('$DAEMON_JSON')) + runtimes = d.get('runtimes', {}) if isinstance(d, dict) else {} + if 'nvidia' in runtimes and d.get('default-runtime') == 'nvidia': + sys.exit(0) + sys.exit(1) +except (IOError, ValueError, KeyError, AttributeError): + sys.exit(1) +" 2>/dev/null; then + info "NVIDIA runtime already configured in Docker daemon" + else + info "Adding NVIDIA runtime to Docker daemon config..." + python3 -c " +import json +try: + with open('$DAEMON_JSON') as f: + d = json.load(f) +except (IOError, ValueError, KeyError): + d = {} +if not isinstance(d, dict): + d = {} +d.setdefault('runtimes', {})['nvidia'] = { + 'path': 'nvidia-container-runtime', + 'runtimeArgs': [] +} +d['default-runtime'] = 'nvidia' +with open('$DAEMON_JSON', 'w') as f: + json.dump(d, f, indent=2) +" + NEEDS_RESTART=true + fi + else + info "Creating Docker daemon config with NVIDIA runtime..." + mkdir -p "$(dirname "$DAEMON_JSON")" + cat >"$DAEMON_JSON" <<'DAEMONJSON' +{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + }, + "default-runtime": "nvidia" +} +DAEMONJSON + NEEDS_RESTART=true + fi } -main "$@" +configure_nvidia_runtime + +# ── 3. Kernel modules ──────────────────────────────────────────── + +info "Loading required kernel modules..." +modprobe br_netfilter 2>/dev/null || warn "Could not load br_netfilter" +modprobe xt_comment 2>/dev/null || warn "Could not load xt_comment" + +# Persist across reboots +MODULES_FILE="/etc/modules-load.d/nemoclaw-jetson.conf" +if [ ! -f "$MODULES_FILE" ]; then + info "Persisting kernel modules for boot..." 
+ cat >"$MODULES_FILE" <<'MODULES' +# NemoClaw: required for k3s networking inside Docker +br_netfilter +xt_comment +MODULES +fi + +# ── 4. Restart Docker if needed ────────────────────────────────── + +if [ "$NEEDS_RESTART" = true ]; then + info "Restarting Docker daemon..." + if command -v systemctl >/dev/null 2>&1; then + systemctl restart docker + else + service docker restart 2>/dev/null || dockerd & + fi + for i in $(seq 1 15); do + if docker info >/dev/null 2>&1; then + break + fi + [ "$i" -eq 15 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." + sleep 2 + done + info "Docker restarted with NVIDIA runtime" +fi + +# ── Done ───────────────────────────────────────────────────────── + +echo "" +info "Jetson setup complete." +info "" +info "Device: ${JETSON_MODEL}" +info "" +info "Next step: run 'nemoclaw onboard' to set up your sandbox." +info " nemoclaw onboard" +info "" +info "The onboard wizard will automatically patch the gateway image" +info "for Jetson iptables compatibility." 
diff --git a/src/lib/local-inference.test.ts b/src/lib/local-inference.test.ts index 5bc1db5ad6..82cdbab86c 100644 --- a/src/lib/local-inference.test.ts +++ b/src/lib/local-inference.test.ts @@ -8,6 +8,7 @@ import { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, LARGE_OLLAMA_MIN_MEMORY_MB, + DEFAULT_OLLAMA_MODEL_JETSON, getDefaultOllamaModel, getBootstrapOllamaModelOptions, getLocalProviderBaseUrl, @@ -26,6 +27,8 @@ import { validateLocalProvider, } from "../../dist/lib/local-inference"; +const FAKE_JETSON_GPU = { type: "nvidia", totalMemoryMB: 7627, jetson: true, unifiedMemory: true }; + describe("local inference helpers", () => { it("returns the expected base URL for vllm-local", () => { expect(getLocalProviderBaseUrl("vllm-local")).toBe("http://host.openshell.internal:8000/v1"); @@ -304,4 +307,14 @@ describe("local inference helpers", () => { it("treats non-JSON probe output as success once the model responds", () => { expect(validateOllamaModel("nemotron-3-nano:30b", () => "ok")).toEqual({ ok: true }); }); + + it("returns jetson 4b model as default on jetson when available", () => { + const list = "nemotron-3-nano:4b abc 2.8 GB now\nqwen3:32b def 20 GB now"; + expect(getDefaultOllamaModel(() => list, FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); + }); + + it("falls back to jetson 4b model when ollama list is empty on jetson", () => { + expect(getBootstrapOllamaModelOptions(FAKE_JETSON_GPU)).toEqual([DEFAULT_OLLAMA_MODEL_JETSON]); + expect(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); + }); }); diff --git a/src/lib/local-inference.ts b/src/lib/local-inference.ts index b2bcd903fe..db29ee2bcf 100644 --- a/src/lib/local-inference.ts +++ b/src/lib/local-inference.ts @@ -17,11 +17,13 @@ export const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; export const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; export const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; export const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; +export 
const DEFAULT_OLLAMA_MODEL_JETSON = "nemotron-3-nano:4b"; export type RunCaptureFn = (cmd: string, opts?: { ignoreError?: boolean }) => string; export interface GpuInfo { totalMemoryMB: number; + jetson?: boolean; } export interface ValidationResult { @@ -249,6 +251,11 @@ export function getOllamaModelOptions(runCapture: RunCaptureFn): string[] { } export function getBootstrapOllamaModelOptions(gpu: GpuInfo | null): string[] { + // Jetson: fall back to the 4B model that fits in 8GB unified memory + // instead of the 30B default which would OOM. + if (gpu && gpu.jetson) { + return [DEFAULT_OLLAMA_MODEL_JETSON]; + } const options = [SMALL_OLLAMA_MODEL]; if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) { options.push(DEFAULT_OLLAMA_MODEL); @@ -265,6 +272,10 @@ export function getDefaultOllamaModel( const bootstrap = getBootstrapOllamaModelOptions(gpu); return bootstrap[0]; } + if (gpu && gpu.jetson) { + if (models.includes(DEFAULT_OLLAMA_MODEL_JETSON)) return DEFAULT_OLLAMA_MODEL_JETSON; + return models[0]; + } return models.includes(DEFAULT_OLLAMA_MODEL) ? 
DEFAULT_OLLAMA_MODEL : models[0]; } diff --git a/src/lib/nim.test.ts b/src/lib/nim.test.ts index a36dddf3c9..878a3ccc43 100644 --- a/src/lib/nim.test.ts +++ b/src/lib/nim.test.ts @@ -159,6 +159,7 @@ describe("nim", () => { nimCapable: false, unifiedMemory: true, spark: false, + jetson: true, }); } finally { restore(); @@ -256,4 +257,37 @@ describe("nim", () => { } }); }); + + it("detects Jetson Orin and sets jetson flag", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return "Orin"; + if (cmd.includes("free -m")) return "7627"; + return ""; + }); + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true, unifiedMemory: true }); + } finally { + restore(); + } + }); + + it("detects Jetson via /proc/device-tree/model fallback", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return ""; + if (cmd.includes("device-tree/model")) return "NVIDIA Jetson Orin Nano Super Developer Kit"; + if (cmd.includes("free -m")) return "7627"; + return ""; + }); + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true }); + } finally { + restore(); + } + }); }); diff --git a/src/lib/nim.ts b/src/lib/nim.ts index d0118ff393..cf1a5ae18c 100644 --- a/src/lib/nim.ts +++ b/src/lib/nim.ts @@ -26,6 +26,7 @@ export interface GpuDetection { nimCapable: boolean; unifiedMemory?: boolean; spark?: boolean; + jetson?: boolean; } export interface NimStatus { @@ -107,6 +108,9 @@ export function detectGpu(): GpuDetection | null { const count = unifiedGpuNames.length; const perGpuMB = count > 0 ? 
Math.floor(totalMemoryMB / count) : totalMemoryMB; const isSpark = unifiedGpuNames.some((name: string) => /GB10/i.test(name)); + const isJetson = + unifiedGpuNames.some((name: string) => /orin|thor|xavier/i.test(name)) && + !unifiedGpuNames.some((name: string) => /geforce|rtx|quadro/i.test(name)); return { type: "nvidia", name: unifiedGpuNames[0], @@ -116,6 +120,35 @@ export function detectGpu(): GpuDetection | null { nimCapable: canRunNimWithMemory(totalMemoryMB), unifiedMemory: true, spark: isSpark, + jetson: isJetson, + }; + } + } catch { + /* ignored */ + } + + // Jetson fallback: /proc/device-tree/model (for cases where nvidia-smi is absent) + try { + const dtModel = runCapture("cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", { + ignoreError: true, + }); + if (dtModel && /jetson/i.test(dtModel)) { + let totalMemoryMB = 0; + try { + const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); + if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; + } catch { + /* ignored */ + } + return { + type: "nvidia", + name: dtModel.trim(), + count: 1, + totalMemoryMB, + perGpuMB: totalMemoryMB, + nimCapable: false, + unifiedMemory: true, + jetson: true, }; } } catch { diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 8b7f66a91f..58dcb59f50 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -936,6 +936,12 @@ function getSandboxInferenceConfig(model, provider = null, preferredInferenceApi supportsStore: false, }; break; + case "ollama-local": + case "vllm-local": + providerKey = "inference"; + primaryModelRef = `inference/${model}`; + inferenceBaseUrl = getLocalProviderBaseUrl(provider); + break; case "nvidia-prod": case "nvidia-nim": default: @@ -1956,7 +1962,10 @@ async function preflight() { // GPU const gpu = nim.detectGpu(); - if (gpu && gpu.type === "nvidia") { + if (gpu && gpu.type === "nvidia" && gpu.jetson) { + console.log(` ✓ NVIDIA Jetson detected: ${gpu.name}, ${gpu.totalMemoryMB} MB unified 
memory`); + console.log(" ⓘ NIM containers not supported on Jetson — will use Ollama or cloud inference"); + } else if (gpu && gpu.type === "nvidia") { console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); if (!gpu.nimCapable) { console.log(" ⓘ GPU VRAM too small for local NIM — will use cloud inference"); @@ -2016,9 +2025,85 @@ async function preflight() { return gpu; } +// ── Jetson gateway image patch ─────────────────────────────────── +// +// JetPack kernels (Tegra) ship without nft_chain_filter and related +// nf_tables modules. The OpenShell gateway image embeds k3s, whose +// network policy controller calls iptables in nf_tables mode by default. +// Without kernel support the controller panics on startup. +// +// This function rebuilds the gateway image locally, switching the +// default iptables alternative to iptables-legacy so all rule +// manipulation uses the classic xtables backend that Tegra kernels +// fully support. + +/** Extracts the semver tag from the installed openshell CLI version. */ +function getGatewayImageTag() { + const openshellVersion = + runCapture("openshell --version 2>/dev/null", { ignoreError: true }) || ""; + const match = openshellVersion.match(/(\d+\.\d+\.\d+)/); + return match ? match[1] : "latest"; +} + +/** + * Rebuilds the OpenShell gateway container image with iptables-legacy as the + * default backend. Idempotent — skips rebuild if the image is already patched + * (checked via Docker label). Required on Jetson because the Tegra kernel + * lacks nft_chain_filter modules that k3s's network policy controller needs. 
+ */ +function patchGatewayImageForJetson() { + const tag = getGatewayImageTag(); + const image = `ghcr.io/nvidia/openshell/cluster:${tag}`; + + // Check if already patched (look for our label) + const inspectOut = ( + runCapture( + `docker inspect --format='{{index .Config.Labels "io.nemoclaw.jetson-patched"}}' ${shellQuote(image)} 2>/dev/null`, + { ignoreError: true }, + ) || "" + ).trim(); + if (inspectOut === "true") { + console.log(" ✓ Gateway image already patched for Jetson"); + return; + } + + console.log(" Patching gateway image for Jetson (iptables-legacy)..."); + console.log(" (this may take a moment on first run if the base image needs to be pulled)"); + + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-")); + try { + const dockerfile = path.join(tmpDir, "Dockerfile"); + fs.writeFileSync( + dockerfile, + [ + `FROM ${image}`, + `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`, + ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`, + ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`, + ` :; \\`, + ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`, + ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`, + ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`, + ` else \\`, + ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`, + ` fi`, + `LABEL io.nemoclaw.jetson-patched="true"`, + "", + ].join("\n"), + ); + + run(`docker build --quiet -t ${shellQuote(image)} ${shellQuote(tmpDir)}`, { + ignoreError: false, + }); + console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } +} + // ── Step 2: Gateway ────────────────────────────────────────────── -async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { +async function startGatewayWithOptions(gpu, { exitOnFailure = true } = {}) { step(2, 
8, "Starting OpenShell gateway"); const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); @@ -2033,6 +2118,15 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { return; } + // Jetson (Tegra kernel): The k3s container image ships iptables v1.8.10 in + // nf_tables mode, but JetPack kernels lack the nft_chain_filter module, + // causing the k3s network policy controller to panic on startup. + // Workaround: rebuild the gateway image locally with iptables-legacy as the + // default so iptables commands use the legacy (xtables) backend instead. + if (gpu && gpu.jetson) { + patchGatewayImageForJetson(); + } + // When a stale gateway is detected (metadata exists but container is gone, // e.g. after a Docker/Colima restart), skip the destroy — `gateway start` // can recover the container without wiping metadata and mTLS certs. @@ -4763,6 +4857,7 @@ module.exports = { createSandbox, formatEnvAssignment, getFutureShellPathHint, + getGatewayImageTag, getGatewayStartEnv, getGatewayReuseState, getNavigationChoice, @@ -4810,6 +4905,7 @@ module.exports = { hasResponsesToolCall, upsertProvider, hydrateCredentialEnv, + patchGatewayImageForJetson, pruneKnownHostsEntries, shouldIncludeBuildContextPath, writeSandboxConfigSyncFile, diff --git a/src/nemoclaw.ts b/src/nemoclaw.ts index f5db23544e..f34fe90e7f 100644 --- a/src/nemoclaw.ts +++ b/src/nemoclaw.ts @@ -75,6 +75,7 @@ const GLOBAL_COMMANDS = new Set([ "deploy", "setup", "setup-spark", + "setup-jetson", "start", "stop", "status", @@ -819,6 +820,11 @@ async function setupSpark(args = []) { }); } +function setupJetson() { + // setup-jetson.sh configures the Docker NVIDIA runtime and kernel modules; the iptables-legacy gateway patch is applied later by onboard. 
+ run(`sudo bash "${path.join(ROOT, "scripts", "setup-jetson.sh")}"`); +} + async function deploy(instanceName) { await executeDeploy({ instanceName, @@ -1322,6 +1328,7 @@ function help() { ${B}nemoclaw onboard${R} Configure inference endpoint and credentials nemoclaw onboard ${D}--from ${R} Use a custom Dockerfile for the sandbox image ${D}(non-interactive: ${NOTICE_ACCEPT_FLAG} or ${NOTICE_ACCEPT_ENV}=1)${R} + nemoclaw setup-jetson Set up on Jetson ${D}(Docker runtime + iptables-legacy)${R} ${G}Sandbox Management:${R} ${B}nemoclaw list${R} List all sandboxes @@ -1390,6 +1397,9 @@ const [cmd, ...args] = process.argv.slice(2); case "setup-spark": await setupSpark(args); break; + case "setup-jetson": + setupJetson(); + break; case "deploy": await deploy(args[0]); break; diff --git a/test/onboard.test.ts b/test/onboard.test.ts index 0398170797..1313c86ed6 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -44,6 +44,8 @@ import { summarizeProbeFailure, shouldIncludeBuildContextPath, writeSandboxConfigSyncFile, + getGatewayImageTag, + patchGatewayImageForJetson, } from "../dist/lib/onboard"; import { stageOptimizedSandboxBuildContext } from "../dist/lib/sandbox-build-context"; import { buildWebSearchDockerConfig } from "../dist/lib/web-search"; @@ -4244,4 +4246,169 @@ const { createSandbox } = require(${onboardPath}); assert.match(fnBody, /isNonInteractive\(\)/); assert.match(fnBody, /process\.exit\(1\)/); }); + + it("exports getGatewayImageTag and patchGatewayImageForJetson as functions", () => { + assert.equal(typeof getGatewayImageTag, "function"); + assert.equal(typeof patchGatewayImageForJetson, "function"); + }); + + it("patchGatewayImageForJetson skips rebuild when image is already patched (idempotency)", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-patch-idem-")); + const scriptPath = path.join(tmpDir, "jetson-patch-idempotent.js"); + const fakeBin = 
path.join(tmpDir, "bin"); + const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard.js")); + const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js")); + + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\necho 0.0.10\n", { + mode: 0o755, + }); + + const script = String.raw` +const runner = require(${runnerPath}); +const commands = []; +runner.run = (command, opts = {}) => { + commands.push({ command, type: "run" }); + return { status: 0 }; +}; +runner.runCapture = (command) => { + commands.push({ command, type: "runCapture" }); + if (command.includes("openshell --version")) return "0.0.10"; + if (command.includes("docker inspect") && command.includes("jetson-patched")) return "true"; + return ""; +}; + +const { patchGatewayImageForJetson } = require(${onboardPath}); +patchGatewayImageForJetson(); + +const buildCalls = commands.filter(c => c.command && c.command.includes("docker build")); +console.log(JSON.stringify({ buildCalls: buildCalls.length, totalCommands: commands.length })); +`; + fs.writeFileSync(scriptPath, script); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { ...process.env, HOME: tmpDir, PATH: `${fakeBin}:${process.env.PATH || ""}` }, + }); + + assert.equal(result.status, 0, `stderr: ${result.stderr}`); + const output = JSON.parse(result.stdout.trim().split("\n").pop()); + assert.equal(output.buildCalls, 0, "docker build should NOT be called when already patched"); + }); + + it("patchGatewayImageForJetson builds image when not yet patched", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-patch-build-")); + const scriptPath = path.join(tmpDir, "jetson-patch-build.js"); + const fakeBin = path.join(tmpDir, "bin"); + const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", 
"onboard.js")); + const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js")); + + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\necho 0.0.10\n", { + mode: 0o755, + }); + + const script = String.raw` +const runner = require(${runnerPath}); +const commands = []; +runner.run = (command, opts = {}) => { + commands.push({ command, type: "run" }); + return { status: 0 }; +}; +runner.runCapture = (command) => { + commands.push({ command, type: "runCapture" }); + if (command.includes("openshell --version")) return "0.0.10"; + if (command.includes("docker inspect") && command.includes("jetson-patched")) return ""; + return ""; +}; + +const { patchGatewayImageForJetson } = require(${onboardPath}); +patchGatewayImageForJetson(); + +const buildCalls = commands.filter(c => c.command && c.command.includes("docker build")); +const buildCmd = buildCalls.length > 0 ? buildCalls[0].command : ""; +console.log(JSON.stringify({ + buildCalls: buildCalls.length, + usesShellQuote: buildCmd.includes("'ghcr.io/nvidia/openshell/cluster:0.0.10'"), + hasImage: buildCmd.includes("ghcr.io/nvidia/openshell/cluster:0.0.10"), +})); +`; + fs.writeFileSync(scriptPath, script); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { ...process.env, HOME: tmpDir, PATH: `${fakeBin}:${process.env.PATH || ""}` }, + }); + + assert.equal(result.status, 0, `stderr: ${result.stderr}`); + const output = JSON.parse(result.stdout.trim().split("\n").pop()); + assert.equal(output.buildCalls, 1, "docker build should be called once"); + assert.ok(output.hasImage, "docker build should reference the correct image tag"); + assert.ok(output.usesShellQuote, "docker build should use shellQuote for image name"); + }); + + it("patchGatewayImageForJetson cleans up temp directory even on build failure", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const 
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-patch-cleanup-")); + const scriptPath = path.join(tmpDir, "jetson-patch-cleanup.js"); + const fakeBin = path.join(tmpDir, "bin"); + const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard.js")); + const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js")); + const fsPath = JSON.stringify("fs"); + const osPath = JSON.stringify("os"); + + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\necho 0.0.10\n", { + mode: 0o755, + }); + + const script = String.raw` +const fs = require(${fsPath}); +const nodeOs = require(${osPath}); +const runner = require(${runnerPath}); + +runner.run = (command) => { + if (command.includes("docker build")) { + throw new Error("simulated docker build failure"); + } + return { status: 0 }; +}; +runner.runCapture = (command) => { + if (command.includes("openshell --version")) return "0.0.10"; + if (command.includes("docker inspect") && command.includes("jetson-patched")) return ""; + return ""; +}; + +const { patchGatewayImageForJetson } = require(${onboardPath}); + +const tmpBefore = fs.readdirSync(nodeOs.tmpdir()).filter(d => d.startsWith("nemoclaw-jetson-")); + +let threw = false; +try { + patchGatewayImageForJetson(); +} catch (e) { + threw = true; +} + +const tmpAfter = fs.readdirSync(nodeOs.tmpdir()).filter(d => d.startsWith("nemoclaw-jetson-")); +const newDirs = tmpAfter.filter(d => !tmpBefore.includes(d)); +console.log(JSON.stringify({ threw, leakedDirs: newDirs.length })); +`; + fs.writeFileSync(scriptPath, script); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { ...process.env, HOME: tmpDir, PATH: `${fakeBin}:${process.env.PATH || ""}` }, + }); + + assert.equal(result.status, 0, `stderr: ${result.stderr}`); + const output = JSON.parse(result.stdout.trim().split("\n").pop()); + 
assert.ok(output.threw, "should have thrown on docker build failure"); + assert.equal(output.leakedDirs, 0, "temp directory should be cleaned up after failure"); + }); });