NVIDIA · ericksoa · Mar 19, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/README.md b/README.md
@@ -48,9 +48,18 @@ The sandbox image is approximately 2.4 GB compressed. During image push, the Doc
 | Linux      | Ubuntu 22.04 LTS or later |
 | Node.js    | 20 or later |
 | npm        | 10 or later |
-| Docker     | Installed and running |
+| Container runtime | Supported runtime installed and running |
 | [OpenShell](https://github.com/NVIDIA/OpenShell) | Installed |
 
+#### Container Runtime Support
+
+| Platform | Supported runtimes | Notes |
+|----------|--------------------|-------|
+| Linux | Docker | Primary supported path today |
+| macOS (Apple Silicon) | Colima, Docker Desktop | Recommended runtimes for supported macOS setups |
+| macOS | Podman | Not supported yet. NemoClaw currently depends on OpenShell support for Podman on macOS. |
+| Windows WSL | Docker Desktop (WSL backend) | Supported target path |
+
 ### Install NemoClaw and Onboard OpenClaw Agent
 
 Download and run the installer script.
@@ -144,6 +153,8 @@ Inference requests from the agent never leave the sandbox directly. OpenShell in
 
 Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
 
+Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.
+
 ---
 
 ## Protection Layers

diff --git a/bin/lib/credentials.js b/bin/lib/credentials.js
@@ -36,6 +36,14 @@ function prompt(question) {
     const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
     rl.question(question, (answer) => {
       rl.close();
+      if (!process.stdin.isTTY) {
+        if (typeof process.stdin.pause === "function") {
+          process.stdin.pause();
+        }
+        if (typeof process.stdin.unref === "function") {
+          process.stdin.unref();
+        }
+      }
       resolve(answer.trim());
     });
   });

diff --git a/bin/lib/inference-config.js b/bin/lib/inference-config.js
@@ -0,0 +1,75 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const INFERENCE_ROUTE_URL = "https://inference.local/v1"; // endpointUrl returned for every provider handled by getProviderSelectionConfig
+const DEFAULT_CLOUD_MODEL = "nvidia/nemotron-3-super-120b-a12b"; // model used for "nvidia-nim" when the caller passes none
+const CLOUD_MODEL_OPTIONS = [ // cloud model catalogue as { id, label } pairs; the first id equals DEFAULT_CLOUD_MODEL
+  { id: "nvidia/nemotron-3-super-120b-a12b", label: "Nemotron 3 Super 120B" },
+  { id: "moonshotai/kimi-k2.5", label: "Kimi K2.5" },
+  { id: "z-ai/glm5", label: "GLM-5" },
+  { id: "minimaxai/minimax-m2.5", label: "MiniMax M2.5" },
+  { id: "qwen/qwen3.5-397b-a17b", label: "Qwen3.5 397B A17B" },
+  { id: "openai/gpt-oss-120b", label: "GPT-OSS 120B" },
+];
+const DEFAULT_ROUTE_PROFILE = "inference-local"; // `profile` value shared by all provider configs
+const DEFAULT_ROUTE_CREDENTIAL_ENV = "OPENAI_API_KEY"; // `credentialEnv` value shared by all provider configs
+const MANAGED_PROVIDER_ID = "inference"; // prefix that getOpenClawPrimaryModel puts before the model id
+const { DEFAULT_OLLAMA_MODEL } = require("./local-inference"); // also re-exported from this module
+
+function getProviderSelectionConfig(provider, model) { // Maps a provider id to its route config object; returns null for ids other than the three cases below.
+  switch (provider) {
+    case "nvidia-nim":
+      return {
+        endpointType: "custom",
+        endpointUrl: INFERENCE_ROUTE_URL,
+        ncpPartner: null,
+        model: model || DEFAULT_CLOUD_MODEL, // any falsy model falls back to the cloud default
+        profile: DEFAULT_ROUTE_PROFILE,
+        credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
+        provider,
+        providerLabel: "NVIDIA Cloud API",
+      };
+    case "vllm-local":
+      return {
+        endpointType: "custom",
+        endpointUrl: INFERENCE_ROUTE_URL,
+        ncpPartner: null,
+        model: model || "vllm-local", // fallback model name is the literal provider id
+        profile: DEFAULT_ROUTE_PROFILE,
+        credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
+        provider,
+        providerLabel: "Local vLLM",
+      };
+    case "ollama-local":
+      return {
+        endpointType: "custom",
+        endpointUrl: INFERENCE_ROUTE_URL,
+        ncpPartner: null,
+        model: model || DEFAULT_OLLAMA_MODEL, // fallback comes from ./local-inference
+        profile: DEFAULT_ROUTE_PROFILE,
+        credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
+        provider,
+        providerLabel: "Local Ollama",
+      };
+    default:
+      return null; // unknown provider: no config
+  }
+}
+
+function getOpenClawPrimaryModel(provider, model) { // Returns "<MANAGED_PROVIDER_ID>/<model>", choosing a default model when `model` is falsy.
+  const resolvedModel =
+    model || (provider === "ollama-local" ? DEFAULT_OLLAMA_MODEL : DEFAULT_CLOUD_MODEL); // NOTE(review): "vllm-local" also falls back to DEFAULT_CLOUD_MODEL here, whereas getProviderSelectionConfig falls back to "vllm-local" — confirm this is intended
+  return resolvedModel ? `${MANAGED_PROVIDER_ID}/${resolvedModel}` : null; // null only when the resolved model is falsy
+}
+
+module.exports = { // exports of bin/lib/inference-config.js: constants first, then the two helper functions
+  CLOUD_MODEL_OPTIONS,
+  DEFAULT_CLOUD_MODEL,
+  DEFAULT_OLLAMA_MODEL, // re-export of the value required from ./local-inference
+  DEFAULT_ROUTE_CREDENTIAL_ENV,
+  DEFAULT_ROUTE_PROFILE,
+  INFERENCE_ROUTE_URL,
+  MANAGED_PROVIDER_ID,
+  getOpenClawPrimaryModel,
+  getProviderSelectionConfig,
+};
diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js
@@ -0,0 +1,179 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const HOST_GATEWAY_URL = "http://host.openshell.internal"; // base that getLocalProviderBaseUrl extends with a port and /v1
+const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; // image named in the `docker run` reachability commands
+const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; // returned by getOllamaModelOptions when `ollama list` yields no models
+
+function getLocalProviderBaseUrl(provider) { // Returns the HOST_GATEWAY_URL-based /v1 URL for a local provider, or null for any other id.
+  switch (provider) {
+    case "vllm-local":
+      return `${HOST_GATEWAY_URL}:8000/v1`; // vLLM on port 8000
+    case "ollama-local":
+      return `${HOST_GATEWAY_URL}:11434/v1`; // Ollama on port 11434
+    default:
+      return null;
+  }
+}
+
+function getLocalProviderHealthCheck(provider) { // Returns a shell command string that probes the provider on localhost (it is not executed here), or null for any other id.
+  switch (provider) {
+    case "vllm-local":
+      return "curl -sf http://localhost:8000/v1/models 2>/dev/null"; // stderr is discarded by the command itself
+    case "ollama-local":
+      return "curl -sf http://localhost:11434/api/tags 2>/dev/null"; // probes /api/tags rather than the /v1 path
+    default:
+      return null;
+  }
+}
+
+function getLocalProviderContainerReachabilityCheck(provider) { // Returns a `docker run` command string that probes the provider from inside a container, or null for any other id.
+  switch (provider) {
+    case "vllm-local":
+      return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`; // args after the image name are presumably passed to the image's curl entrypoint — confirm
+    case "ollama-local":
+      return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`; // host name is written out literally here, not derived from HOST_GATEWAY_URL
+    default:
+      return null;
+  }
+}
+
+function validateLocalProvider(provider, runCapture) {
+  const command = getLocalProviderHealthCheck(provider);
+  if (!command) {
+    return { ok: true };
+  }
-  const command = getLocalProviderHealthCheck(provider);
-  if (!command) {
-    return { ok: true };
-  }
+  const command = getLocalProviderHealthCheck(provider);
+  if (!command) {
+    return {
+      ok: false,
+      message: `Unsupported local inference provider: ${provider}.`,
+    };
+  }
-  const command = getLocalProviderHealthCheck(provider);
-  if (!command) {
-    return { ok: true };
-  }
+  const command = getLocalProviderHealthCheck(provider);
+  if (!command) {
+    return {
+      ok: false,
+      message: `Unsupported local inference provider: ${provider}.`,
+    };
+  }
+
+  const output = runCapture(command, { ignoreError: true });
+  if (!output) {
+    switch (provider) {
+      case "vllm-local":
+        return {
+          ok: false,
+          message: "Local vLLM was selected, but nothing is responding on http://localhost:8000.",
+        };
+      case "ollama-local":
+        return {
+          ok: false,
+          message: "Local Ollama was selected, but nothing is responding on http://localhost:11434.",
+        };
+      default:
+        return { ok: false, message: "The selected local inference provider is unavailable." };
+    }
+  }
+
+  const containerCommand = getLocalProviderContainerReachabilityCheck(provider);
+  if (!containerCommand) {
+    return { ok: true };
+  }
+
+  const containerOutput = runCapture(containerCommand, { ignoreError: true });
+  if (containerOutput) {
+    return { ok: true };
+  }
+
+  switch (provider) {
+    case "vllm-local":
+      return {
+        ok: false,
+        message:
+          "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.",
+      };
+    case "ollama-local":
+      return {
+        ok: false,
+        message:
+          "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.",
+      };
+    default:
+      return { ok: false, message: "The selected local inference provider is unavailable from containers." };
+  }
+}
+
+function parseOllamaList(output) { // Extracts the first column of each data row from `ollama list`-style text; returns an array of strings.
+  return String(output || "") // null/undefined/empty input becomes "" and yields []
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .filter(Boolean) // drop blank lines
+    .filter((line) => !/^NAME\s+/i.test(line)) // drop the header row that starts with NAME
+    .map((line) => line.split(/\s{2,}/)[0]) // columns are separated by two or more whitespace characters
+    .filter(Boolean);
+}
+
+function getOllamaModelOptions(runCapture) { // Returns the model names parsed from `ollama list`, or [DEFAULT_OLLAMA_MODEL] when none are found.
+  const output = runCapture("ollama list 2>/dev/null", { ignoreError: true }); // runCapture is caller-supplied; its result goes straight to parseOllamaList
+  const parsed = parseOllamaList(output);
+  if (parsed.length > 0) {
+    return parsed;
+  }
+  return [DEFAULT_OLLAMA_MODEL]; // fallback so the result is never empty
+}
+
+function getDefaultOllamaModel(runCapture) { // Picks DEFAULT_OLLAMA_MODEL when it is among the listed models, otherwise the first listed model.
+  const models = getOllamaModelOptions(runCapture); // never empty: falls back to [DEFAULT_OLLAMA_MODEL]
+  return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0];
+}
+
+function shellQuote(value) { // Wraps the stringified value in single quotes for use as one shell word.
+  return `'${String(value).replace(/'/g, `'\\''`)}'`; // each embedded ' becomes '\'' (close quote, escaped quote, reopen quote)
+}
+
+function getOllamaWarmupCommand(model, keepAlive = "15m") { // Returns a shell command string (not executed here) that sends a generate request for `model` in the background.
+  const payload = JSON.stringify({
+    model,
+    prompt: "hello",
+    stream: false,
+    keep_alive: keepAlive,
+  });
+  return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`; // trailing & backgrounds the request; all output is discarded
+}
+
+function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") { // Returns a foreground shell command string (not executed here) that sends a generate request for `model` with a time limit.
+  const payload = JSON.stringify({
+    model,
+    prompt: "hello",
+    stream: false,
+    keep_alive: keepAlive,
+  });
+  return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`; // timeoutSeconds is interpolated unquoted; only the payload goes through shellQuote
+}
+
+function validateOllamaModel(model, runCapture) { // Runs the probe command for `model` via runCapture; returns { ok: true } or { ok: false, message }.
+  const output = runCapture(getOllamaProbeCommand(model), { ignoreError: true }); // uses the probe's default timeout and keep-alive
+  if (!output) {
+    return {
+      ok: false,
+      message:
+        `Selected Ollama model '${model}' did not answer the local probe in time. ` +
+        "It may still be loading, too large for the host, or otherwise unhealthy.",
+    };
+  }
+
+  try {
+    const parsed = JSON.parse(output);
+    if (parsed && typeof parsed.error === "string" && parsed.error.trim()) { // only a non-blank string `error` field counts as a failure
+      return {
+        ok: false,
+        message: `Selected Ollama model '${model}' failed the local probe: ${parsed.error.trim()}`,
+      };
+    }
+  } catch {} // output that is not valid JSON is ignored and falls through to ok: true
+
+  return { ok: true };
+}
+
+module.exports = { // exports of bin/lib/local-inference.js: three constants, then the helper functions
+  CONTAINER_REACHABILITY_IMAGE,
+  DEFAULT_OLLAMA_MODEL,
+  HOST_GATEWAY_URL,
+  getDefaultOllamaModel,
+  getLocalProviderBaseUrl,
+  getLocalProviderContainerReachabilityCheck,
+  getLocalProviderHealthCheck,
+  getOllamaModelOptions,
+  getOllamaProbeCommand,
+  getOllamaWarmupCommand,
+  parseOllamaList,
+  validateOllamaModel,
+  validateLocalProvider,
+}; // shellQuote is not exported