Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 142 additions & 1 deletion bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,132 @@ async function promptOrDefault(question, envVar, defaultValue) {
return prompt(question);
}

// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
// See: https://github.com/NVIDIA/NemoClaw/issues/246
// Patterns are matched case-insensitively against the bare model name, i.e.
// after isReasoningModel() has stripped registry, namespace, and :tag.
const KNOWN_REASONING_MODEL_PATTERNS = [
  /^nemotron.*nano/i, // e.g. nemotron-3-nano
  /^deepseek-r1/i,
  /^qwq/i,
];

/**
 * Split an Ollama model reference into { withoutTag, baseName, tag }.
 *
 * Accepts fully-qualified refs ("ghcr.io/org/deepseek-r1:8b"), plain refs
 * ("deepseek-r1:8b", "deepseek-r1"), and refs with an @digest suffix.
 * A colon only counts as a tag separator when it appears after the last "/",
 * so registry ports (host:5000/model) are never mistaken for tags.
 */
function parseOllamaModelRef(modelRef) {
  const refStr = String(modelRef);
  // Drop any @digest suffix first.
  const atPos = refStr.indexOf("@");
  const ref = atPos === -1 ? refStr : refStr.slice(0, atPos);
  // First colon after the last slash (if any) starts the tag.
  const lastSlash = ref.lastIndexOf("/");
  const colonPos = ref.indexOf(":", lastSlash + 1);
  const withoutTag = colonPos === -1 ? ref : ref.slice(0, colonPos);
  return {
    withoutTag,
    baseName: withoutTag.slice(withoutTag.lastIndexOf("/") + 1),
    tag: colonPos === -1 ? "" : ref.slice(colonPos + 1),
  };
}

/**
 * Whether `modelName` refers to a known reasoning model (one that emits to
 * the `reasoning` field rather than `content`).
 *
 * Matching is done against the bare model name — registry, namespace, and
 * tag are irrelevant to whether the underlying model reasons — and "-chat"
 * variants are never treated as reasoning models.
 */
function isReasoningModel(modelName) {
  if (typeof modelName !== "string" || modelName.length === 0) {
    return false;
  }
  const { baseName } = parseOllamaModelRef(modelName);
  // Chat variants already force standard chat output.
  if (/-chat$/i.test(baseName)) {
    return false;
  }
  for (const pattern of KNOWN_REASONING_MODEL_PATTERNS) {
    if (pattern.test(baseName)) {
      return true;
    }
  }
  return false;
}

/**
 * Query the local Ollama daemon for installed model names.
 *
 * @returns {string[]} model names, or an empty array when Ollama is
 *   unreachable, times out, or returns an unexpected payload.
 */
function listOllamaModels() {
  try {
    const { execSync } = require("child_process");
    // curl -sf: silent, and exit non-zero on HTTP errors, so any failure
    // (daemon down, 4xx/5xx, timeout) lands in the catch below.
    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 5000,
    });
    const data = JSON.parse(raw);
    // Defensive: tolerate missing `models` and malformed entries.
    return (data.models || [])
      .map((m) => m && m.name)
      .filter((name) => typeof name === "string" && name.length > 0);
  } catch {
    return [];
  }
}

/**
 * Derive the "-chat" variant name for an Ollama model ref, keeping the tag
 * in the name so different tags never collide:
 *   deepseek-r1:8b  → deepseek-r1-8b-chat
 *   deepseek-r1:14b → deepseek-r1-14b-chat
 *
 * A colon is only a tag separator when it appears after the last "/", so a
 * registry port (registry.example.com:5000/model) is left untouched.
 */
function buildChatVariantName(baseModel) {
  const slashPos = baseModel.lastIndexOf("/");
  const colonPos = baseModel.lastIndexOf(":");
  if (colonPos <= slashPos) {
    // No tag — any colon present belongs to a registry port.
    return `${baseModel}-chat`;
  }
  // Sanitize the tag so the variant name stays a valid Ollama model name.
  const sanitizedTag = baseModel
    .slice(colonPos + 1)
    .replace(/[^a-z0-9._-]/gi, "-");
  return `${baseModel.slice(0, colonPos)}-${sanitizedTag}-chat`;
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Create an Ollama chat variant of `baseModel` named `variantName` by
 * writing a minimal Modelfile (`FROM <baseModel>`) and running
 * `ollama create`.
 *
 * @returns {string|null} `variantName` on success; null on failure (a
 *   warning is logged and the temp directory is still cleaned up).
 */
function createOllamaChatVariant(baseModel, variantName) {
  const { execFileSync } = require("child_process");
  const os = require("os");
  // Use mkdtempSync for atomic temp directory creation — avoids TOCTOU races
  // with predictable filenames on multi-user systems
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-modelfile-"));
  const modelfilePath = path.join(tempDir, "Modelfile");
  try {
    fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, {
      encoding: "utf-8",
      flag: "wx", // exclusive creation — fails if file already exists
    });
    // execFileSync (not execSync): no shell, so a model name can never be
    // interpreted as shell metacharacters. Generous 120s timeout — `ollama
    // create` may copy large model layers.
    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 120000,
    });
    return variantName;
  } catch (err) {
    console.log(` ⚠ Could not create chat variant '${variantName}': ${err.message || err}`);
    return null;
  } finally {
    // Best-effort cleanup; never mask the real success/failure result.
    try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}

// ── Helpers ──────────────────────────────────────────────────────

/**
 * Resolve a model name to a chat-safe equivalent.
 *
 * Known reasoning models are swapped for a "-chat" variant (reusing an
 * existing one when present, otherwise creating it via `ollama create`);
 * every other model is returned untouched. Falls back to the original
 * model when variant creation fails.
 */
function handleReasoningModel(model) {
  if (!isReasoningModel(model)) {
    return model;
  }
  const variantName = buildChatVariantName(model);
  // Idempotency: reuse a previously created variant instead of rebuilding.
  if (listOllamaModels().includes(variantName)) {
    console.log(` ✓ Using existing chat variant: ${variantName}`);
    return variantName;
  }
  console.log(` ⚠ '${model}' is a reasoning model — creating chat variant...`);
  const created = createOllamaChatVariant(model, variantName);
  if (created === null) {
    console.log(" ⚠ Could not create chat variant. Model may return empty responses.");
    return model;
  }
  console.log(` ✓ Using chat variant: ${created}`);
  return created;
}

function step(n, total, msg) {
console.log("");
console.log(` [${n}/${total}] ${msg}`);
Expand Down Expand Up @@ -591,13 +715,29 @@ async function setupNim(sandboxName, gpu) {
run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true });
sleep(2);
}
console.log(" ✓ Using Ollama on localhost:11434");
// List available models and let the user pick
const ollamaModels = listOllamaModels();
if (ollamaModels.length > 0) {
console.log("");
console.log(" Available Ollama models:");
ollamaModels.forEach((m, i) => {
console.log(` ${i + 1}) ${m}`);
});
console.log("");
const modelChoice = await prompt(` Choose model [1]: `);
const midx = parseInt(modelChoice || "1", 10) - 1;
model = ollamaModels[midx] || ollamaModels[0];
} else {
model = "nemotron-3-nano";
}
provider = "ollama-local";
if (isNonInteractive()) {
model = requestedModel || getDefaultOllamaModel(runCapture);
} else {
model = await promptOllamaModel();
}
model = handleReasoningModel(model);
console.log(` ✓ Using Ollama on localhost:11434 with model: ${model}`);
} else if (selected.key === "install-ollama") {
console.log(" Installing Ollama via Homebrew...");
run("brew install ollama", { ignoreError: true });
Expand All @@ -611,6 +751,7 @@ async function setupNim(sandboxName, gpu) {
} else {
model = await promptOllamaModel();
}
model = handleReasoningModel(model);
} else if (selected.key === "vllm") {
console.log(" ✓ Using existing vLLM on localhost:8000");
provider = "vllm-local";
Expand Down
81 changes: 81 additions & 0 deletions nemoclaw/src/commands/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// SPDX-License-Identifier: Apache-2.0

import { execFileSync, execSync } from "node:child_process";
import { mkdtempSync, rmSync, unlinkSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { PluginLogger, NemoClawConfig } from "../index.js";
import {
describeOnboardEndpoint,
Expand All @@ -14,6 +17,55 @@ import {
import { promptInput, promptConfirm, promptSelect } from "../onboard/prompt.js";
import { validateApiKey, maskApiKey } from "../onboard/validate.js";

// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
// See: https://github.com/NVIDIA/NemoClaw/issues/246
// Patterns are matched case-insensitively against the model name by
// isReasoningModel(). Keep in sync with bin/lib/onboard.js.
const KNOWN_REASONING_MODEL_PATTERNS: RegExp[] = [
  /^nemotron.*nano/i, // e.g. nemotron-3-nano
  /^deepseek-r1/i,
  /^qwq/i,
];

/**
 * Whether `modelName` refers to a known Ollama reasoning model (one that
 * emits to the `reasoning` field rather than `content`).
 *
 * Fix: the previous version matched the raw ref, so fully-qualified refs
 * like "ghcr.io/org/deepseek-r1:8b" failed the `^`-anchored patterns. We now
 * strip any @digest, registry/namespace, and :tag first (a colon only counts
 * as a tag separator after the last "/", so registry ports are preserved),
 * mirroring parseOllamaModelRef in bin/lib/onboard.js.
 */
function isReasoningModel(modelName: string): boolean {
  if (modelName.length === 0) return false;
  // Drop @digest, then isolate the bare model name.
  const ref = modelName.split("@", 1)[0];
  const lastSlash = ref.lastIndexOf("/");
  const colonPos = ref.indexOf(":", lastSlash + 1);
  const withoutTag = colonPos === -1 ? ref : ref.slice(0, colonPos);
  const baseName = withoutTag.slice(withoutTag.lastIndexOf("/") + 1);
  // Exclude chat variants (e.g. nemotron-3-nano-chat) — they don't use reasoning mode
  if (/-chat$/i.test(baseName)) return false;
  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(baseName));
}

/**
 * List model names known to the local Ollama daemon.
 *
 * `curl -sf` exits non-zero on HTTP errors, so any failure (daemon down,
 * 4xx/5xx, timeout) lands in the catch and yields an empty array.
 *
 * Fix: guard against malformed entries in the JSON payload instead of
 * trusting `m.name` to be a string (consistent with bin/lib/onboard.js).
 */
function listOllamaModels(): string[] {
  try {
    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 5000,
    });
    const data = JSON.parse(raw) as { models?: Array<{ name?: unknown }> };
    return (data.models ?? [])
      .map((m) => m?.name)
      .filter((name): name is string => typeof name === "string" && name.length > 0);
  } catch {
    return [];
  }
}

/**
 * Create a "-chat" variant of an Ollama reasoning model by writing a minimal
 * Modelfile (`FROM <baseModel>`) and running `ollama create`.
 *
 * Fixes over the previous version (ports the bin/lib/onboard.js behavior):
 * - `replace(/:.*$/, "")` stripped from the FIRST colon, mangling registry
 *   refs with ports (registry.example.com:5000/model:v1 → "registry.example.com-chat").
 *   A colon now counts as a tag separator only after the last "/".
 * - The tag is preserved in the variant name, so deepseek-r1:8b and
 *   deepseek-r1:14b no longer collide on the same "-chat" name.
 * - The Modelfile lives in a mkdtempSync directory and is created with the
 *   exclusive "wx" flag — no predictable Date.now() filename (TOCTOU race
 *   on multi-user systems).
 *
 * @returns the variant name on success; null on failure (warning logged).
 */
function createOllamaChatVariant(baseModel: string, logger: PluginLogger): string | null {
  const lastSlash = baseModel.lastIndexOf("/");
  const colonPos = baseModel.lastIndexOf(":");
  const hasTag = colonPos > lastSlash;
  // Sanitize the tag so the variant name stays a valid Ollama model name.
  const tagSuffix = hasTag
    ? `-${baseModel.slice(colonPos + 1).replace(/[^a-z0-9._-]/gi, "-")}`
    : "";
  const variantName = `${hasTag ? baseModel.slice(0, colonPos) : baseModel}${tagSuffix}-chat`;
  const tempDir = mkdtempSync(join(tmpdir(), "nemoclaw-modelfile-"));
  const modelfilePath = join(tempDir, "Modelfile");
  try {
    writeFileSync(modelfilePath, `FROM ${baseModel}\n`, {
      encoding: "utf-8",
      flag: "wx", // exclusive creation — fails if the file somehow exists
    });
    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 120_000,
    });
    return variantName;
  } catch (err) {
    logger.warn(
      `Could not create chat variant '${variantName}': ${err instanceof Error ? err.message : String(err)}`,
    );
    return null;
  } finally {
    // Best-effort cleanup; never mask the real success/failure result.
    try { rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}

export interface OnboardOptions {
apiKey?: string;
endpoint?: string;
Expand Down Expand Up @@ -366,6 +418,17 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
let model: string;
if (opts.model) {
model = opts.model;
} else if (endpointType === "ollama") {
// For Ollama, list locally available models first
const ollamaModels = listOllamaModels();
if (ollamaModels.length > 0) {
logger.info(`Found ${String(ollamaModels.length)} model(s) in Ollama:`);
const modelOptions = ollamaModels.map((id) => ({ label: id, value: id }));
model = await promptSelect("Select your primary model:", modelOptions);
} else {
logger.info("No models found in Ollama. Enter a model name manually.");
model = await promptInput("Model name (e.g., nemotron-3-nano)");
}
} else {
const discoveredModelOptions =
endpointType === "ollama"
Expand Down Expand Up @@ -395,6 +458,24 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
model = await promptSelect("Select your primary model:", modelOptions, defaultIndex);
}

// For Ollama reasoning models, create a chat variant to avoid blank responses.
// Reasoning models (e.g. deepseek-r1, qwq) output to the `reasoning` field
// instead of `content`, which causes empty responses in chat mode.
// Creating a "-chat" variant forces the model into standard chat mode.
if (endpointType === "ollama" && isReasoningModel(model)) {
logger.warn(
`Model '${model}' is a reasoning model that may return blank responses in chat mode.`,
);
logger.info("Creating a chat variant to ensure proper output...");
const chatVariant = createOllamaChatVariant(model, logger);
if (chatVariant) {
logger.info(`Using chat variant: ${chatVariant}`);
model = chatVariant;
} else {
logger.warn("Could not create chat variant. The model may return empty responses.");
}
}

// Step 6: Resolve profile
const profile = resolveProfile(endpointType);
const providerName = resolveProviderName(endpointType);
Expand Down