From 7a2f9dfe2c91eb7496c8275fa5c4a7a120080c64 Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 13:57:24 +0800
Subject: [PATCH 1/7] fix: detect Ollama reasoning models and create chat
 variants (#246)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reasoning models (deepseek-r1, qwq, nemotron-*-nano) output to the
`reasoning` field instead of `content`, causing empty responses in
chat mode. During onboard, we now:

1. Detect known reasoning model patterns via isReasoningModel()
2. Exclude models already suffixed with -chat
3. Auto-create a "-chat" Ollama variant using a tmpfile + execFileSync
   (no shell interpolation — avoids injection)
4. Switch to the chat variant for inference

Also adds listOllamaModels() to show available models during Ollama
endpoint selection in both the TypeScript CLI (onboard.ts) and the
legacy JavaScript wizard (onboard.js).

Closes: https://github.com/NVIDIA/NemoClaw/issues/246
---
 bin/lib/onboard.js               | 89 +++++++++++++++++++++++++++++++-
 nemoclaw/src/commands/onboard.ts | 81 +++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 23f19b01a8..4f478c5530 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -55,6 +55,56 @@ async function promptOrDefault(question, envVar, defaultValue) {
   return prompt(question);
 }
 
+// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
+// See: https://github.com/NVIDIA/NemoClaw/issues/246
+const KNOWN_REASONING_MODEL_PATTERNS = [
+  /^nemotron.*nano/i,
+  /^deepseek-r1/i,
+  /^qwq/i,
+];
+
+function isReasoningModel(modelName) {
+  // Exclude chat variants (e.g. nemotron-3-nano-chat) — they don't use reasoning mode
+  if (/-chat$/i.test(modelName)) return false;
+  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(modelName));
+}
+
+function listOllamaModels() {
+  try {
+    const { execSync } = require("child_process");
+    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: 5000,
+    });
+    const data = JSON.parse(raw);
+    return (data.models || []).map((m) => m.name);
+  } catch {
+    return [];
+  }
+}
+
+function createOllamaChatVariant(baseModel) {
+  const { execFileSync } = require("child_process");
+  const os = require("os");
+  const variantName = baseModel.replace(/:.*$/, "") + "-chat";
+  const modelfilePath = path.join(os.tmpdir(), `nemoclaw-modelfile-${Date.now()}`);
+  try {
+    fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, "utf-8");
+    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: 120000,
+    });
+    return variantName;
+  } catch (err) {
+    console.log(`  ⚠ Could not create chat variant '${variantName}': ${err.message || err}`);
+    return null;
+  } finally {
+    try { fs.unlinkSync(modelfilePath); } catch { /* ignore */ }
+  }
+}
+
 // ── Helpers ──────────────────────────────────────────────────────
 
 function step(n, total, msg) {
@@ -591,13 +641,39 @@ async function setupNim(sandboxName, gpu) {
         run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true });
         sleep(2);
       }
-      console.log("  ✓ Using Ollama on localhost:11434");
+      // List available models and let the user pick
+      const ollamaModels = listOllamaModels();
+      if (ollamaModels.length > 0) {
+        console.log("");
+        console.log("  Available Ollama models:");
+        ollamaModels.forEach((m, i) => {
+          console.log(`    ${i + 1}) ${m}`);
+        });
+        console.log("");
+        const modelChoice = await prompt(`  Choose model [1]: `);
+        const midx = parseInt(modelChoice || "1", 10) - 1;
+        model = ollamaModels[midx] || ollamaModels[0];
+      } else {
+        model = "nemotron-3-nano";
+      }
+      console.log(`  ✓ Using Ollama on localhost:11434 with model: ${model}`);
       provider = "ollama-local";
       if (isNonInteractive()) {
         model = requestedModel || getDefaultOllamaModel(runCapture);
       } else {
         model = await promptOllamaModel();
       }
+      // If the model is a reasoning model, create a chat variant to avoid blank responses
+      if (isReasoningModel(model)) {
+        console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
+        const chatVariant = createOllamaChatVariant(model);
+        if (chatVariant) {
+          console.log(`  ✓ Using chat variant: ${chatVariant}`);
+          model = chatVariant;
+        } else {
+          console.log("  ⚠ Could not create chat variant. Model may return empty responses.");
+        }
+      }
     } else if (selected.key === "install-ollama") {
       console.log("  Installing Ollama via Homebrew...");
       run("brew install ollama", { ignoreError: true });
@@ -611,6 +687,17 @@ async function setupNim(sandboxName, gpu) {
       } else {
         model = await promptOllamaModel();
       }
+      // If the model is a reasoning model, create a chat variant to avoid blank responses
+      if (isReasoningModel(model)) {
+        console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
+        const chatVariant = createOllamaChatVariant(model);
+        if (chatVariant) {
+          console.log(`  ✓ Using chat variant: ${chatVariant}`);
+          model = chatVariant;
+        } else {
+          console.log("  ⚠ Could not create chat variant. Model may return empty responses.");
+        }
+      }
     } else if (selected.key === "vllm") {
       console.log("  ✓ Using existing vLLM on localhost:8000");
       provider = "vllm-local";
diff --git a/nemoclaw/src/commands/onboard.ts b/nemoclaw/src/commands/onboard.ts
index 72fb9fcdd4..731d8eea53 100644
--- a/nemoclaw/src/commands/onboard.ts
+++ b/nemoclaw/src/commands/onboard.ts
@@ -2,6 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { execFileSync, execSync } from "node:child_process";
+import { writeFileSync, unlinkSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
 import type { PluginLogger, NemoClawConfig } from "../index.js";
 import {
   describeOnboardEndpoint,
@@ -14,6 +17,55 @@ import {
 import { promptInput, promptConfirm, promptSelect } from "../onboard/prompt.js";
 import { validateApiKey, maskApiKey } from "../onboard/validate.js";
 
+// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
+// See: https://github.com/NVIDIA/NemoClaw/issues/246
+const KNOWN_REASONING_MODEL_PATTERNS: RegExp[] = [
+  /^nemotron.*nano/i,
+  /^deepseek-r1/i,
+  /^qwq/i,
+];
+
+function isReasoningModel(modelName: string): boolean {
+  // Exclude chat variants (e.g. nemotron-3-nano-chat) — they don't use reasoning mode
+  if (/-chat$/i.test(modelName)) return false;
+  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(modelName));
+}
+
+function listOllamaModels(): string[] {
+  try {
+    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: 5000,
+    });
+    const data = JSON.parse(raw) as { models?: Array<{ name: string }> };
+    return (data.models ?? []).map((m) => m.name);
+  } catch {
+    return [];
+  }
+}
+
+function createOllamaChatVariant(baseModel: string, logger: PluginLogger): string | null {
+  const variantName = baseModel.replace(/:.*$/, "") + "-chat";
+  const modelfilePath = join(tmpdir(), `nemoclaw-modelfile-${Date.now()}`);
+  try {
+    writeFileSync(modelfilePath, `FROM ${baseModel}\n`, "utf-8");
+    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: 120_000,
+    });
+    return variantName;
+  } catch (err) {
+    logger.warn(
+      `Could not create chat variant '${variantName}': ${err instanceof Error ? err.message : String(err)}`,
+    );
+    return null;
+  } finally {
+    try { unlinkSync(modelfilePath); } catch { /* ignore */ }
+  }
+}
+
 export interface OnboardOptions {
   apiKey?: string;
   endpoint?: string;
@@ -366,6 +418,17 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
   let model: string;
   if (opts.model) {
     model = opts.model;
+  } else if (endpointType === "ollama") {
+    // For Ollama, list locally available models first
+    const ollamaModels = listOllamaModels();
+    if (ollamaModels.length > 0) {
+      logger.info(`Found ${String(ollamaModels.length)} model(s) in Ollama:`);
+      const modelOptions = ollamaModels.map((id) => ({ label: id, value: id }));
+      model = await promptSelect("Select your primary model:", modelOptions);
+    } else {
+      logger.info("No models found in Ollama. Enter a model name manually.");
+      model = await promptInput("Model name (e.g., nemotron-3-nano)");
+    }
   } else {
     const discoveredModelOptions =
       endpointType === "ollama"
@@ -395,6 +458,24 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
     model = await promptSelect("Select your primary model:", modelOptions, defaultIndex);
   }
 
+  // For Ollama reasoning models, create a chat variant to avoid blank responses.
+  // Reasoning models (e.g. deepseek-r1, qwq) output to the `reasoning` field
+  // instead of `content`, which causes empty responses in chat mode.
+  // Creating a "-chat" variant forces the model into standard chat mode.
+  if (endpointType === "ollama" && isReasoningModel(model)) {
+    logger.warn(
+      `Model '${model}' is a reasoning model that may return blank responses in chat mode.`,
+    );
+    logger.info("Creating a chat variant to ensure proper output...");
+    const chatVariant = createOllamaChatVariant(model, logger);
+    if (chatVariant) {
+      logger.info(`Using chat variant: ${chatVariant}`);
+      model = chatVariant;
+    } else {
+      logger.warn("Could not create chat variant. The model may return empty responses.");
+    }
+  }
+
   // Step 6: Resolve profile
   const profile = resolveProfile(endpointType);
   const providerName = resolveProviderName(endpointType);

From fe8a0f10cfb539aec25ee2260c62e019e1dfd6a0 Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 14:03:02 +0800
Subject: [PATCH 2/7] refactor: extract handleReasoningModel helper to reduce
 duplication

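Address CodeRabbit review: the reasoning-model detection + chat-variant
creation logic was duplicated across three Ollama code paths (two in
bin/lib/onboard.js, one in nemoclaw/src/commands/onboard.ts). Extract the
two JavaScript copies in setupNim() into handleReasoningModel(), which
returns the chat variant on success or the original model unchanged.
The TypeScript copy is not touched by this commit.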
---
 bin/lib/onboard.js | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 4f478c5530..a49344db60 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -107,6 +107,22 @@ function createOllamaChatVariant(baseModel) {
 
 // ── Helpers ──────────────────────────────────────────────────────
 
+/**
+ * If the model is a known reasoning model, create a chat variant and return it.
+ * Otherwise return the original model unchanged.
+ */
+function handleReasoningModel(model) {
+  if (!isReasoningModel(model)) return model;
+  console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
+  const chatVariant = createOllamaChatVariant(model);
+  if (chatVariant) {
+    console.log(`  ✓ Using chat variant: ${chatVariant}`);
+    return chatVariant;
+  }
+  console.log("  ⚠ Could not create chat variant. Model may return empty responses.");
+  return model;
+}
+
 function step(n, total, msg) {
   console.log("");
   console.log(`  [${n}/${total}] ${msg}`);
@@ -663,17 +679,7 @@ async function setupNim(sandboxName, gpu) {
       } else {
         model = await promptOllamaModel();
       }
-      // If the model is a reasoning model, create a chat variant to avoid blank responses
-      if (isReasoningModel(model)) {
-        console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
-        const chatVariant = createOllamaChatVariant(model);
-        if (chatVariant) {
-          console.log(`  ✓ Using chat variant: ${chatVariant}`);
-          model = chatVariant;
-        } else {
-          console.log("  ⚠ Could not create chat variant. Model may return empty responses.");
-        }
-      }
+      model = handleReasoningModel(model);
     } else if (selected.key === "install-ollama") {
       console.log("  Installing Ollama via Homebrew...");
       run("brew install ollama", { ignoreError: true });
@@ -687,17 +693,7 @@ async function setupNim(sandboxName, gpu) {
       } else {
         model = await promptOllamaModel();
       }
-      // If the model is a reasoning model, create a chat variant to avoid blank responses
-      if (isReasoningModel(model)) {
-        console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
-        const chatVariant = createOllamaChatVariant(model);
-        if (chatVariant) {
-          console.log(`  ✓ Using chat variant: ${chatVariant}`);
-          model = chatVariant;
-        } else {
-          console.log("  ⚠ Could not create chat variant. Model may return empty responses.");
-        }
-      }
+      model = handleReasoningModel(model);
     } else if (selected.key === "vllm") {
       console.log("  ✓ Using existing vLLM on localhost:8000");
       provider = "vllm-local";

From 24b0fdb1565bdae22ec7e7ba957d94c894ca79f3 Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 15:02:52 +0800
Subject: [PATCH 3/7] fix: handle tagged chat variants in reasoning detection
 and filter invalid Ollama entries

- isReasoningModel: strip :tag suffix before checking -chat, so models
  like 'deepseek-r1-chat:8b' are correctly excluded
- listOllamaModels: filter out undefined/empty entries from Ollama API
  response to prevent downstream breakage

Addresses review feedback from CodeRabbit on PR #291.
---
 bin/lib/onboard.js | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index a49344db60..14ed55f795 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -64,8 +64,11 @@ const KNOWN_REASONING_MODEL_PATTERNS = [
 ];
 
 function isReasoningModel(modelName) {
-  // Exclude chat variants (e.g. nemotron-3-nano-chat) — they don't use reasoning mode
-  if (/-chat$/i.test(modelName)) return false;
+  if (typeof modelName !== "string" || modelName.length === 0) return false;
+  // Exclude chat variants — strip optional :tag suffix before checking
+  // Handles both "model-chat" and tagged forms like "deepseek-r1-chat:8b"
+  const baseName = modelName.replace(/:.*$/, "");
+  if (/-chat$/i.test(baseName)) return false;
   return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(modelName));
 }
 
@@ -78,7 +81,9 @@ function listOllamaModels() {
       timeout: 5000,
     });
     const data = JSON.parse(raw);
-    return (data.models || []).map((m) => m.name);
+    return (data.models || [])
+      .map((m) => m && m.name)
+      .filter((name) => typeof name === "string" && name.length > 0);
   } catch {
     return [];
   }

From 077b2a19c788c406d86afb8f94ce7b1426cad95e Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 15:33:22 +0800
Subject: [PATCH 4/7] refactor(onboard): make chat variant creation idempotent
 and tag-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add buildChatVariantName() that preserves Ollama tag in variant name
  (e.g. deepseek-r1:8b → deepseek-r1-8b-chat) to prevent collisions
  between different-sized models
- Check for existing variant via listOllamaModels() before creating,
  making re-runs idempotent
- Move model log message after handleReasoningModel() so it reflects
  the actual model used at runtime

Addresses CodeRabbit review feedback.
---
 bin/lib/onboard.js | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 14ed55f795..bc189049d0 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -89,10 +89,19 @@ function listOllamaModels() {
   }
 }
 
-function createOllamaChatVariant(baseModel) {
+/**
+ * Build a tag-safe chat variant name that preserves the Ollama tag to avoid
+ * collisions (e.g. deepseek-r1:8b → deepseek-r1-8b-chat, deepseek-r1:14b → deepseek-r1-14b-chat).
+ */
+function buildChatVariantName(baseModel) {
+  const [name, tag] = String(baseModel).split(":", 2);
+  const safeTag = tag ? `-${tag.replace(/[^a-z0-9._-]/gi, "-")}` : "";
+  return `${name}${safeTag}-chat`;
+}
+
+function createOllamaChatVariant(baseModel, variantName) {
   const { execFileSync } = require("child_process");
   const os = require("os");
-  const variantName = baseModel.replace(/:.*$/, "") + "-chat";
   const modelfilePath = path.join(os.tmpdir(), `nemoclaw-modelfile-${Date.now()}`);
   try {
     fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, "utf-8");
@@ -118,8 +127,15 @@ function createOllamaChatVariant(baseModel) {
  */
 function handleReasoningModel(model) {
   if (!isReasoningModel(model)) return model;
+  const variantName = buildChatVariantName(model);
+  // Reuse existing variant — makes the operation idempotent
+  const existingModels = listOllamaModels();
+  if (existingModels.includes(variantName)) {
+    console.log(`  ✓ Using existing chat variant: ${variantName}`);
+    return variantName;
+  }
   console.log(`  ⚠ '${model}' is a reasoning model — creating chat variant...`);
-  const chatVariant = createOllamaChatVariant(model);
+  const chatVariant = createOllamaChatVariant(model, variantName);
   if (chatVariant) {
     console.log(`  ✓ Using chat variant: ${chatVariant}`);
     return chatVariant;
@@ -677,7 +693,6 @@ async function setupNim(sandboxName, gpu) {
       } else {
         model = "nemotron-3-nano";
       }
-      console.log(`  ✓ Using Ollama on localhost:11434 with model: ${model}`);
       provider = "ollama-local";
       if (isNonInteractive()) {
         model = requestedModel || getDefaultOllamaModel(runCapture);
@@ -685,6 +700,7 @@ async function setupNim(sandboxName, gpu) {
         model = await promptOllamaModel();
       }
       model = handleReasoningModel(model);
+      console.log(`  ✓ Using Ollama on localhost:11434 with model: ${model}`);
     } else if (selected.key === "install-ollama") {
       console.log("  Installing Ollama via Homebrew...");
       run("brew install ollama", { ignoreError: true });

From 060a593ff29d081f90687026f3ccc2d31fd86fed Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 16:03:23 +0800
Subject: [PATCH 5/7] fix(onboard): parse registry-prefixed model refs + atomic
 Modelfile creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add parseOllamaModelRef() to extract base model name from fully-qualified
  Ollama refs (e.g. ghcr.io/org/deepseek-r1:8b → deepseek-r1). Fixes
  isReasoningModel() failing on namespaced/registry model references where
  ^-anchored patterns couldn't match.

- Switch createOllamaChatVariant() to use fs.mkdtempSync() + exclusive file
  creation (flag: 'wx') instead of predictable Date.now()-based filename.
  Prevents TOCTOU race conditions on multi-user systems.

Addresses CodeRabbit review feedback on PR #291.
---
 bin/lib/onboard.js | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index bc189049d0..9e7c7912ba 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -63,13 +63,30 @@ const KNOWN_REASONING_MODEL_PATTERNS = [
   /^qwq/i,
 ];
 
+/**
+ * Parse an Ollama model reference into its components.
+ * Handles fully-qualified refs like "ghcr.io/org/deepseek-r1:8b" as well as
+ * simple refs like "deepseek-r1:8b" or "deepseek-r1".
+ */
+function parseOllamaModelRef(modelRef) {
+  // Strip @digest if present
+  const ref = String(modelRef).split("@", 1)[0];
+  // Strip :tag suffix (only the last one, after the last /)
+  const withoutTag = ref.replace(/:(?=[^/]*$).*/, "");
+  return {
+    withoutTag,
+    baseName: withoutTag.slice(withoutTag.lastIndexOf("/") + 1),
+  };
+}
+
 function isReasoningModel(modelName) {
   if (typeof modelName !== "string" || modelName.length === 0) return false;
-  // Exclude chat variants — strip optional :tag suffix before checking
-  // Handles both "model-chat" and tagged forms like "deepseek-r1-chat:8b"
-  const baseName = modelName.replace(/:.*$/, "");
+  // Extract base model name — strips registry, namespace, and tag
+  // so "ghcr.io/org/deepseek-r1:8b" → baseName "deepseek-r1"
+  const { baseName } = parseOllamaModelRef(modelName);
+  // Exclude chat variants
   if (/-chat$/i.test(baseName)) return false;
-  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(modelName));
+  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(baseName));
 }
 
 function listOllamaModels() {
@@ -102,9 +119,15 @@ function buildChatVariantName(baseModel) {
 function createOllamaChatVariant(baseModel, variantName) {
   const { execFileSync } = require("child_process");
   const os = require("os");
-  const modelfilePath = path.join(os.tmpdir(), `nemoclaw-modelfile-${Date.now()}`);
+  // Use mkdtempSync for atomic temp directory creation — avoids TOCTOU races
+  // with predictable filenames on multi-user systems
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-modelfile-"));
+  const modelfilePath = path.join(tempDir, "Modelfile");
   try {
-    fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, "utf-8");
+    fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, {
+      encoding: "utf-8",
+      flag: "wx", // exclusive creation — fails if file already exists
+    });
     execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
       encoding: "utf-8",
       stdio: ["pipe", "pipe", "pipe"],
@@ -115,7 +138,7 @@ function createOllamaChatVariant(baseModel, variantName) {
     console.log(`  ⚠ Could not create chat variant '${variantName}': ${err.message || err}`);
     return null;
   } finally {
-    try { fs.unlinkSync(modelfilePath); } catch { /* ignore */ }
+    try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
   }
 }
 

From cc21e44a3c924716629b32e48836f2f37f08854c Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Wed, 18 Mar 2026 16:32:51 +0800
Subject: [PATCH 6/7] fix(onboard): use parseOllamaModelRef in
 buildChatVariantName

buildChatVariantName previously used split(':', 2), which misparses
registry refs with ports: 'localhost:5000/ns/model:tag' was split at the
port colon, so '5000/ns/model' was treated as the tag and the real tag
was dropped, producing 'localhost-5000-ns-model-chat' instead of the
correct variant name.

Now uses parseOllamaModelRef() for consistent tag/name extraction,
matching Ollama's convention of using the last ':' after the last '/'
as the tag separator.

Also extends parseOllamaModelRef to return the 'tag' component.

Addresses CodeRabbit review feedback on PR #291.
---
 bin/lib/onboard.js | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 9e7c7912ba..f535cdf0be 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -72,10 +72,13 @@ function parseOllamaModelRef(modelRef) {
   // Strip @digest if present
   const ref = String(modelRef).split("@", 1)[0];
   // Strip :tag suffix (only the last one, after the last /)
-  const withoutTag = ref.replace(/:(?=[^/]*$).*/, "");
+  const tagMatch = ref.match(/:([^/]*)$/);
+  const tag = tagMatch ? tagMatch[1] : "";
+  const withoutTag = tagMatch ? ref.slice(0, tagMatch.index) : ref;
   return {
     withoutTag,
     baseName: withoutTag.slice(withoutTag.lastIndexOf("/") + 1),
+    tag,
   };
 }
 
@@ -109,11 +112,12 @@ function listOllamaModels() {
 /**
  * Build a tag-safe chat variant name that preserves the Ollama tag to avoid
  * collisions (e.g. deepseek-r1:8b → deepseek-r1-8b-chat, deepseek-r1:14b → deepseek-r1-14b-chat).
+ * Correctly handles registry refs with ports (e.g. localhost:5000/ns/model:tag).
  */
 function buildChatVariantName(baseModel) {
-  const [name, tag] = String(baseModel).split(":", 2);
+  const { withoutTag, tag } = parseOllamaModelRef(baseModel);
   const safeTag = tag ? `-${tag.replace(/[^a-z0-9._-]/gi, "-")}` : "";
-  return `${name}${safeTag}-chat`;
+  return `${withoutTag}${safeTag}-chat`;
 }
 
 function createOllamaChatVariant(baseModel, variantName) {

From 83fb806a3869619dc7389072f5bba3c2886d9e27 Mon Sep 17 00:00:00 2001
From: Kagura Chen <daniyuu19@sjtu.edu.cn>
Date: Thu, 19 Mar 2026 13:13:19 +0800
Subject: [PATCH 7/7] fix: handle registry port in buildChatVariantName parsing

Replace parseOllamaModelRef() call with explicit lastIndexOf logic to
detect tags. A colon is treated as a tag separator only when it appears
after the last slash (lastIndexOf(':') > lastIndexOf('/')), preventing
misinterpretation of port numbers in registry URLs like
registry.example.com:5000/model:v1.

Addresses CodeRabbit review feedback on PR #291.
---
 bin/lib/onboard.js | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index f535cdf0be..6b5536cea3 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -112,12 +112,22 @@ function listOllamaModels() {
 /**
  * Build a tag-safe chat variant name that preserves the Ollama tag to avoid
  * collisions (e.g. deepseek-r1:8b → deepseek-r1-8b-chat, deepseek-r1:14b → deepseek-r1-14b-chat).
- * Correctly handles registry refs with ports (e.g. localhost:5000/ns/model:tag).
+ * Correctly handles registry refs with ports (e.g. registry.example.com:5000/model:v1).
+ *
+ * A colon is treated as a tag separator only when it appears after the last '/'.
+ * This prevents misinterpreting a port number as a tag in registry URLs.
  */
 function buildChatVariantName(baseModel) {
-  const { withoutTag, tag } = parseOllamaModelRef(baseModel);
-  const safeTag = tag ? `-${tag.replace(/[^a-z0-9._-]/gi, "-")}` : "";
-  return `${withoutTag}${safeTag}-chat`;
+  const lastSlash = baseModel.lastIndexOf("/");
+  const colonIndex = baseModel.lastIndexOf(":");
+  // Treat as a tag only when the colon occurs after the last slash
+  // (i.e. "registry:5000/model" has no tag, "model:v1" does)
+  const hasTag = colonIndex > lastSlash;
+  const namePart = hasTag ? baseModel.slice(0, colonIndex) : baseModel;
+  const safeTag = hasTag
+    ? `-${baseModel.slice(colonIndex + 1).replace(/[^a-z0-9._-]/gi, "-")}`
+    : "";
+  return `${namePart}${safeTag}-chat`;
 }
 
 function createOllamaChatVariant(baseModel, variantName) {