Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 142 additions & 1 deletion bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,132 @@ async function promptOrDefault(question, envVar, defaultValue) {
return prompt(question);
}

// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
// See: https://github.com/NVIDIA/NemoClaw/issues/246
// Patterns are matched case-insensitively against the bare model name, i.e.
// after isReasoningModel() has stripped registry, namespace, and :tag.
const KNOWN_REASONING_MODEL_PATTERNS = [
  /^nemotron.*nano/i, // e.g. nemotron-3-nano
  /^deepseek-r1/i,
  /^qwq/i,
];

/**
 * Split an Ollama model reference into { withoutTag, baseName, tag }.
 *
 * Accepts fully-qualified refs ("ghcr.io/org/deepseek-r1:8b"), plain refs
 * ("deepseek-r1:8b", "deepseek-r1"), and refs with an @digest suffix.
 * A colon only counts as a tag separator when it appears after the last "/",
 * so registry ports (host:5000/model) are never mistaken for tags.
 */
function parseOllamaModelRef(modelRef) {
  const refStr = String(modelRef);
  // Drop any @digest suffix first.
  const atPos = refStr.indexOf("@");
  const ref = atPos === -1 ? refStr : refStr.slice(0, atPos);
  // First colon after the last slash (if any) starts the tag.
  const lastSlash = ref.lastIndexOf("/");
  const colonPos = ref.indexOf(":", lastSlash + 1);
  const withoutTag = colonPos === -1 ? ref : ref.slice(0, colonPos);
  return {
    withoutTag,
    baseName: withoutTag.slice(withoutTag.lastIndexOf("/") + 1),
    tag: colonPos === -1 ? "" : ref.slice(colonPos + 1),
  };
}

/**
 * Whether `modelName` refers to a known reasoning model (one that emits to
 * the `reasoning` field rather than `content`).
 *
 * Matching is done against the bare model name — registry, namespace, and
 * tag are irrelevant to whether the underlying model reasons — and "-chat"
 * variants are never treated as reasoning models.
 */
function isReasoningModel(modelName) {
  if (typeof modelName !== "string" || modelName.length === 0) {
    return false;
  }
  const { baseName } = parseOllamaModelRef(modelName);
  // Chat variants already force standard chat output.
  if (/-chat$/i.test(baseName)) {
    return false;
  }
  for (const pattern of KNOWN_REASONING_MODEL_PATTERNS) {
    if (pattern.test(baseName)) {
      return true;
    }
  }
  return false;
}

/**
 * Query the local Ollama daemon for installed model names.
 *
 * @returns {string[]} model names, or an empty array when Ollama is
 *   unreachable, times out, or returns an unexpected payload.
 */
function listOllamaModels() {
  try {
    const { execSync } = require("child_process");
    // curl -sf: silent, and exit non-zero on HTTP errors, so any failure
    // (daemon down, 4xx/5xx, timeout) lands in the catch below.
    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 5000,
    });
    const data = JSON.parse(raw);
    // Defensive: tolerate missing `models` and malformed entries.
    return (data.models || [])
      .map((m) => m && m.name)
      .filter((name) => typeof name === "string" && name.length > 0);
  } catch {
    return [];
  }
}

/**
 * Derive the "-chat" variant name for an Ollama model ref, keeping the tag
 * in the name so different tags never collide:
 *   deepseek-r1:8b  → deepseek-r1-8b-chat
 *   deepseek-r1:14b → deepseek-r1-14b-chat
 *
 * A colon is only a tag separator when it appears after the last "/", so a
 * registry port (registry.example.com:5000/model) is left untouched.
 */
function buildChatVariantName(baseModel) {
  const slashPos = baseModel.lastIndexOf("/");
  const colonPos = baseModel.lastIndexOf(":");
  if (colonPos <= slashPos) {
    // No tag — any colon present belongs to a registry port.
    return `${baseModel}-chat`;
  }
  // Sanitize the tag so the variant name stays a valid Ollama model name.
  const sanitizedTag = baseModel
    .slice(colonPos + 1)
    .replace(/[^a-z0-9._-]/gi, "-");
  return `${baseModel.slice(0, colonPos)}-${sanitizedTag}-chat`;
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Create an Ollama chat variant of `baseModel` named `variantName` by
 * writing a minimal Modelfile (`FROM <baseModel>`) and running
 * `ollama create`.
 *
 * @returns {string|null} `variantName` on success; null on failure (a
 *   warning is logged and the temp directory is still cleaned up).
 */
function createOllamaChatVariant(baseModel, variantName) {
  const { execFileSync } = require("child_process");
  const os = require("os");
  // Use mkdtempSync for atomic temp directory creation — avoids TOCTOU races
  // with predictable filenames on multi-user systems
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-modelfile-"));
  const modelfilePath = path.join(tempDir, "Modelfile");
  try {
    fs.writeFileSync(modelfilePath, `FROM ${baseModel}\n`, {
      encoding: "utf-8",
      flag: "wx", // exclusive creation — fails if file already exists
    });
    // execFileSync (not execSync): no shell, so a model name can never be
    // interpreted as shell metacharacters. Generous 120s timeout — `ollama
    // create` may copy large model layers.
    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 120000,
    });
    return variantName;
  } catch (err) {
    console.log(` ⚠ Could not create chat variant '${variantName}': ${err.message || err}`);
    return null;
  } finally {
    // Best-effort cleanup; never mask the real success/failure result.
    try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}

// ── Helpers ──────────────────────────────────────────────────────

/**
 * Resolve a model name to a chat-safe equivalent.
 *
 * Known reasoning models are swapped for a "-chat" variant (reusing an
 * existing one when present, otherwise creating it via `ollama create`);
 * every other model is returned untouched. Falls back to the original
 * model when variant creation fails.
 */
function handleReasoningModel(model) {
  if (!isReasoningModel(model)) {
    return model;
  }
  const variantName = buildChatVariantName(model);
  // Idempotency: reuse a previously created variant instead of rebuilding.
  if (listOllamaModels().includes(variantName)) {
    console.log(` ✓ Using existing chat variant: ${variantName}`);
    return variantName;
  }
  console.log(` ⚠ '${model}' is a reasoning model — creating chat variant...`);
  const created = createOllamaChatVariant(model, variantName);
  if (created === null) {
    console.log(" ⚠ Could not create chat variant. Model may return empty responses.");
    return model;
  }
  console.log(` ✓ Using chat variant: ${created}`);
  return created;
}

function step(n, total, msg) {
console.log("");
console.log(` [${n}/${total}] ${msg}`);
Expand Down Expand Up @@ -591,13 +715,29 @@ async function setupNim(sandboxName, gpu) {
run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true });
sleep(2);
}
console.log(" ✓ Using Ollama on localhost:11434");
// List available models and let the user pick
const ollamaModels = listOllamaModels();
if (ollamaModels.length > 0) {
console.log("");
console.log(" Available Ollama models:");
ollamaModels.forEach((m, i) => {
console.log(` ${i + 1}) ${m}`);
});
console.log("");
const modelChoice = await prompt(` Choose model [1]: `);
const midx = parseInt(modelChoice || "1", 10) - 1;
model = ollamaModels[midx] || ollamaModels[0];
} else {
model = "nemotron-3-nano";
}
provider = "ollama-local";
if (isNonInteractive()) {
model = requestedModel || getDefaultOllamaModel(runCapture);
} else {
model = await promptOllamaModel();
}
model = handleReasoningModel(model);
console.log(` ✓ Using Ollama on localhost:11434 with model: ${model}`);
} else if (selected.key === "install-ollama") {
console.log(" Installing Ollama via Homebrew...");
run("brew install ollama", { ignoreError: true });
Expand All @@ -611,6 +751,7 @@ async function setupNim(sandboxName, gpu) {
} else {
model = await promptOllamaModel();
}
model = handleReasoningModel(model);
} else if (selected.key === "vllm") {
console.log(" ✓ Using existing vLLM on localhost:8000");
provider = "vllm-local";
Expand Down
81 changes: 81 additions & 0 deletions nemoclaw/src/commands/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// SPDX-License-Identifier: Apache-2.0

import { execFileSync, execSync } from "node:child_process";
import { mkdtempSync, rmSync, unlinkSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { PluginLogger, NemoClawConfig } from "../index.js";
import {
describeOnboardEndpoint,
Expand All @@ -14,6 +17,55 @@ import {
import { promptInput, promptConfirm, promptSelect } from "../onboard/prompt.js";
import { validateApiKey, maskApiKey } from "../onboard/validate.js";

// Known Ollama reasoning models that output to `reasoning` field instead of `content`.
// See: https://github.com/NVIDIA/NemoClaw/issues/246
// Patterns are matched case-insensitively against the model name by
// isReasoningModel(). Keep in sync with bin/lib/onboard.js.
const KNOWN_REASONING_MODEL_PATTERNS: RegExp[] = [
  /^nemotron.*nano/i, // e.g. nemotron-3-nano
  /^deepseek-r1/i,
  /^qwq/i,
];

/**
 * Whether `modelName` refers to a known Ollama reasoning model (one that
 * emits to the `reasoning` field rather than `content`).
 *
 * Fix: the previous version matched the raw ref, so fully-qualified refs
 * like "ghcr.io/org/deepseek-r1:8b" failed the `^`-anchored patterns. We now
 * strip any @digest, registry/namespace, and :tag first (a colon only counts
 * as a tag separator after the last "/", so registry ports are preserved),
 * mirroring parseOllamaModelRef in bin/lib/onboard.js.
 */
function isReasoningModel(modelName: string): boolean {
  if (modelName.length === 0) return false;
  // Drop @digest, then isolate the bare model name.
  const ref = modelName.split("@", 1)[0];
  const lastSlash = ref.lastIndexOf("/");
  const colonPos = ref.indexOf(":", lastSlash + 1);
  const withoutTag = colonPos === -1 ? ref : ref.slice(0, colonPos);
  const baseName = withoutTag.slice(withoutTag.lastIndexOf("/") + 1);
  // Exclude chat variants (e.g. nemotron-3-nano-chat) — they don't use reasoning mode
  if (/-chat$/i.test(baseName)) return false;
  return KNOWN_REASONING_MODEL_PATTERNS.some((p) => p.test(baseName));
}

/**
 * List model names known to the local Ollama daemon.
 *
 * `curl -sf` exits non-zero on HTTP errors, so any failure (daemon down,
 * 4xx/5xx, timeout) lands in the catch and yields an empty array.
 *
 * Fix: guard against malformed entries in the JSON payload instead of
 * trusting `m.name` to be a string (consistent with bin/lib/onboard.js).
 */
function listOllamaModels(): string[] {
  try {
    const raw = execSync("curl -sf http://localhost:11434/api/tags", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 5000,
    });
    const data = JSON.parse(raw) as { models?: Array<{ name?: unknown }> };
    return (data.models ?? [])
      .map((m) => m?.name)
      .filter((name): name is string => typeof name === "string" && name.length > 0);
  } catch {
    return [];
  }
}

/**
 * Create a "-chat" variant of an Ollama reasoning model by writing a minimal
 * Modelfile (`FROM <baseModel>`) and running `ollama create`.
 *
 * Fixes over the previous version (ports the bin/lib/onboard.js behavior):
 * - `replace(/:.*$/, "")` stripped from the FIRST colon, mangling registry
 *   refs with ports (registry.example.com:5000/model:v1 → "registry.example.com-chat").
 *   A colon now counts as a tag separator only after the last "/".
 * - The tag is preserved in the variant name, so deepseek-r1:8b and
 *   deepseek-r1:14b no longer collide on the same "-chat" name.
 * - The Modelfile lives in a mkdtempSync directory and is created with the
 *   exclusive "wx" flag — no predictable Date.now() filename (TOCTOU race
 *   on multi-user systems).
 *
 * @returns the variant name on success; null on failure (warning logged).
 */
function createOllamaChatVariant(baseModel: string, logger: PluginLogger): string | null {
  const lastSlash = baseModel.lastIndexOf("/");
  const colonPos = baseModel.lastIndexOf(":");
  const hasTag = colonPos > lastSlash;
  // Sanitize the tag so the variant name stays a valid Ollama model name.
  const tagSuffix = hasTag
    ? `-${baseModel.slice(colonPos + 1).replace(/[^a-z0-9._-]/gi, "-")}`
    : "";
  const variantName = `${hasTag ? baseModel.slice(0, colonPos) : baseModel}${tagSuffix}-chat`;
  const tempDir = mkdtempSync(join(tmpdir(), "nemoclaw-modelfile-"));
  const modelfilePath = join(tempDir, "Modelfile");
  try {
    writeFileSync(modelfilePath, `FROM ${baseModel}\n`, {
      encoding: "utf-8",
      flag: "wx", // exclusive creation — fails if the file somehow exists
    });
    execFileSync("ollama", ["create", variantName, "-f", modelfilePath], {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 120_000,
    });
    return variantName;
  } catch (err) {
    logger.warn(
      `Could not create chat variant '${variantName}': ${err instanceof Error ? err.message : String(err)}`,
    );
    return null;
  } finally {
    // Best-effort cleanup; never mask the real success/failure result.
    try { rmSync(tempDir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}

export interface OnboardOptions {
apiKey?: string;
endpoint?: string;
Expand Down Expand Up @@ -366,6 +418,17 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
let model: string;
if (opts.model) {
model = opts.model;
} else if (endpointType === "ollama") {
// For Ollama, list locally available models first
const ollamaModels = listOllamaModels();
if (ollamaModels.length > 0) {
logger.info(`Found ${String(ollamaModels.length)} model(s) in Ollama:`);
const modelOptions = ollamaModels.map((id) => ({ label: id, value: id }));
model = await promptSelect("Select your primary model:", modelOptions);
} else {
logger.info("No models found in Ollama. Enter a model name manually.");
model = await promptInput("Model name (e.g., nemotron-3-nano)");
}
} else {
const discoveredModelOptions =
endpointType === "ollama"
Expand Down Expand Up @@ -395,6 +458,24 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
model = await promptSelect("Select your primary model:", modelOptions, defaultIndex);
}

// For Ollama reasoning models, create a chat variant to avoid blank responses.
// Reasoning models (e.g. deepseek-r1, qwq) output to the `reasoning` field
// instead of `content`, which causes empty responses in chat mode.
// Creating a "-chat" variant forces the model into standard chat mode.
if (endpointType === "ollama" && isReasoningModel(model)) {
logger.warn(
`Model '${model}' is a reasoning model that may return blank responses in chat mode.`,
);
logger.info("Creating a chat variant to ensure proper output...");
const chatVariant = createOllamaChatVariant(model, logger);
if (chatVariant) {
logger.info(`Using chat variant: ${chatVariant}`);
model = chatVariant;
} else {
logger.warn("Could not create chat variant. The model may return empty responses.");
}
}

// Step 6: Resolve profile
const profile = resolveProfile(endpointType);
const providerName = resolveProviderName(endpointType);
Expand Down