Skip to content
70 changes: 62 additions & 8 deletions bin/lib/nim.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@
const { run, runCapture } = require("./runner");
const nimImages = require("./nim-images.json");

/**
 * Build the Docker container name used for a sandbox's NIM container.
 * @param {string} sandboxName - Sandbox identifier.
 * @returns {string} Docker container name.
 */
function containerName(sandboxName) {
  const prefix = "nemoclaw-nim-";
  return `${prefix}${sandboxName}`;
}

/**
 * Look up the NIM container image registered for a model.
 * @param {string} modelName - Model name to resolve.
 * @returns {string|null} NIM container image, or null when the model is unknown.
 */
function getImageForModel(modelName) {
  for (const model of nimImages.models) {
    if (model.name === modelName) return model.image;
  }
  return null;
}

/** @returns {Array<{name: string, image: string, minGpuMemoryMB: number}>} */
function listModels() {
return nimImages.models.map((m) => ({
name: m.name,
Expand All @@ -23,10 +26,21 @@ function listModels() {
}));
}

function detectGpu() {
/**
* Detect GPU hardware. Returns an object describing the GPU (type, count,
* memory, capabilities) or null if no GPU is found.
* @param {object} [opts] - Optional overrides for dependency injection.
* @param {Function} [opts.runCapture] - Command runner (default: runner.runCapture).
* @param {string} [opts.platform] - OS platform (default: process.platform).
* @returns {{ type: string, count: number, totalMemoryMB: number, perGpuMB: number, nimCapable: boolean, spark?: boolean, name?: string, cores?: number } | null}
*/
function detectGpu(opts) {
const runCmd = (opts && opts.runCapture) || runCapture;
const platform = (opts && opts.platform) || process.platform;

// Try NVIDIA first — query VRAM
try {
const output = runCapture(
const output = runCmd(
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
{ ignoreError: true }
);
Expand All @@ -35,8 +49,18 @@ function detectGpu() {
const perGpuMB = lines.map((l) => parseInt(l.trim(), 10)).filter((n) => !isNaN(n));
if (perGpuMB.length > 0) {
const totalMemoryMB = perGpuMB.reduce((a, b) => a + b, 0);
// Query GPU name for display
let name;
try {
name = runCmd(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (name) name = name.split("\n")[0].trim();
} catch {}
return {
type: "nvidia",
name,
count: perGpuMB.length,
totalMemoryMB,
perGpuMB: perGpuMB[0],
Expand All @@ -48,19 +72,21 @@ function detectGpu() {

// Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture
try {
const nameOutput = runCapture(
const nameOutput = runCmd(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (nameOutput && nameOutput.includes("GB10")) {
const name = nameOutput.split("\n")[0].trim();
// GB10 has 128GB unified memory shared with Grace CPU — use system RAM
let totalMemoryMB = 0;
try {
const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
const memLine = runCmd("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0;
} catch {}
return {
type: "nvidia",
name,
count: 1,
totalMemoryMB,
perGpuMB: totalMemoryMB,
Expand All @@ -71,9 +97,9 @@ function detectGpu() {
} catch {}

// macOS: detect Apple Silicon or discrete GPU
if (process.platform === "darwin") {
if (platform === "darwin") {
try {
const spOutput = runCapture(
const spOutput = runCmd(
"system_profiler SPDisplaysDataType 2>/dev/null",
{ ignoreError: true }
);
Expand All @@ -92,7 +118,7 @@ function detectGpu() {
} else {
// Apple Silicon shares system RAM — read total memory
try {
const memBytes = runCapture("sysctl -n hw.memsize", { ignoreError: true });
const memBytes = runCmd("sysctl -n hw.memsize", { ignoreError: true });
if (memBytes) memoryMB = Math.floor(parseInt(memBytes, 10) / 1024 / 1024);
} catch {}
}
Expand All @@ -101,7 +127,7 @@ function detectGpu() {
type: "apple",
name,
count: 1,
cores: coresMatch ? parseInt(coresMatch[1], 10) : null,
...(coresMatch ? { cores: parseInt(coresMatch[1], 10) } : {}),
totalMemoryMB: memoryMB,
perGpuMB: memoryMB,
nimCapable: false,
Expand All @@ -114,6 +140,29 @@ function detectGpu() {
return null;
}

/**
 * Suggest NIM models ranked by fit for a given GPU.
 * Returns models sorted by VRAM requirement (descending), with the largest
 * model that uses <=90% of available VRAM marked as recommended.
 * @param {{ totalMemoryMB: number, nimCapable: boolean } | null} gpu
 * @returns {Array<{ name: string, image: string, minGpuMemoryMB: number, recommended: boolean }>}
 */
function suggestModelsForGpu(gpu) {
  // No GPU, or one NIM can't run on — nothing to suggest.
  if (!gpu || !gpu.nimCapable) return [];

  const vram = gpu.totalMemoryMB;

  // Keep only models that fit in VRAM, largest requirement first.
  const candidates = listModels().filter((m) => m.minGpuMemoryMB <= vram);
  candidates.sort((a, b) => b.minGpuMemoryMB - a.minGpuMemoryMB);

  // Recommend the biggest model leaving >=10% VRAM headroom (if any).
  const headroomLimit = vram * 0.9;
  const recommendedIdx = candidates.findIndex((m) => m.minGpuMemoryMB <= headroomLimit);

  return candidates.map((m, i) => ({ ...m, recommended: i === recommendedIdx }));
}

function pullNimImage(model) {
const image = getImageForModel(model);
if (!image) {
Expand All @@ -125,6 +174,7 @@ function pullNimImage(model) {
return image;
}

/** @param {string} sandboxName @param {string} model @param {number} [port=8000] @returns {string} Container name. */
function startNimContainer(sandboxName, model, port = 8000) {
const name = containerName(sandboxName);
const image = getImageForModel(model);
Expand All @@ -143,6 +193,7 @@ function startNimContainer(sandboxName, model, port = 8000) {
return name;
}

/** @param {number} [port=8000] @param {number} [timeout=300] @returns {boolean} True if healthy. */
function waitForNimHealth(port = 8000, timeout = 300) {
const start = Date.now();
const interval = 5000;
Expand All @@ -165,13 +216,15 @@ function waitForNimHealth(port = 8000, timeout = 300) {
return false;
}

/** @param {string} sandboxName - Stop and remove the NIM container. */
function stopNimContainer(sandboxName) {
const name = containerName(sandboxName);
console.log(` Stopping NIM container: ${name}`);
run(`docker stop ${name} 2>/dev/null || true`, { ignoreError: true });
run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true });
}

/** @param {string} sandboxName @returns {{running: boolean, healthy?: boolean, container: string, state?: string}} */
function nimStatus(sandboxName) {
const name = containerName(sandboxName);
try {
Expand Down Expand Up @@ -199,6 +252,7 @@ module.exports = {
getImageForModel,
listModels,
detectGpu,
suggestModelsForGpu,
pullNimImage,
startNimContainer,
waitForNimHealth,
Expand Down
18 changes: 11 additions & 7 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ async function preflight() {
// GPU
const gpu = nim.detectGpu();
if (gpu && gpu.type === "nvidia") {
console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`);
const label = gpu.name ? `${gpu.name}, ` : "";
console.log(` ✓ NVIDIA GPU detected: ${label}${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`);
} else if (gpu && gpu.type === "apple") {
console.log(` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? ` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`);
console.log(" ⓘ NIM requires NVIDIA GPU — will use cloud inference");
Expand Down Expand Up @@ -540,11 +541,12 @@ async function setupNim(sandboxName, gpu) {

if (selected.key === "nim") {
// List models that fit GPU VRAM
const models = nim.listModels().filter((m) => m.minGpuMemoryMB <= gpu.totalMemoryMB);
const models = nim.suggestModelsForGpu(gpu);
if (models.length === 0) {
console.log(" No NIM models fit your GPU VRAM. Falling back to cloud API.");
} else {
let sel;
const defaultModelIndex = Math.max(0, models.findIndex((m) => m.recommended));
if (isNonInteractive()) {
if (requestedModel) {
sel = models.find((m) => m.name === requestedModel);
Expand All @@ -553,20 +555,22 @@ async function setupNim(sandboxName, gpu) {
process.exit(1);
}
} else {
sel = models[0];
sel = models[defaultModelIndex];
}
console.log(` [non-interactive] NIM model: ${sel.name}`);
} else {
console.log("");
console.log(" Models that fit your GPU:");
models.forEach((m, i) => {
console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)`);
const tag = m.recommended ? " (recommended)" : "";
console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)${tag}`);
});
console.log("");

const modelChoice = await prompt(` Choose model [1]: `);
const midx = parseInt(modelChoice || "1", 10) - 1;
sel = models[midx] || models[0];
const defaultChoice = String(defaultModelIndex + 1);
const modelChoice = await prompt(` Choose model [${defaultChoice}]: `);
const midx = parseInt(modelChoice || defaultChoice, 10) - 1;
sel = models[midx] || models[defaultModelIndex];
}
model = sel.name;

Expand Down
Loading