diff --git a/bin/lib/nim.js b/bin/lib/nim.js index 4f2233e435..a0ed10160d 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -6,15 +6,18 @@ const { run, runCapture } = require("./runner"); const nimImages = require("./nim-images.json"); +/** @param {string} sandboxName @returns {string} Docker container name. */ function containerName(sandboxName) { return `nemoclaw-nim-${sandboxName}`; } +/** @param {string} modelName @returns {string|null} NIM container image or null. */ function getImageForModel(modelName) { const entry = nimImages.models.find((m) => m.name === modelName); return entry ? entry.image : null; } +/** @returns {Array<{name: string, image: string, minGpuMemoryMB: number}>} */ function listModels() { return nimImages.models.map((m) => ({ name: m.name, @@ -23,10 +26,21 @@ function listModels() { })); } -function detectGpu() { +/** + * Detect GPU hardware. Returns an object describing the GPU (type, count, + * memory, capabilities) or null if no GPU is found. + * @param {object} [opts] - Optional overrides for dependency injection. + * @param {Function} [opts.runCapture] - Command runner (default: runner.runCapture). + * @param {string} [opts.platform] - OS platform (default: process.platform). 
+ * @returns {{ type: string, count: number, totalMemoryMB: number, perGpuMB: number, nimCapable: boolean, spark?: boolean, name?: string, cores?: number } | null} + */ +function detectGpu(opts) { + const runCmd = (opts && opts.runCapture) || runCapture; + const platform = (opts && opts.platform) || process.platform; + // Try NVIDIA first — query VRAM try { - const output = runCapture( + const output = runCmd( "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits", { ignoreError: true } ); @@ -35,8 +49,18 @@ function detectGpu() { const perGpuMB = lines.map((l) => parseInt(l.trim(), 10)).filter((n) => !isNaN(n)); if (perGpuMB.length > 0) { const totalMemoryMB = perGpuMB.reduce((a, b) => a + b, 0); + // Query GPU name for display + let name; + try { + name = runCmd( + "nvidia-smi --query-gpu=name --format=csv,noheader,nounits", + { ignoreError: true } + ); + if (name) name = name.split("\n")[0].trim(); + } catch {} return { type: "nvidia", + name, count: perGpuMB.length, totalMemoryMB, perGpuMB: perGpuMB[0], @@ -48,19 +72,21 @@ function detectGpu() { // Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture try { - const nameOutput = runCapture( + const nameOutput = runCmd( "nvidia-smi --query-gpu=name --format=csv,noheader,nounits", { ignoreError: true } ); if (nameOutput && nameOutput.includes("GB10")) { + const name = nameOutput.split("\n")[0].trim(); // GB10 has 128GB unified memory shared with Grace CPU — use system RAM let totalMemoryMB = 0; try { - const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); + const memLine = runCmd("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; } catch {} return { type: "nvidia", + name, count: 1, totalMemoryMB, perGpuMB: totalMemoryMB, @@ -71,9 +97,9 @@ function detectGpu() { } catch {} // macOS: detect Apple Silicon or discrete GPU - if (process.platform === "darwin") { + if 
/**
 * Suggest NIM models ranked by fit for a given GPU.
 *
 * Models whose `minGpuMemoryMB` exceeds the GPU's `totalMemoryMB` are dropped.
 * The remainder is sorted by VRAM requirement, largest first, and the largest
 * model that fits within `headroom * totalMemoryMB` is flagged as recommended.
 * If no model fits within the headroom, every entry has `recommended: false`.
 *
 * @param {{ totalMemoryMB: number, nimCapable: boolean } | null} gpu
 *   GPU description, or null when no GPU was detected.
 * @param {object} [opts] - Optional tuning knobs.
 * @param {number} [opts.headroom=0.9] - Fraction of total VRAM a model may
 *   require and still be recommended. Non-numeric values fall back to 0.9.
 * @returns {Array<{ name: string, image: string, minGpuMemoryMB: number, recommended: boolean }>}
 *   Fitting models, largest first; empty when `gpu` is null or not NIM-capable.
 */
function suggestModelsForGpu(gpu, opts) {
  if (!gpu || !gpu.nimCapable) return [];

  // Tolerate a missing or non-object second argument (e.g. an index passed by
  // Array.prototype.map) by falling back to the default headroom.
  const headroom = opts && typeof opts.headroom === "number" ? opts.headroom : 0.9;

  const vram = gpu.totalMemoryMB;
  const fits = listModels()
    .filter((m) => m.minGpuMemoryMB <= vram)
    .sort((a, b) => b.minGpuMemoryMB - a.minGpuMemoryMB);

  // The list is sorted largest-first, so the first entry under the threshold
  // is the largest model that still leaves the requested headroom.
  const threshold = vram * headroom;
  const recommendedIndex = fits.findIndex((m) => m.minGpuMemoryMB <= threshold);

  return fits.map((m, i) => ({ ...m, recommended: i === recommendedIndex }));
}
*/ function startNimContainer(sandboxName, model, port = 8000) { const name = containerName(sandboxName); const image = getImageForModel(model); @@ -143,6 +193,7 @@ function startNimContainer(sandboxName, model, port = 8000) { return name; } +/** @param {number} [port=8000] @param {number} [timeout=300] @returns {boolean} True if healthy. */ function waitForNimHealth(port = 8000, timeout = 300) { const start = Date.now(); const interval = 5000; @@ -165,6 +216,7 @@ function waitForNimHealth(port = 8000, timeout = 300) { return false; } +/** @param {string} sandboxName - Stop and remove the NIM container. */ function stopNimContainer(sandboxName) { const name = containerName(sandboxName); console.log(` Stopping NIM container: ${name}`); @@ -172,6 +224,7 @@ function stopNimContainer(sandboxName) { run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true }); } +/** @param {string} sandboxName @returns {{running: boolean, healthy?: boolean, container: string, state?: string}} */ function nimStatus(sandboxName) { const name = containerName(sandboxName); try { @@ -199,6 +252,7 @@ module.exports = { getImageForModel, listModels, detectGpu, + suggestModelsForGpu, pullNimImage, startNimContainer, waitForNimHealth, diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 23f19b01a8..b370561865 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -314,7 +314,8 @@ async function preflight() { // GPU const gpu = nim.detectGpu(); if (gpu && gpu.type === "nvidia") { - console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); + const label = gpu.name ? `${gpu.name}, ` : ""; + console.log(` ✓ NVIDIA GPU detected: ${label}${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); } else if (gpu && gpu.type === "apple") { console.log(` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? 
` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`); console.log(" ⓘ NIM requires NVIDIA GPU — will use cloud inference"); @@ -540,11 +541,12 @@ async function setupNim(sandboxName, gpu) { if (selected.key === "nim") { // List models that fit GPU VRAM - const models = nim.listModels().filter((m) => m.minGpuMemoryMB <= gpu.totalMemoryMB); + const models = nim.suggestModelsForGpu(gpu); if (models.length === 0) { console.log(" No NIM models fit your GPU VRAM. Falling back to cloud API."); } else { let sel; + const defaultModelIndex = Math.max(0, models.findIndex((m) => m.recommended)); if (isNonInteractive()) { if (requestedModel) { sel = models.find((m) => m.name === requestedModel); @@ -553,20 +555,22 @@ async function setupNim(sandboxName, gpu) { process.exit(1); } } else { - sel = models[0]; + sel = models[defaultModelIndex]; } console.log(` [non-interactive] NIM model: ${sel.name}`); } else { console.log(""); console.log(" Models that fit your GPU:"); models.forEach((m, i) => { - console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)`); + const tag = m.recommended ? 
" (recommended)" : ""; + console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)${tag}`); }); console.log(""); - const modelChoice = await prompt(` Choose model [1]: `); - const midx = parseInt(modelChoice || "1", 10) - 1; - sel = models[midx] || models[0]; + const defaultChoice = String(defaultModelIndex + 1); + const modelChoice = await prompt(` Choose model [${defaultChoice}]: `); + const midx = parseInt(modelChoice || defaultChoice, 10) - 1; + sel = models[midx] || models[defaultModelIndex]; } model = sel.name; diff --git a/test/nim.test.js b/test/nim.test.js index 8166cf6c43..4da1596d07 100644 --- a/test/nim.test.js +++ b/test/nim.test.js @@ -74,4 +74,174 @@ describe("nim", () => { assert.equal(st.running, false); }); }); + + describe("detectGpu (injected)", () => { + function mockRunCapture(responses) { + return function (cmd) { + for (const [pattern, response] of responses) { + if (cmd.includes(pattern)) { + if (response instanceof Error) throw response; + return response; + } + } + throw new Error("mock: no match for " + cmd); + }; + } + + it("detects standard NVIDIA GPU", () => { + const gpu = nim.detectGpu({ + runCapture: mockRunCapture([ + ["memory.total", "8192"], + ]), + }); + assert.equal(gpu.type, "nvidia"); + assert.equal(gpu.count, 1); + assert.equal(gpu.totalMemoryMB, 8192); + assert.equal(gpu.perGpuMB, 8192); + assert.equal(gpu.nimCapable, true); + assert.equal(gpu.spark, undefined); + }); + + it("detects multiple NVIDIA GPUs", () => { + const gpu = nim.detectGpu({ + runCapture: mockRunCapture([ + ["memory.total", "8192\n8192"], + ]), + }); + assert.equal(gpu.type, "nvidia"); + assert.equal(gpu.count, 2); + assert.equal(gpu.totalMemoryMB, 16384); + assert.equal(gpu.perGpuMB, 8192); + }); + + it("detects DGX Spark GB10", () => { + const gpu = nim.detectGpu({ + runCapture: mockRunCapture([ + ["memory.total", ""], + ["name", "NVIDIA GB10"], + ["free -m", "122880"], + ]), + }); + assert.equal(gpu.type, "nvidia"); + assert.equal(gpu.name, 
"NVIDIA GB10"); + assert.equal(gpu.spark, true); + assert.equal(gpu.count, 1); + assert.equal(gpu.totalMemoryMB, 122880); + }); + + it("handles Spark with free -m failure", () => { + const gpu = nim.detectGpu({ + runCapture: mockRunCapture([ + ["memory.total", ""], + ["name", "NVIDIA GB10"], + ["free -m", new Error("command failed")], + ]), + }); + assert.equal(gpu.type, "nvidia"); + assert.equal(gpu.spark, true); + assert.equal(gpu.totalMemoryMB, 0); + }); + + it("detects macOS discrete GPU via VRAM", () => { + const gpu = nim.detectGpu({ + platform: "darwin", + runCapture: mockRunCapture([ + ["memory.total", new Error("no nvidia-smi")], + ["name", new Error("no nvidia-smi")], + ["system_profiler", "Chipset Model: Apple M2 Pro\n VRAM (Total): 16 GB\n Total Number of Cores: 19"], + ]), + }); + assert.equal(gpu.type, "apple"); + assert.equal(gpu.name, "Apple M2 Pro"); + assert.equal(gpu.nimCapable, false); + assert.equal(gpu.totalMemoryMB, 16384); + assert.equal(gpu.cores, 19); + }); + + it("detects Apple Silicon with unified memory", () => { + const gpu = nim.detectGpu({ + platform: "darwin", + runCapture: mockRunCapture([ + ["memory.total", new Error("no nvidia-smi")], + ["query-gpu=name", new Error("no nvidia-smi")], + ["system_profiler", "Chipset Model: Apple M4\n Total Number of Cores: 10"], + ["hw.memsize", "17179869184"], + ]), + }); + assert.equal(gpu.type, "apple"); + assert.equal(gpu.name, "Apple M4"); + assert.equal(gpu.nimCapable, false); + assert.equal(gpu.totalMemoryMB, 16384); + assert.equal(gpu.cores, 10); + }); + + it("returns null when no GPU detected", () => { + const gpu = nim.detectGpu({ + platform: "linux", + runCapture: mockRunCapture([ + ["memory.total", new Error("no nvidia-smi")], + ["name", new Error("no nvidia-smi")], + ]), + }); + assert.equal(gpu, null); + }); + + it("non-GB10 NVIDIA has no spark property", () => { + const gpu = nim.detectGpu({ + runCapture: mockRunCapture([ + ["memory.total", "24576"], + ]), + }); + 
assert.equal(gpu.type, "nvidia"); + assert.equal(gpu.spark, undefined); + }); + }); + + describe("suggestModelsForGpu", () => { + it("returns empty for null GPU", () => { + assert.deepEqual(nim.suggestModelsForGpu(null), []); + }); + + it("returns empty for non-nimCapable GPU", () => { + assert.deepEqual(nim.suggestModelsForGpu({ totalMemoryMB: 16384, nimCapable: false }), []); + }); + + it("filters models that exceed VRAM", () => { + const models = nim.suggestModelsForGpu({ totalMemoryMB: 8000, nimCapable: true }); + for (const m of models) { + assert.ok(m.minGpuMemoryMB <= 8000, `${m.name} requires ${m.minGpuMemoryMB} MB`); + } + }); + + it("sorts by VRAM descending", () => { + const models = nim.suggestModelsForGpu({ totalMemoryMB: 200000, nimCapable: true }); + for (let i = 1; i < models.length; i++) { + assert.ok(models[i - 1].minGpuMemoryMB >= models[i].minGpuMemoryMB, + "models should be sorted by VRAM descending"); + } + }); + + it("marks exactly one model as recommended", () => { + const models = nim.suggestModelsForGpu({ totalMemoryMB: 200000, nimCapable: true }); + const recommended = models.filter((m) => m.recommended); + assert.equal(recommended.length, 1, "exactly one model should be recommended"); + }); + + it("recommended model fits within 90% VRAM", () => { + const vram = 32000; + const models = nim.suggestModelsForGpu({ totalMemoryMB: vram, nimCapable: true }); + const rec = models.find((m) => m.recommended); + if (rec) { + assert.ok(rec.minGpuMemoryMB <= vram * 0.9, + `recommended model (${rec.minGpuMemoryMB} MB) should fit within 90% of ${vram} MB`); + } + }); + + it("each entry has recommended boolean", () => { + const models = nim.suggestModelsForGpu({ totalMemoryMB: 200000, nimCapable: true }); + for (const m of models) { + assert.equal(typeof m.recommended, "boolean"); + } + }); + }); });