Skip to content
70 changes: 62 additions & 8 deletions bin/lib/nim.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@
const { run, runCapture } = require("./runner");
const nimImages = require("./nim-images.json");

/**
 * Build the Docker container name used for a sandbox's NIM container.
 * @param {string} sandboxName - Sandbox identifier.
 * @returns {string} Docker container name.
 */
function containerName(sandboxName) {
  const prefix = "nemoclaw-nim-";
  return `${prefix}${sandboxName}`;
}

/**
 * Look up the NIM container image registered for a model.
 * @param {string} modelName - Model name to resolve.
 * @returns {string|null} NIM container image, or null when the model is unknown.
 */
function getImageForModel(modelName) {
  for (const model of nimImages.models) {
    if (model.name === modelName) return model.image;
  }
  return null;
}

/** @returns {Array<{name: string, image: string, minGpuMemoryMB: number}>} */
function listModels() {
return nimImages.models.map((m) => ({
name: m.name,
Expand All @@ -23,10 +26,21 @@ function listModels() {
}));
}

function detectGpu() {
/**
* Detect GPU hardware. Returns an object describing the GPU (type, count,
* memory, capabilities) or null if no GPU is found.
* @param {object} [opts] - Optional overrides for dependency injection.
* @param {Function} [opts.runCapture] - Command runner (default: runner.runCapture).
* @param {string} [opts.platform] - OS platform (default: process.platform).
* @returns {{ type: string, count: number, totalMemoryMB: number, perGpuMB: number, nimCapable: boolean, spark?: boolean, name?: string, cores?: number } | null}
*/
function detectGpu(opts) {
const runCmd = (opts && opts.runCapture) || runCapture;
const platform = (opts && opts.platform) || process.platform;

// Try NVIDIA first — query VRAM
try {
const output = runCapture(
const output = runCmd(
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
{ ignoreError: true }
);
Expand All @@ -35,8 +49,18 @@ function detectGpu() {
const perGpuMB = lines.map((l) => parseInt(l.trim(), 10)).filter((n) => !isNaN(n));
if (perGpuMB.length > 0) {
const totalMemoryMB = perGpuMB.reduce((a, b) => a + b, 0);
// Query GPU name for display
let name;
try {
name = runCmd(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (name) name = name.split("\n")[0].trim();
} catch {}
return {
type: "nvidia",
name,
count: perGpuMB.length,
totalMemoryMB,
perGpuMB: perGpuMB[0],
Expand All @@ -48,19 +72,21 @@ function detectGpu() {

// Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture
try {
const nameOutput = runCapture(
const nameOutput = runCmd(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (nameOutput && nameOutput.includes("GB10")) {
const name = nameOutput.split("\n")[0].trim();
// GB10 has 128GB unified memory shared with Grace CPU — use system RAM
let totalMemoryMB = 0;
try {
const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
const memLine = runCmd("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0;
} catch {}
return {
type: "nvidia",
name,
count: 1,
totalMemoryMB,
perGpuMB: totalMemoryMB,
Expand All @@ -71,9 +97,9 @@ function detectGpu() {
} catch {}

// macOS: detect Apple Silicon or discrete GPU
if (process.platform === "darwin") {
if (platform === "darwin") {
try {
const spOutput = runCapture(
const spOutput = runCmd(
"system_profiler SPDisplaysDataType 2>/dev/null",
{ ignoreError: true }
);
Expand All @@ -92,7 +118,7 @@ function detectGpu() {
} else {
// Apple Silicon shares system RAM — read total memory
try {
const memBytes = runCapture("sysctl -n hw.memsize", { ignoreError: true });
const memBytes = runCmd("sysctl -n hw.memsize", { ignoreError: true });
if (memBytes) memoryMB = Math.floor(parseInt(memBytes, 10) / 1024 / 1024);
} catch {}
}
Expand All @@ -101,7 +127,7 @@ function detectGpu() {
type: "apple",
name,
count: 1,
cores: coresMatch ? parseInt(coresMatch[1], 10) : null,
...(coresMatch ? { cores: parseInt(coresMatch[1], 10) } : {}),
totalMemoryMB: memoryMB,
perGpuMB: memoryMB,
nimCapable: false,
Expand All @@ -114,6 +140,29 @@ function detectGpu() {
return null;
}

/**
 * Suggest NIM models ranked by fit for a given GPU.
 * Returns models sorted by VRAM requirement (descending), with the largest
 * model that uses <=90% of available VRAM marked as recommended.
 * @param {{ totalMemoryMB: number, nimCapable: boolean } | null} gpu
 * @returns {Array<{ name: string, image: string, minGpuMemoryMB: number, recommended: boolean }>}
 */
function suggestModelsForGpu(gpu) {
  // No GPU, or one NIM can't run on — nothing to suggest.
  if (!gpu || !gpu.nimCapable) return [];

  const vram = gpu.totalMemoryMB;

  // Keep only models that fit in VRAM, largest requirement first.
  const candidates = listModels().filter((m) => m.minGpuMemoryMB <= vram);
  candidates.sort((a, b) => b.minGpuMemoryMB - a.minGpuMemoryMB);

  // Recommend the biggest model leaving >=10% VRAM headroom (if any).
  const headroomLimit = vram * 0.9;
  const recommendedIdx = candidates.findIndex((m) => m.minGpuMemoryMB <= headroomLimit);

  return candidates.map((m, i) => ({ ...m, recommended: i === recommendedIdx }));
}

function pullNimImage(model) {
const image = getImageForModel(model);
if (!image) {
Expand All @@ -125,6 +174,7 @@ function pullNimImage(model) {
return image;
}

/** @param {string} sandboxName @param {string} model @param {number} [port=8000] @returns {string} Container name. */
function startNimContainer(sandboxName, model, port = 8000) {
const name = containerName(sandboxName);
const image = getImageForModel(model);
Expand All @@ -143,6 +193,7 @@ function startNimContainer(sandboxName, model, port = 8000) {
return name;
}

/** @param {number} [port=8000] @param {number} [timeout=300] @returns {boolean} True if healthy. */
function waitForNimHealth(port = 8000, timeout = 300) {
const start = Date.now();
const interval = 5000;
Expand All @@ -165,13 +216,15 @@ function waitForNimHealth(port = 8000, timeout = 300) {
return false;
}

/** @param {string} sandboxName - Stop and remove the NIM container. */
function stopNimContainer(sandboxName) {
const name = containerName(sandboxName);
console.log(` Stopping NIM container: ${name}`);
run(`docker stop ${name} 2>/dev/null || true`, { ignoreError: true });
run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true });
}

/** @param {string} sandboxName @returns {{running: boolean, healthy?: boolean, container: string, state?: string}} */
function nimStatus(sandboxName) {
const name = containerName(sandboxName);
try {
Expand Down Expand Up @@ -199,6 +252,7 @@ module.exports = {
getImageForModel,
listModels,
detectGpu,
suggestModelsForGpu,
pullNimImage,
startNimContainer,
waitForNimHealth,
Expand Down
18 changes: 11 additions & 7 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ async function preflight() {
// GPU
const gpu = nim.detectGpu();
if (gpu && gpu.type === "nvidia") {
console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`);
const label = gpu.name ? `${gpu.name}, ` : "";
console.log(` ✓ NVIDIA GPU detected: ${label}${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`);
} else if (gpu && gpu.type === "apple") {
console.log(` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? ` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`);
console.log(" ⓘ NIM requires NVIDIA GPU — will use cloud inference");
Expand Down Expand Up @@ -540,11 +541,12 @@ async function setupNim(sandboxName, gpu) {

if (selected.key === "nim") {
// List models that fit GPU VRAM
const models = nim.listModels().filter((m) => m.minGpuMemoryMB <= gpu.totalMemoryMB);
const models = nim.suggestModelsForGpu(gpu);
if (models.length === 0) {
console.log(" No NIM models fit your GPU VRAM. Falling back to cloud API.");
} else {
let sel;
const defaultModelIndex = Math.max(0, models.findIndex((m) => m.recommended));
if (isNonInteractive()) {
if (requestedModel) {
sel = models.find((m) => m.name === requestedModel);
Expand All @@ -553,20 +555,22 @@ async function setupNim(sandboxName, gpu) {
process.exit(1);
}
} else {
sel = models[0];
sel = models[defaultModelIndex];
}
console.log(` [non-interactive] NIM model: ${sel.name}`);
} else {
console.log("");
console.log(" Models that fit your GPU:");
models.forEach((m, i) => {
console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)`);
const tag = m.recommended ? " (recommended)" : "";
console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)${tag}`);
});
console.log("");

const modelChoice = await prompt(` Choose model [1]: `);
const midx = parseInt(modelChoice || "1", 10) - 1;
sel = models[midx] || models[0];
const defaultChoice = String(defaultModelIndex + 1);
const modelChoice = await prompt(` Choose model [${defaultChoice}]: `);
const midx = parseInt(modelChoice || defaultChoice, 10) - 1;
sel = models[midx] || models[defaultModelIndex];
}
model = sel.name;

Expand Down
Loading