bradygaster · bradygaster · Mar 23, 2026 · Mar 23, 2026
diff --git a/.changeset/dual-mode-capabilities.md b/.changeset/dual-mode-capabilities.md
@@ -0,0 +1,12 @@
+---
+'@bradygaster/squad-sdk': minor
+---
+
+Add dual-mode deployment support for capabilities routing.
+
+New features:
+- `SQUAD_POD_ID` env var for pod-specific capability manifests
+- `SQUAD_DEPLOYMENT_MODE` env var (`agent-per-node` | `squad-per-pod`)
+- Pod-specific manifest loading: `.squad/machine-capabilities-{podId}.json`
+- Fallback chain: pod-specific → shared → user-home → `null` (opt-in — all issues pass through)
+- New exports: `getDeploymentMode()`, `getPodId()`, `generatePodCapabilitiesPath()`, `DeploymentMode` type
diff --git a/packages/squad-sdk/src/ralph/capabilities.ts b/packages/squad-sdk/src/ralph/capabilities.ts
@@ -13,12 +13,17 @@ import { existsSync } from 'node:fs';
 import path from 'node:path';
 import os from 'node:os';
 
+/** Deployment mode for capability routing */
+export type DeploymentMode = 'agent-per-node' | 'squad-per-pod';
+
 /** Machine capability manifest */
 export interface MachineCapabilities {
   machine: string;
   capabilities: string[];
   missing: string[];
   lastUpdated: string;
+  /** Pod identifier; in squad-per-pod mode `loadCapabilities` fills it from `SQUAD_POD_ID` when the manifest omits it */
+  podId?: string;
 }
 
 /** Well-known capability identifiers */
@@ -38,9 +43,45 @@ export type KnownCapability = typeof KNOWN_CAPABILITIES[number];
 /** Prefix for capability requirement labels */
 const NEEDS_PREFIX = 'needs:';
 
+/**
+ * Get the deployment mode from the `SQUAD_DEPLOYMENT_MODE` env var.
+ * Only the exact value `'squad-per-pod'` selects pod mode; unset or any other value yields `'agent-per-node'`.
+ */
+export function getDeploymentMode(): DeploymentMode {
+  const raw = process.env.SQUAD_DEPLOYMENT_MODE;
+  if (raw === 'squad-per-pod') return 'squad-per-pod';
+  return 'agent-per-node';
+}
+
+/**
+ * Get the pod identifier from the `SQUAD_POD_ID` env var.
+ * Returns `undefined` when the variable is unset or an empty string.
+ */
+export function getPodId(): string | undefined {
+  return process.env.SQUAD_POD_ID || undefined;
+}
+
+/**
+ * Build the path `<teamRoot>/.squad/machine-capabilities-<podId>.json` for a pod-specific manifest.
+ * NOTE(review): `podId` is interpolated as-is — presumably callers pass a plain name without path separators; confirm.
+ * @example
+ *   generatePodCapabilitiesPath('/app', 'squad-worker-7b4f6')
+ *   // → '/app/.squad/machine-capabilities-squad-worker-7b4f6.json'
+ */
+export function generatePodCapabilitiesPath(teamRoot: string, podId: string): string {
+  return path.join(teamRoot, '.squad', `machine-capabilities-${podId}.json`);
+}
+
 /**
  * Load machine capabilities from the standard location.
- * Checks (in order):
+ *
+ * When `SQUAD_POD_ID` is set **and** `SQUAD_DEPLOYMENT_MODE` is
+ * `squad-per-pod`, the search order becomes:
+ *   1. `.squad/machine-capabilities-{podId}.json` (pod-specific)
+ *   2. `.squad/machine-capabilities.json` (shared fallback)
+ *   3. `~/.squad/machine-capabilities.json` (user home fallback)
+ *
+ * Otherwise (default `agent-per-node` mode):
  *   1. `.squad/machine-capabilities.json` in the team root
  *   2. `~/.squad/machine-capabilities.json` in the user home
  *
@@ -50,8 +91,14 @@ export async function loadCapabilities(
   teamRoot?: string
 ): Promise<MachineCapabilities | null> {
   const candidates: string[] = [];
+  const mode = getDeploymentMode();
+  const podId = getPodId();
 
   if (teamRoot) {
+    // In squad-per-pod mode, try pod-specific manifest first
+    if (mode === 'squad-per-pod' && podId) {
+      candidates.push(generatePodCapabilitiesPath(teamRoot, podId));
+    }
     candidates.push(path.join(teamRoot, '.squad', 'machine-capabilities.json'));
   }
   candidates.push(path.join(os.homedir(), '.squad', 'machine-capabilities.json'));
@@ -60,7 +107,12 @@ export async function loadCapabilities(
     if (existsSync(candidate)) {
       try {
         const raw = await readFile(candidate, 'utf8');
-        return JSON.parse(raw) as MachineCapabilities;
+        const parsed = JSON.parse(raw) as MachineCapabilities;
+        // In pod mode, stamp podId onto the manifest unless the file already declares one
+        if (mode === 'squad-per-pod' && podId) {
+          parsed.podId = parsed.podId ?? podId;
+        }
+        return parsed;
       } catch {
         // Malformed file — skip
       }

diff --git a/packages/squad-sdk/src/ralph/index.ts b/packages/squad-sdk/src/ralph/index.ts
@@ -184,5 +184,5 @@ export class RalphMonitor {
   }
 }
 
-export { loadCapabilities, canHandleIssue, filterByCapabilities, extractNeeds, type MachineCapabilities, KNOWN_CAPABILITIES } from './capabilities.js';
+export { loadCapabilities, canHandleIssue, filterByCapabilities, extractNeeds, getDeploymentMode, getPodId, generatePodCapabilitiesPath, type MachineCapabilities, type DeploymentMode, KNOWN_CAPABILITIES } from './capabilities.js';
 export { getTrafficLight, shouldProceed, getRetryDelay, PredictiveCircuitBreaker, canUseQuota, loadRatePool, type RatePool, type RatePoolAllocation, type RateSample, type TrafficLight, type AgentPriority } from './rate-limiting.js';
diff --git a/templates/machine-capabilities.md b/templates/machine-capabilities.md
@@ -59,7 +59,11 @@ Ralph will log skipped issues:
 
 ## Kubernetes Integration
 
-On Kubernetes, machine capabilities map to node labels:
+Machine capabilities support two deployment modes on Kubernetes:
+
+### Mode A — Agent-per-node (default)
+
+One Ralph process per Kubernetes node. Each reads the node-local `machine-capabilities.json`. Use `nodeSelector` to pin Ralphs to nodes with the right hardware.
 
 ```yaml
 # Node labels (set by capability DaemonSet or manually)
@@ -72,4 +76,46 @@ spec:
     node.squad.dev/gpu: "true"
 ```
 
+No extra environment variables needed — this is the default mode.
+
+### Mode B — Squad-per-pod
+
+Multiple full Squad instances run as separate pods (on the same or different nodes). Each pod gets its own identity via the `SQUAD_POD_ID` environment variable; combined with `SQUAD_DEPLOYMENT_MODE=squad-per-pod`, this enables pod-specific capability manifests.
+
+```yaml
+# Deployment spec for squad-per-pod mode
+spec:
+  replicas: 3
+  template:
+    spec:
+      containers:
+        - name: squad
+          env:
+            - name: SQUAD_POD_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: SQUAD_DEPLOYMENT_MODE
+              value: squad-per-pod
+```
+
+When `SQUAD_POD_ID` is set and `SQUAD_DEPLOYMENT_MODE` is `squad-per-pod`, Ralph looks for a pod-specific manifest first:
+
+1. `.squad/machine-capabilities-{podId}.json` (pod-specific)
+2. `.squad/machine-capabilities.json` (shared fallback)
+3. `~/.squad/machine-capabilities.json` (user home fallback)
+4. `null` (opt-in — all issues pass through)
+
+Example pod-specific manifest (`.squad/machine-capabilities-squad-worker-7b4f6.json`):
+
+```json
+{
+  "machine": "squad-worker-7b4f6",
+  "capabilities": ["gpu", "docker", "azure-cli"],
+  "missing": ["browser", "onedrive"],
+  "lastUpdated": "2026-06-01T00:00:00Z",
+  "podId": "squad-worker-7b4f6"
+}
+```
+
 A DaemonSet can run capability discovery on each node and maintain labels automatically. See the [squad-on-aks](https://github.com/tamirdresher/squad-on-aks) project for a complete Kubernetes deployment example.
diff --git a/test/capabilities.test.ts b/test/capabilities.test.ts
@@ -1,10 +1,17 @@
-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import {
   extractNeeds,
   canHandleIssue,
   filterByCapabilities,
+  loadCapabilities,
+  getDeploymentMode,
+  getPodId,
+  generatePodCapabilitiesPath,
   type MachineCapabilities,
 } from '@bradygaster/squad-sdk/ralph/capabilities';
+import { existsSync, mkdirSync, writeFileSync, rmSync } from 'node:fs';
+import path from 'node:path';
+import os from 'node:os';
 
 const gpuMachine: MachineCapabilities = {
   machine: 'GPU-SERVER',
@@ -104,4 +111,132 @@ describe('filterByCapabilities', () => {
     expect(handled).toHaveLength(0);
     expect(skipped).toHaveLength(0);
   });
+});
+
+describe('dual-mode deployment', () => {
+  let savedPodId: string | undefined;
+  let savedMode: string | undefined;
+  let tmpDir: string;
+
+  beforeEach(() => {
+    savedPodId = process.env.SQUAD_POD_ID;
+    savedMode = process.env.SQUAD_DEPLOYMENT_MODE;
+    delete process.env.SQUAD_POD_ID;
+    delete process.env.SQUAD_DEPLOYMENT_MODE;
+
+    tmpDir = path.join(os.tmpdir(), `squad-cap-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    mkdirSync(path.join(tmpDir, '.squad'), { recursive: true });
+  });
+
+  afterEach(() => {
+    if (savedPodId !== undefined) process.env.SQUAD_POD_ID = savedPodId;
+    else delete process.env.SQUAD_POD_ID;
+    if (savedMode !== undefined) process.env.SQUAD_DEPLOYMENT_MODE = savedMode;
+    else delete process.env.SQUAD_DEPLOYMENT_MODE;
+
+    try { rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ }
+  });
+
+  it('loadCapabilities reads pod-specific manifest when SQUAD_POD_ID is set', async () => {
+    process.env.SQUAD_POD_ID = 'squad-worker-abc';
+    process.env.SQUAD_DEPLOYMENT_MODE = 'squad-per-pod';
+
+    const podManifest: MachineCapabilities = {
+      machine: 'POD-ABC',
+      capabilities: ['gpu', 'docker'],
+      missing: [],
+      lastUpdated: '2026-06-01T00:00:00Z',
+    };
+    writeFileSync(
+      path.join(tmpDir, '.squad', 'machine-capabilities-squad-worker-abc.json'),
+      JSON.stringify(podManifest),
+    );
+    // Also write shared manifest to ensure pod-specific wins
+    const sharedManifest: MachineCapabilities = {
+      machine: 'SHARED',
+      capabilities: ['browser'],
+      missing: ['gpu'],
+      lastUpdated: '2026-06-01T00:00:00Z',
+    };
+    writeFileSync(
+      path.join(tmpDir, '.squad', 'machine-capabilities.json'),
+      JSON.stringify(sharedManifest),
+    );
+
+    const caps = await loadCapabilities(tmpDir);
+    expect(caps).not.toBeNull();
+    expect(caps!.machine).toBe('POD-ABC');
+    expect(caps!.podId).toBe('squad-worker-abc');
+  });
+
+  it('loadCapabilities falls back to shared manifest when pod-specific not found', async () => {
+    process.env.SQUAD_POD_ID = 'squad-worker-xyz';
+    process.env.SQUAD_DEPLOYMENT_MODE = 'squad-per-pod';
+
+    const sharedManifest: MachineCapabilities = {
+      machine: 'SHARED-FALLBACK',
+      capabilities: ['browser'],
+      missing: ['gpu'],
+      lastUpdated: '2026-06-01T00:00:00Z',
+    };
+    writeFileSync(
+      path.join(tmpDir, '.squad', 'machine-capabilities.json'),
+      JSON.stringify(sharedManifest),
+    );
+
+    const caps = await loadCapabilities(tmpDir);
+    expect(caps).not.toBeNull();
+    expect(caps!.machine).toBe('SHARED-FALLBACK');
+    expect(caps!.podId).toBe('squad-worker-xyz');
+  });
+
+  it('loadCapabilities ignores SQUAD_POD_ID when SQUAD_DEPLOYMENT_MODE is agent-per-node', async () => {
+    process.env.SQUAD_POD_ID = 'squad-worker-abc';
+    process.env.SQUAD_DEPLOYMENT_MODE = 'agent-per-node';
+
+    const podManifest: MachineCapabilities = {
+      machine: 'POD-ABC',
+      capabilities: ['gpu', 'docker'],
+      missing: [],
+      lastUpdated: '2026-06-01T00:00:00Z',
+    };
+    writeFileSync(
+      path.join(tmpDir, '.squad', 'machine-capabilities-squad-worker-abc.json'),
+      JSON.stringify(podManifest),
+    );
+    const sharedManifest: MachineCapabilities = {
+      machine: 'SHARED',
+      capabilities: ['browser'],
+      missing: ['gpu'],
+      lastUpdated: '2026-06-01T00:00:00Z',
+    };
+    writeFileSync(
+      path.join(tmpDir, '.squad', 'machine-capabilities.json'),
+      JSON.stringify(sharedManifest),
+    );
+
+    const caps = await loadCapabilities(tmpDir);
+    expect(caps).not.toBeNull();
+    // Should read shared, not pod-specific, because mode is agent-per-node
+    expect(caps!.machine).toBe('SHARED');
+    expect(caps!.podId).toBeUndefined();
+  });
+
+  it('getDeploymentMode defaults to agent-per-node', () => {
+    delete process.env.SQUAD_DEPLOYMENT_MODE;
+    expect(getDeploymentMode()).toBe('agent-per-node');
+  });
+
+  it('getDeploymentMode reads SQUAD_DEPLOYMENT_MODE env var', () => {
+    process.env.SQUAD_DEPLOYMENT_MODE = 'squad-per-pod';
+    expect(getDeploymentMode()).toBe('squad-per-pod');
+  });
+
+  it('getPodId reads SQUAD_POD_ID env var', () => {
+    delete process.env.SQUAD_POD_ID;
+    expect(getPodId()).toBeUndefined();
+
+    process.env.SQUAD_POD_ID = 'my-pod-42';
+    expect(getPodId()).toBe('my-pod-42');
+  });
 });