From 1e5ba7ac11abd4759282db1edb0c4ea94037a754 Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Tue, 24 Mar 2026 19:07:20 +0000 Subject: [PATCH 1/3] feat(router): add snapshot manager and capture/reuse worker container snapshots --- src/router/container-manager.ts | 188 ++++++- src/router/snapshot-manager.ts | 96 ++++ .../unit/router/snapshot-integration.test.ts | 467 ++++++++++++++++++ tests/unit/router/snapshot-manager.test.ts | 172 +++++++ 4 files changed, 905 insertions(+), 18 deletions(-) create mode 100644 src/router/snapshot-manager.ts create mode 100644 tests/unit/router/snapshot-integration.test.ts create mode 100644 tests/unit/router/snapshot-manager.test.ts diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index 4eb92ac7..bad19f88 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -5,9 +5,10 @@ * Each BullMQ job gets its own isolated Docker container. * * State management, env building, and orphan cleanup are in dedicated modules: - * - active-workers.ts — ActiveWorker state tracking - * - worker-env.ts — Job data parsing + env building - * - orphan-cleanup.ts — Periodic orphan container cleanup + * - active-workers.ts — ActiveWorker state tracking + * - worker-env.ts — Job data parsing + env building + * - orphan-cleanup.ts — Periodic orphan container cleanup + * - snapshot-manager.ts — Snapshot metadata registry */ import type { Job } from 'bullmq'; @@ -21,6 +22,7 @@ import { loadProjectConfig, routerConfig } from './config.js'; import { notifyTimeout } from './notifications.js'; import { stopOrphanCleanup } from './orphan-cleanup.js'; import type { CascadeJob } from './queue.js'; +import { getSnapshot, registerSnapshot } from './snapshot-manager.js'; import { clearAllWorkItemLocks } from './work-item-lock.js'; import { buildWorkerEnvWithProjectId, @@ -46,15 +48,145 @@ export { buildWorkerEnv, extractProjectIdFromJob, } from './worker-env.js'; +export { + getSnapshot, + invalidateSnapshot, + registerSnapshot, +} from './snapshot-manager.js'; const docker = new Docker(); /** Buffer added on top of the in-container watchdog so the router kill is always a backstop. */ const ROUTER_KILL_BUFFER_MS = 2 * 60 * 1000; +/** + * Build a stable Docker image name for a snapshot. + * Uses a sanitised project+workItem key so it's valid as a Docker image tag. + */ +function buildSnapshotImageName(projectId: string, workItemId: string): string { + // Sanitise: lowercase, replace non-alphanumeric with '-', collapse runs + const sanitise = (s: string) => + s + .toLowerCase() + .replace(/[^a-z0-9]/g, '-') + .replace(/-+/g, '-') + .replace(/^-|-$/g, ''); + return `cascade-snapshot-${sanitise(projectId)}-${sanitise(workItemId)}:latest`; +} + +/** + * Commit a container to a snapshot image and register the metadata. + * On failure the error is logged and swallowed — snapshot failure must not + * break the normal post-run flow. + */ +async function commitContainerToSnapshot( + containerId: string, + projectId: string, + workItemId: string, +): Promise { + const imageName = buildSnapshotImageName(projectId, workItemId); + try { + const container = docker.getContainer(containerId); + await container.commit({ repo: imageName.split(':')[0], tag: 'latest' }); + registerSnapshot(projectId, workItemId, imageName); + logger.info('[WorkerManager] Committed container to snapshot image:', { + containerId: containerId.slice(0, 12), + imageName, + projectId, + workItemId, + }); + } catch (err) { + logger.warn('[WorkerManager] Failed to commit container to snapshot (non-fatal):', { + containerId: containerId.slice(0, 12), + imageName, + error: String(err), + }); + captureException(err, { + tags: { source: 'snapshot_commit' }, + extra: { containerId, imageName, projectId, workItemId }, + level: 'warning', + }); + } +} + +/** + * Remove a container (used after manual snapshot commit to clean up). + * Swallows errors — the container may already be removed. + */ +async function removeContainer(containerId: string): Promise { + try { + const container = docker.getContainer(containerId); + await container.remove({ force: true }); + } catch { + // Container may already be removed — not an error + } +} + +interface SpawnSettings { + snapshotEnabled: boolean; + workerImage: string; + containerTimeoutMs: number; +} + +/** + * Resolve per-project spawn settings (snapshot flag, image, timeout). + * Centralises all loadProjectConfig() calls so spawnWorker stays simple. + */ +async function resolveSpawnSettings( + projectId: string | null, + workItemId: string | undefined, + jobId: string, +): Promise { + let snapshotEnabled = false; + let workerImage = routerConfig.workerImage; + let containerTimeoutMs = routerConfig.workerTimeoutMs; + + if (!projectId) return { snapshotEnabled, workerImage, containerTimeoutMs }; + + const { fullProjects } = await loadProjectConfig(); + const projectCfg = fullProjects.find((p) => p.id === projectId); + + // Project-level snapshotEnabled overrides the global default + snapshotEnabled = projectCfg?.snapshotEnabled ?? routerConfig.snapshotEnabled; + + if (snapshotEnabled && workItemId) { + const snapshot = getSnapshot(projectId, workItemId); + if (snapshot) { + logger.info('[WorkerManager] Snapshot hit — using snapshot image:', { + jobId, + imageName: snapshot.imageName, + projectId, + workItemId, + }); + workerImage = snapshot.imageName; + } else { + logger.info('[WorkerManager] Snapshot miss — using base worker image:', { + jobId, + projectId, + workItemId, + }); + } + } + + // Determine container timeout: use project's watchdogTimeoutMs + buffer if available, + // falling back to the global workerTimeoutMs. This makes watchdogTimeoutMs the single source + // of truth — the in-container watchdog fires first, router kill is a backup. + if (projectCfg?.watchdogTimeoutMs) { + containerTimeoutMs = projectCfg.watchdogTimeoutMs + ROUTER_KILL_BUFFER_MS; + } + + return { snapshotEnabled, workerImage, containerTimeoutMs }; +} + /** * Spawn a worker container for a job. * Sets up timeout tracking and monitors container exit asynchronously. + * + * Snapshot behaviour (when the project has snapshotEnabled): + * - Prefers a valid snapshot image over the base worker image when available. + * - Disables AutoRemove so the container can be committed on clean exit. + * - On successful exit, commits the container to a snapshot image. + * - On failed/timed-out exit, does NOT create a snapshot. */ export async function spawnWorker(job: Job): Promise { const jobId = job.id ?? `unknown-${Date.now()}`; @@ -69,35 +201,35 @@ export async function spawnWorker(job: Job): Promise { // (needed by orphan cleanup to narrow DB fallback queries to the right agent type) const agentType = extractAgentType(job.data); - // Determine container timeout: use project's watchdogTimeoutMs + buffer if available, - // falling back to the global workerTimeoutMs. This makes watchdogTimeoutMs the single source - // of truth — the in-container watchdog fires first, router kill is a backup. - let containerTimeoutMs = routerConfig.workerTimeoutMs; - if (projectId) { - const { fullProjects } = await loadProjectConfig(); - const projectCfg = fullProjects.find((p) => p.id === projectId); - if (projectCfg?.watchdogTimeoutMs) { - containerTimeoutMs = projectCfg.watchdogTimeoutMs + ROUTER_KILL_BUFFER_MS; - } - } + const workItemId = extractWorkItemId(job.data); + + const { snapshotEnabled, workerImage, containerTimeoutMs } = await resolveSpawnSettings( + projectId, + workItemId, + jobId, + ); logger.info('[WorkerManager] Spawning worker:', { jobId, type: job.data.type, containerName, hasCredentials, + snapshotEnabled, + workerImage, }); try { const container = await docker.createContainer({ - Image: routerConfig.workerImage, + Image: workerImage, name: containerName, Env: workerEnv, HostConfig: { Memory: routerConfig.workerMemoryMb * 1024 * 1024, MemorySwap: routerConfig.workerMemoryMb * 1024 * 1024, // No swap NetworkMode: routerConfig.dockerNetwork, - AutoRemove: true, // Clean up container on exit + // Disable AutoRemove for snapshot-enabled runs so the container remains + // available for docker commit after a successful exit. + AutoRemove: !snapshotEnabled, }, Labels: { 'cascade.job.id': jobId, @@ -105,6 +237,7 @@ export async function spawnWorker(job: Job): Promise { 'cascade.managed': 'true', 'cascade.project.id': projectId ?? '', 'cascade.agent.type': agentType ?? '', + 'cascade.snapshot.enabled': snapshotEnabled ? 'true' : 'false', }, }); @@ -129,7 +262,6 @@ export async function spawnWorker(job: Job): Promise { }, containerTimeoutMs); // Track the worker - const workItemId = extractWorkItemId(job.data); activeWorkers.set(jobId, { containerId: container.id, jobId, @@ -150,7 +282,7 @@ export async function spawnWorker(job: Job): Promise { container .wait() .then(async (result) => { - // Collect worker logs before auto-removal + // Collect worker logs before removal try { const logs = await container.logs({ stdout: true, @@ -179,6 +311,22 @@ export async function spawnWorker(job: Job): Promise { jobId, statusCode: result.StatusCode, }); + + // For snapshot-enabled runs, commit on clean exit and then remove the container. + // Failed or timed-out runs must NOT create a snapshot. + if (snapshotEnabled && projectId && workItemId) { + if (result.StatusCode === 0) { + await commitContainerToSnapshot(container.id, projectId, workItemId); + } else { + logger.info('[WorkerManager] Skipping snapshot commit after non-zero exit:', { + jobId, + statusCode: result.StatusCode, + }); + } + // Always remove the container manually since AutoRemove is disabled + await removeContainer(container.id); + } + cleanupWorker(jobId, result.StatusCode); }) .catch((err) => { @@ -187,6 +335,10 @@ export async function spawnWorker(job: Job): Promise { tags: { source: 'worker_wait', jobType: job.data.type }, extra: { jobId }, }); + // Ensure container is cleaned up even on wait error (snapshot runs only) + if (snapshotEnabled) { + removeContainer(container.id).catch(() => {}); + } cleanupWorker(jobId); }); } catch (err) { diff --git a/src/router/snapshot-manager.ts b/src/router/snapshot-manager.ts new file mode 100644 index 00000000..4d132f06 --- /dev/null +++ b/src/router/snapshot-manager.ts @@ -0,0 +1,96 @@ +/** + * Snapshot metadata manager for CASCADE worker containers. + * + * Tracks reusable snapshot metadata keyed by project and work item so the + * router can make reuse decisions safely. Snapshots allow repeat runs of + * the same work item to start from a pre-built Docker image instead of the + * base worker image, reducing setup time. + * + * This module is pure state management — no Docker API usage. + * Docker commit operations are triggered from container-manager.ts. + */ + +import { logger } from '../utils/logging.js'; + +export interface SnapshotMetadata { + /** Docker image name (e.g., 'cascade-snapshot-proj-1-card-abc:latest') */ + imageName: string; + /** Project ID this snapshot belongs to */ + projectId: string; + /** Work item ID this snapshot was built for */ + workItemId: string; + /** Wall-clock timestamp when the snapshot was created */ + createdAt: Date; +} + +/** In-memory snapshot registry keyed by `${projectId}:${workItemId}` */ +const snapshots = new Map(); + +function snapshotKey(projectId: string, workItemId: string): string { + return `${projectId}:${workItemId}`; +} + +/** + * Register or refresh snapshot metadata for a project+workItem pair. + * Overwrites any existing entry for the same key. + */ +export function registerSnapshot( + projectId: string, + workItemId: string, + imageName: string, +): SnapshotMetadata { + const key = snapshotKey(projectId, workItemId); + const metadata: SnapshotMetadata = { + imageName, + projectId, + workItemId, + createdAt: new Date(), + }; + snapshots.set(key, metadata); + logger.info('[SnapshotManager] Snapshot registered:', { + projectId, + workItemId, + imageName, + }); + return metadata; +} + +/** + * Look up snapshot metadata for a project+workItem pair. + * Returns undefined if no snapshot exists. + */ +export function getSnapshot(projectId: string, workItemId: string): SnapshotMetadata | undefined { + return snapshots.get(snapshotKey(projectId, workItemId)); +} + +/** + * Invalidate (remove) snapshot metadata for a project+workItem pair. + * Safe to call even if no snapshot exists. + */ +export function invalidateSnapshot(projectId: string, workItemId: string): void { + const key = snapshotKey(projectId, workItemId); + const hadEntry = snapshots.delete(key); + if (hadEntry) { + logger.info('[SnapshotManager] Snapshot invalidated:', { + projectId, + workItemId, + }); + } +} + +/** + * Return the number of currently registered snapshots. + * Primarily useful for tests and monitoring. + */ +export function getSnapshotCount(): number { + return snapshots.size; +} + +/** + * Clear all snapshot metadata. + * Intended for use in tests and clean-shutdown scenarios. + * @internal Visible for testing + */ +export function _clearAllSnapshots(): void { + snapshots.clear(); +} diff --git a/tests/unit/router/snapshot-integration.test.ts b/tests/unit/router/snapshot-integration.test.ts new file mode 100644 index 00000000..bd294481 --- /dev/null +++ b/tests/unit/router/snapshot-integration.test.ts @@ -0,0 +1,467 @@ +/** + * Tests for snapshot-related behaviour in container-manager.ts: + * - Snapshot-disabled projects use AutoRemove=true and the base worker image + * - Snapshot hit: uses snapshot image, AutoRemove=false + * - Snapshot miss: uses base image, AutoRemove=false + * - Successful exit: commits container to snapshot + * - Non-zero exit: does NOT commit container to snapshot + * - Container is removed after exit for snapshot-enabled runs + */ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Hoisted mock state +// --------------------------------------------------------------------------- + +const { + mockDockerCreateContainer, + mockDockerGetContainer, + mockLoadProjectConfig, + mockGetSnapshot, + mockRegisterSnapshot, +} = vi.hoisted(() => ({ + mockDockerCreateContainer: vi.fn(), + mockDockerGetContainer: vi.fn(), + mockLoadProjectConfig: vi.fn().mockResolvedValue({ projects: [], fullProjects: [] }), + mockGetSnapshot: vi.fn().mockReturnValue(undefined), + mockRegisterSnapshot: vi.fn(), +})); + +// --------------------------------------------------------------------------- +// Module-level mocks +// --------------------------------------------------------------------------- + +vi.mock('dockerode', () => ({ + default: vi.fn().mockImplementation(() => ({ + createContainer: mockDockerCreateContainer, + getContainer: mockDockerGetContainer, + })), +})); + +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), +})); + +vi.mock('../../../src/config/provider.js', () => ({ + findProjectByRepo: vi.fn(), + getAllProjectCredentials: vi.fn().mockResolvedValue({}), +})); + +vi.mock('../../../src/db/repositories/runsRepository.js', () => ({ + failOrphanedRun: vi.fn().mockResolvedValue(null), + failOrphanedRunFallback: vi.fn().mockResolvedValue(null), +})); + +vi.mock('../../../src/config/configCache.js', () => ({ + configCache: { + getConfig: vi.fn().mockReturnValue(null), + getProjectByBoardId: vi.fn().mockReturnValue(null), + getProjectByRepo: vi.fn().mockReturnValue(null), + setConfig: vi.fn(), + setProjectByBoardId: vi.fn(), + setProjectByRepo: vi.fn(), + invalidate: vi.fn(), + }, +})); + +vi.mock('../../../src/router/notifications.js', () => ({ + notifyTimeout: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock('../../../src/router/work-item-lock.js', () => ({ + clearWorkItemEnqueued: vi.fn(), + clearAllWorkItemLocks: vi.fn(), +})); + +vi.mock('../../../src/router/agent-type-lock.js', () => ({ + clearAgentTypeEnqueued: vi.fn(), + clearAllAgentTypeLocks: vi.fn(), +})); + +vi.mock('../../../src/router/snapshot-manager.js', () => ({ + getSnapshot: (...args: unknown[]) => mockGetSnapshot(...args), + registerSnapshot: (...args: unknown[]) => mockRegisterSnapshot(...args), + invalidateSnapshot: vi.fn(), +})); + +vi.mock('../../../src/router/config.js', () => ({ + routerConfig: { + redisUrl: 'redis://localhost:6379', + maxWorkers: 3, + workerImage: 'base-worker:latest', + workerMemoryMb: 512, + workerTimeoutMs: 5000, + dockerNetwork: 'test-network', + snapshotEnabled: false, + snapshotDefaultTtlMs: 86400000, + snapshotMaxCount: 5, + snapshotMaxSizeBytes: 10737418240, + }, + loadProjectConfig: (...args: unknown[]) => mockLoadProjectConfig(...args), +})); + +// --------------------------------------------------------------------------- +// Imports (after mocks) +// --------------------------------------------------------------------------- + +import { getAllProjectCredentials } from '../../../src/config/provider.js'; +import { detachAll, spawnWorker } from '../../../src/router/container-manager.js'; +import type { CascadeJob } from '../../../src/router/queue.js'; + +const mockGetAllProjectCredentials = vi.mocked(getAllProjectCredentials); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeJob(overrides: Partial<{ id: string; data: CascadeJob }> = {}) { + return { + id: overrides.id ?? 'job-snap-1', + data: + overrides.data ?? + ({ + type: 'trello', + projectId: 'proj-snap', + workItemId: 'card-snap', + } as CascadeJob), + }; +} + +function setupMockContainer(exitCode = 0) { + let resolveWait!: (v: { StatusCode: number }) => void; + const waitPromise = new Promise<{ StatusCode: number }>((res) => { + resolveWait = res; + }); + + const container = { + id: 'container-snap-abc123', + start: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockReturnValue(waitPromise), + logs: vi.fn().mockResolvedValue(Buffer.from('')), + stop: vi.fn().mockResolvedValue(undefined), + commit: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), + }; + + mockDockerCreateContainer.mockResolvedValue(container); + mockDockerGetContainer.mockReturnValue(container); + + return { + container, + resolveWait: (code = exitCode) => resolveWait({ StatusCode: code }), + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('spawnWorker — snapshot disabled', () => { + beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'info').mockImplementation(() => {}); + mockGetAllProjectCredentials.mockResolvedValue({}); + mockLoadProjectConfig.mockResolvedValue({ projects: [], fullProjects: [] }); + mockGetSnapshot.mockReturnValue(undefined); + detachAll(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + detachAll(); + }); + + it('uses base worker image and AutoRemove=true when snapshot is disabled for project', async () => { + // snapshotEnabled not set on project — defaults to routerConfig.snapshotEnabled (false) + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', watchdogTimeoutMs: undefined, snapshotEnabled: false }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + Image: 'base-worker:latest', + HostConfig: expect.objectContaining({ AutoRemove: true }), + }), + ); + + resolveWait(); + }); + + it('does NOT commit the container on successful exit when snapshot is disabled', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: false }], + }); + const { container, resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + resolveWait(0); + await new Promise((r) => setTimeout(r, 20)); + + expect(container.commit).not.toHaveBeenCalled(); + expect(mockRegisterSnapshot).not.toHaveBeenCalled(); + }); +}); + +describe('spawnWorker — snapshot miss (enabled, no existing snapshot)', () => { + beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'info').mockImplementation(() => {}); + mockGetAllProjectCredentials.mockResolvedValue({}); + mockGetSnapshot.mockReturnValue(undefined); // no snapshot in registry + detachAll(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + detachAll(); + }); + + it('falls back to base worker image when no snapshot exists', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ Image: 'base-worker:latest' }), + ); + + resolveWait(); + }); + + it('uses AutoRemove=false when snapshot is enabled (miss)', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + HostConfig: expect.objectContaining({ AutoRemove: false }), + }), + ); + + resolveWait(); + }); + + it('commits container on successful exit and removes it', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { container, resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + resolveWait(0); + await new Promise((r) => setTimeout(r, 20)); + + expect(container.commit).toHaveBeenCalledWith( + expect.objectContaining({ + repo: expect.stringContaining('cascade-snapshot-proj-snap-card-snap'), + tag: 'latest', + }), + ); + expect(container.remove).toHaveBeenCalled(); + }); + + it('does NOT commit on non-zero exit', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { container, resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + resolveWait(1); // non-zero exit + await new Promise((r) => setTimeout(r, 20)); + + expect(container.commit).not.toHaveBeenCalled(); + expect(mockRegisterSnapshot).not.toHaveBeenCalled(); + }); + + it('removes container even on non-zero exit (snapshot run cleanup)', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { container, resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + resolveWait(1); + await new Promise((r) => setTimeout(r, 20)); + + expect(container.remove).toHaveBeenCalled(); + }); +}); + +describe('spawnWorker — snapshot hit (existing snapshot)', () => { + beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'info').mockImplementation(() => {}); + mockGetAllProjectCredentials.mockResolvedValue({}); + detachAll(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + detachAll(); + }); + + it('uses the snapshot image when a snapshot exists', async () => { + mockGetSnapshot.mockReturnValue({ + imageName: 'cascade-snapshot-proj-snap-card-snap:latest', + projectId: 'proj-snap', + workItemId: 'card-snap', + createdAt: new Date(), + }); + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + Image: 'cascade-snapshot-proj-snap-card-snap:latest', + }), + ); + + resolveWait(); + }); + + it('uses AutoRemove=false when snapshot is enabled (hit)', async () => { + mockGetSnapshot.mockReturnValue({ + imageName: 'cascade-snapshot-proj-snap-card-snap:latest', + projectId: 'proj-snap', + workItemId: 'card-snap', + createdAt: new Date(), + }); + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + HostConfig: expect.objectContaining({ AutoRemove: false }), + }), + ); + + resolveWait(); + }); + + it('sets cascade.snapshot.enabled label to true', async () => { + mockGetSnapshot.mockReturnValue({ + imageName: 'cascade-snapshot-proj-snap-card-snap:latest', + projectId: 'proj-snap', + workItemId: 'card-snap', + createdAt: new Date(), + }); + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + Labels: expect.objectContaining({ + 'cascade.snapshot.enabled': 'true', + }), + }), + ); + + resolveWait(); + }); + + it('commits container on successful exit after snapshot hit', async () => { + mockGetSnapshot.mockReturnValue({ + imageName: 'cascade-snapshot-proj-snap-card-snap:latest', + projectId: 'proj-snap', + workItemId: 'card-snap', + createdAt: new Date(), + }); + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + const { container, resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + resolveWait(0); + await new Promise((r) => setTimeout(r, 20)); + + expect(container.commit).toHaveBeenCalled(); + expect(mockRegisterSnapshot).toHaveBeenCalledWith( + 'proj-snap', + 'card-snap', + expect.stringContaining('cascade-snapshot-proj-snap-card-snap'), + ); + }); +}); + +describe('spawnWorker — snapshot label on disabled project', () => { + beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'info').mockImplementation(() => {}); + mockGetAllProjectCredentials.mockResolvedValue({}); + mockGetSnapshot.mockReturnValue(undefined); + detachAll(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + detachAll(); + }); + + it('sets cascade.snapshot.enabled label to false when snapshot is disabled', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: false }], + }); + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob() as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + Labels: expect.objectContaining({ + 'cascade.snapshot.enabled': 'false', + }), + }), + ); + + resolveWait(); + }); +}); diff --git a/tests/unit/router/snapshot-manager.test.ts b/tests/unit/router/snapshot-manager.test.ts new file mode 100644 index 00000000..2e5f3287 --- /dev/null +++ b/tests/unit/router/snapshot-manager.test.ts @@ -0,0 +1,172 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Module-level mocks +// --------------------------------------------------------------------------- + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, +})); + +// --------------------------------------------------------------------------- +// Imports (after mocks) +// --------------------------------------------------------------------------- + +import { + _clearAllSnapshots, + getSnapshot, + getSnapshotCount, + invalidateSnapshot, + registerSnapshot, +} from '../../../src/router/snapshot-manager.js'; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('snapshot-manager', () => { + beforeEach(() => { + _clearAllSnapshots(); + }); + + afterEach(() => { + _clearAllSnapshots(); + vi.clearAllMocks(); + }); + + // ------------------------------------------------------------------------- + // registerSnapshot + // ------------------------------------------------------------------------- + + describe('registerSnapshot', () => { + it('registers snapshot metadata and returns it', () => { + const result = registerSnapshot('proj-1', 'card-abc', 'my-image:latest'); + + expect(result).toMatchObject({ + imageName: 'my-image:latest', + projectId: 'proj-1', + workItemId: 'card-abc', + }); + expect(result.createdAt).toBeInstanceOf(Date); + }); + + it('can register snapshots for different project+workItem pairs', () => { + registerSnapshot('proj-1', 'card-1', 'image-1:latest'); + registerSnapshot('proj-1', 'card-2', 'image-2:latest'); + registerSnapshot('proj-2', 'card-1', 'image-3:latest'); + + expect(getSnapshotCount()).toBe(3); + }); + + it('overwrites existing snapshot for the same key', () => { + registerSnapshot('proj-1', 'card-abc', 'old-image:latest'); + registerSnapshot('proj-1', 'card-abc', 'new-image:latest'); + + const snapshot = getSnapshot('proj-1', 'card-abc'); + expect(snapshot?.imageName).toBe('new-image:latest'); + expect(getSnapshotCount()).toBe(1); + }); + + it('sets createdAt to current time', () => { + const before = new Date(); + const result = registerSnapshot('proj-1', 'card-abc', 'img:latest'); + const after = new Date(); + + expect(result.createdAt.getTime()).toBeGreaterThanOrEqual(before.getTime()); + expect(result.createdAt.getTime()).toBeLessThanOrEqual(after.getTime()); + }); + }); + + // ------------------------------------------------------------------------- + // getSnapshot + // ------------------------------------------------------------------------- + + describe('getSnapshot', () => { + it('returns undefined when no snapshot exists', () => { + expect(getSnapshot('proj-missing', 'card-missing')).toBeUndefined(); + }); + + it('returns registered snapshot metadata', () => { + registerSnapshot('proj-1', 'card-abc', 'my-image:latest'); + + const snapshot = getSnapshot('proj-1', 'card-abc'); + expect(snapshot).toBeDefined(); + expect(snapshot?.imageName).toBe('my-image:latest'); + expect(snapshot?.projectId).toBe('proj-1'); + expect(snapshot?.workItemId).toBe('card-abc'); + }); + + it('returns undefined for a different project with the same workItem', () => { + registerSnapshot('proj-1', 'card-abc', 'my-image:latest'); + + expect(getSnapshot('proj-2', 'card-abc')).toBeUndefined(); + }); + + it('returns undefined for a different workItem with the same project', () => { + registerSnapshot('proj-1', 'card-abc', 'my-image:latest'); + + expect(getSnapshot('proj-1', 'card-xyz')).toBeUndefined(); + }); + }); + + // ------------------------------------------------------------------------- + // invalidateSnapshot + // ------------------------------------------------------------------------- + + describe('invalidateSnapshot', () => { + it('removes a registered snapshot', () => { + registerSnapshot('proj-1', 'card-abc', 'my-image:latest'); + invalidateSnapshot('proj-1', 'card-abc'); + + expect(getSnapshot('proj-1', 'card-abc')).toBeUndefined(); + expect(getSnapshotCount()).toBe(0); + }); + + it('is a no-op when no snapshot exists for the key', () => { + expect(() => invalidateSnapshot('proj-missing', 'card-missing')).not.toThrow(); + }); + + it('only removes the targeted snapshot, leaving others intact', () => { + registerSnapshot('proj-1', 'card-1', 'image-1:latest'); + registerSnapshot('proj-1', 'card-2', 'image-2:latest'); + + invalidateSnapshot('proj-1', 'card-1'); + + expect(getSnapshot('proj-1', 'card-1')).toBeUndefined(); + expect(getSnapshot('proj-1', 'card-2')).toBeDefined(); + expect(getSnapshotCount()).toBe(1); + }); + }); + + // ------------------------------------------------------------------------- + // getSnapshotCount + // ------------------------------------------------------------------------- + + describe('getSnapshotCount', () => { + it('returns 0 when no snapshots are registered', () => { + expect(getSnapshotCount()).toBe(0); + }); + + it('returns the correct count after registering multiple snapshots', () => { + registerSnapshot('proj-1', 'card-1', 'img-1:latest'); + registerSnapshot('proj-1', 'card-2', 'img-2:latest'); + registerSnapshot('proj-2', 'card-1', 'img-3:latest'); + + expect(getSnapshotCount()).toBe(3); + }); + + it('decrements after invalidation', () => { + registerSnapshot('proj-1', 'card-1', 'img-1:latest'); + registerSnapshot('proj-1', 'card-2', 'img-2:latest'); + + invalidateSnapshot('proj-1', 'card-1'); + + expect(getSnapshotCount()).toBe(1); + }); + }); +}); From 2f8c2013b87d19fd2c9ecdb02345fae71df68383 Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Tue, 24 Mar 2026 19:18:23 +0000 Subject: [PATCH 2/3] fix(router): prevent container orphan when snapshotEnabled=true but workItemId is undefined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix container leak: broaden success-path guard from `snapshotEnabled && projectId && workItemId` to just `snapshotEnabled`, so removeContainer() is always reached for snapshot-enabled runs regardless of whether workItemId is present — consistent with the .catch() error path. Commit is still gated on `projectId && workItemId` inside the block. - Add TTL eviction to getSnapshot(): expired entries (age > snapshotDefaultTtlMs) are eagerly deleted and undefined is returned, preventing unbounded Map growth. - Mock routerConfig in snapshot-manager unit tests and add TTL eviction tests. Co-Authored-By: Claude Opus 4.6 --- src/router/container-manager.ts | 8 +++-- src/router/snapshot-manager.ts | 22 ++++++++++++-- tests/unit/router/snapshot-manager.test.ts | 35 ++++++++++++++++++++++ 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index bad19f88..3a491c8d 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -314,10 +314,12 @@ export async function spawnWorker(job: Job): Promise { // For snapshot-enabled runs, commit on clean exit and then remove the container. // Failed or timed-out runs must NOT create a snapshot. - if (snapshotEnabled && projectId && workItemId) { - if (result.StatusCode === 0) { + // Always remove — even when projectId/workItemId are absent — to avoid + // orphaning containers that ran with AutoRemove=false. + if (snapshotEnabled) { + if (result.StatusCode === 0 && projectId && workItemId) { await commitContainerToSnapshot(container.id, projectId, workItemId); - } else { + } else if (result.StatusCode !== 0) { logger.info('[WorkerManager] Skipping snapshot commit after non-zero exit:', { jobId, statusCode: result.StatusCode, diff --git a/src/router/snapshot-manager.ts b/src/router/snapshot-manager.ts index 4d132f06..37a7e9b4 100644 --- a/src/router/snapshot-manager.ts +++ b/src/router/snapshot-manager.ts @@ -11,6 +11,7 @@ */ import { logger } from '../utils/logging.js'; +import { routerConfig } from './config.js'; export interface SnapshotMetadata { /** Docker image name (e.g., 'cascade-snapshot-proj-1-card-abc:latest') */ @@ -57,10 +58,27 @@ export function registerSnapshot( /** * Look up snapshot metadata for a project+workItem pair. - * Returns undefined if no snapshot exists. + * Returns undefined if no snapshot exists or if the snapshot has exceeded the + * configured TTL (snapshotDefaultTtlMs). Expired entries are removed eagerly. */ export function getSnapshot(projectId: string, workItemId: string): SnapshotMetadata | undefined { - return snapshots.get(snapshotKey(projectId, workItemId)); + const key = snapshotKey(projectId, workItemId); + const metadata = snapshots.get(key); + if (!metadata) return undefined; + + const ageMs = Date.now() - metadata.createdAt.getTime(); + if (ageMs > routerConfig.snapshotDefaultTtlMs) { + snapshots.delete(key); + logger.info('[SnapshotManager] Snapshot expired and evicted:', { + projectId, + workItemId, + ageMs, + ttlMs: routerConfig.snapshotDefaultTtlMs, + }); + return undefined; + } + + return metadata; } /** diff --git a/tests/unit/router/snapshot-manager.test.ts b/tests/unit/router/snapshot-manager.test.ts index 2e5f3287..6613e96c 100644 --- a/tests/unit/router/snapshot-manager.test.ts +++ b/tests/unit/router/snapshot-manager.test.ts @@ -13,10 +13,20 @@ vi.mock('../../../src/utils/logging.js', () => ({ }, })); +vi.mock('../../../src/router/config.js', () => ({ + routerConfig: { + snapshotEnabled: false, + snapshotDefaultTtlMs: 86400000, // 24h default + snapshotMaxCount: 5, + snapshotMaxSizeBytes: 10737418240, + }, +})); + // --------------------------------------------------------------------------- // Imports (after mocks) // --------------------------------------------------------------------------- +import { routerConfig } from '../../../src/router/config.js'; import { _clearAllSnapshots, getSnapshot, @@ -112,6 +122,31 @@ describe('snapshot-manager', () => { expect(getSnapshot('proj-1', 'card-xyz')).toBeUndefined(); }); + + it('returns undefined and evicts an expired snapshot', () => { + // Register a snapshot with a createdAt in the past + const expired = registerSnapshot('proj-1', 'card-abc', 'old-image:latest'); + // Backdate createdAt beyond the TTL + const originalTtl = routerConfig.snapshotDefaultTtlMs; + (routerConfig as { snapshotDefaultTtlMs: number }).snapshotDefaultTtlMs = 1000; + // Set createdAt to 2 seconds ago so it's past the 1000ms TTL + expired.createdAt = new Date(Date.now() - 2000); + + expect(getSnapshot('proj-1', 'card-abc')).toBeUndefined(); + expect(getSnapshotCount()).toBe(0); + + // Restore original TTL + (routerConfig as { snapshotDefaultTtlMs: number }).snapshotDefaultTtlMs = originalTtl; + }); + + it('returns a valid snapshot that has not yet expired', () => { + const snapshot = registerSnapshot('proj-1', 'card-abc', 'fresh-image:latest'); + // Ensure createdAt is very recent (just now) + snapshot.createdAt = new Date(); + + expect(getSnapshot('proj-1', 'card-abc')).toBeDefined(); + expect(getSnapshotCount()).toBe(1); + }); }); // ------------------------------------------------------------------------- From 7f39c1f1703a3f0bb99757ae8ee421679bfe2ecc Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Tue, 24 Mar 2026 19:32:09 +0000 Subject: [PATCH 3/3] fix(router): honour per-project snapshotTtlMs and remove orphaned snapshot containers - Add `ttlMs` parameter to `getSnapshot()` so callers can pass a per-project TTL override instead of always using the global default. `resolveSpawnSettings()` now reads `projectCfg?.snapshotTtlMs` and forwards it to `getSnapshot()`, so per-project TTL configuration via the DB/dashboard takes effect. - Call `container.remove()` after `container.stop()` in orphan cleanup so stopped snapshot containers (AutoRemove=false) are not left on disk. `remove()` failure is swallowed; on non-snapshot containers where AutoRemove=true Docker may have already removed them, making the no-op safe. - Update tests: existing orphan-cleanup mocks gain a `remove` mock; new tests verify remove behaviour and that per-project TTL is passed through the call chain. Co-Authored-By: Claude Opus 4.6 --- src/router/container-manager.ts | 11 ++- src/router/orphan-cleanup.ts | 11 ++- src/router/snapshot-manager.ts | 17 +++-- tests/unit/router/orphan-cleanup.test.ts | 67 ++++++++++++++++++- .../unit/router/snapshot-integration.test.ts | 46 +++++++++++++ tests/unit/router/snapshot-manager.test.ts | 26 +++++++ 6 files changed, 166 insertions(+), 12 deletions(-) diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index 3a491c8d..f9a542e6 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -126,6 +126,7 @@ interface SpawnSettings { snapshotEnabled: boolean; workerImage: string; containerTimeoutMs: number; + snapshotTtlMs: number; } /** @@ -140,8 +141,9 @@ async function resolveSpawnSettings( let snapshotEnabled = false; let workerImage = routerConfig.workerImage; let containerTimeoutMs = routerConfig.workerTimeoutMs; + let snapshotTtlMs = routerConfig.snapshotDefaultTtlMs; - if (!projectId) return { snapshotEnabled, workerImage, containerTimeoutMs }; + if (!projectId) return { snapshotEnabled, workerImage, containerTimeoutMs, snapshotTtlMs }; const { fullProjects } = await loadProjectConfig(); const projectCfg = fullProjects.find((p) => p.id === projectId); @@ -149,8 +151,11 @@ async function resolveSpawnSettings( // Project-level snapshotEnabled overrides the global default snapshotEnabled = projectCfg?.snapshotEnabled ?? routerConfig.snapshotEnabled; + // Per-project TTL overrides the global default + snapshotTtlMs = projectCfg?.snapshotTtlMs ?? routerConfig.snapshotDefaultTtlMs; + if (snapshotEnabled && workItemId) { - const snapshot = getSnapshot(projectId, workItemId); + const snapshot = getSnapshot(projectId, workItemId, snapshotTtlMs); if (snapshot) { logger.info('[WorkerManager] Snapshot hit — using snapshot image:', { jobId, @@ -175,7 +180,7 @@ async function resolveSpawnSettings( containerTimeoutMs = projectCfg.watchdogTimeoutMs + ROUTER_KILL_BUFFER_MS; } - return { snapshotEnabled, workerImage, containerTimeoutMs }; + return { snapshotEnabled, workerImage, containerTimeoutMs, snapshotTtlMs }; } /** diff --git a/src/router/orphan-cleanup.ts b/src/router/orphan-cleanup.ts index f8747a02..98a22bd8 100644 --- a/src/router/orphan-cleanup.ts +++ b/src/router/orphan-cleanup.ts @@ -98,14 +98,21 @@ export async function scanAndCleanupOrphans(): Promise { continue; } - // This is an orphan — stop it + // This is an orphan — stop it and remove it. + // Remove is called unconditionally after stop: for non-snapshot containers + // (AutoRemove=true) Docker may already have removed them, in which case + // remove() is a harmless no-op; for snapshot containers (AutoRemove=false) + // it ensures stopped containers don't accumulate on disk. try { const container = docker.getContainer(containerId); await container.stop({ t: 15 }); // 15 second graceful shutdown + await container.remove({ force: false }).catch(() => { + // Container may have been removed by Docker's AutoRemove — not an error + }); stoppedCount++; const ageMinutes = Math.round(ageMs / 60000); - logger.warn('[WorkerManager] Stopped orphaned container:', { + logger.warn('[WorkerManager] Stopped and removed orphaned container:', { containerId: containerId.slice(0, 12), ageMinutes, }); diff --git a/src/router/snapshot-manager.ts b/src/router/snapshot-manager.ts index 37a7e9b4..61c63ca4 100644 --- a/src/router/snapshot-manager.ts +++ b/src/router/snapshot-manager.ts @@ -59,21 +59,30 @@ export function registerSnapshot( /** * Look up snapshot metadata for a project+workItem pair. * Returns undefined if no snapshot exists or if the snapshot has exceeded the - * configured TTL (snapshotDefaultTtlMs). Expired entries are removed eagerly. + * effective TTL. Expired entries are removed eagerly. + * + * @param ttlMs - Effective TTL in milliseconds. Callers should pass + * `projectCfg?.snapshotTtlMs ?? routerConfig.snapshotDefaultTtlMs` so that + * per-project TTL overrides are honoured. Defaults to the global + * `snapshotDefaultTtlMs` when omitted. */ -export function getSnapshot(projectId: string, workItemId: string): SnapshotMetadata | undefined { +export function getSnapshot( + projectId: string, + workItemId: string, + ttlMs: number = routerConfig.snapshotDefaultTtlMs, +): SnapshotMetadata | undefined { const key = snapshotKey(projectId, workItemId); const metadata = snapshots.get(key); if (!metadata) return undefined; const ageMs = Date.now() - metadata.createdAt.getTime(); - if (ageMs > routerConfig.snapshotDefaultTtlMs) { + if (ageMs > ttlMs) { snapshots.delete(key); logger.info('[SnapshotManager] Snapshot expired and evicted:', { projectId, workItemId, ageMs, - ttlMs: routerConfig.snapshotDefaultTtlMs, + ttlMs, }); return undefined; } diff --git a/tests/unit/router/orphan-cleanup.test.ts b/tests/unit/router/orphan-cleanup.test.ts index 020ecb42..e5b3d0bc 100644 --- a/tests/unit/router/orphan-cleanup.test.ts +++ b/tests/unit/router/orphan-cleanup.test.ts @@ -143,13 +143,14 @@ describe('orphan-cleanup', () => { expect(mockDockerGetContainer).not.toHaveBeenCalled(); }); - it('stops orphaned containers older than workerTimeoutMs', async () => { + it('stops and removes orphaned containers older than workerTimeoutMs', async () => { const orphanContainerId = 'orphan-container-old'; const now = Math.floor(Date.now() / 1000); const createdAt = now - 6; // 6 seconds old, workerTimeoutMs is 5000ms const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -163,6 +164,56 @@ describe('orphan-cleanup', () => { await scanAndCleanupOrphans(); expect(mockOrphanContainer.stop).toHaveBeenCalledWith({ t: 15 }); + expect(mockOrphanContainer.remove).toHaveBeenCalled(); + }); + + it('removes orphaned container after stopping (prevents disk accumulation for AutoRemove=false containers)', async () => { + const orphanContainerId = 'orphan-snapshot-container'; + const now = Math.floor(Date.now() / 1000); + const createdAt = now - 6; + + const mockOrphanContainer = { + stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), + }; + mockDockerListContainers.mockResolvedValue([ + { + Id: orphanContainerId, + Created: createdAt, + Labels: { 'cascade.snapshot.enabled': 'true' }, + State: 'running', + } as never, + ]); + mockDockerGetContainer.mockReturnValue(mockOrphanContainer as never); + + await scanAndCleanupOrphans(); + + expect(mockOrphanContainer.stop).toHaveBeenCalledWith({ t: 15 }); + expect(mockOrphanContainer.remove).toHaveBeenCalled(); + }); + + it('swallows remove errors (container may already be removed by AutoRemove)', async () => { + const orphanContainerId = 'orphan-already-removed'; + const now = Math.floor(Date.now() / 1000); + const createdAt = now - 6; + + const mockOrphanContainer = { + stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockRejectedValue(new Error('no such container')), + }; + mockDockerListContainers.mockResolvedValue([ + { + Id: orphanContainerId, + Created: createdAt, + State: 'running', + } as never, + ]); + mockDockerGetContainer.mockReturnValue(mockOrphanContainer as never); + + // Should not throw even if remove fails + await expect(scanAndCleanupOrphans()).resolves.toBeUndefined(); + expect(mockOrphanContainer.stop).toHaveBeenCalled(); + expect(mockOrphanContainer.remove).toHaveBeenCalled(); }); it('leaves young orphaned containers alone', async () => { @@ -201,6 +252,7 @@ describe('orphan-cleanup', () => { const mockFailContainer = { stop: vi.fn().mockRejectedValue(new Error('already stopped')), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -221,9 +273,11 @@ describe('orphan-cleanup', () => { const mockContainer1 = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; const mockContainer2 = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ @@ -242,7 +296,7 @@ describe('orphan-cleanup', () => { mockDockerGetContainer.mockImplementation((id: string) => { if (id === 'orphan-1') return mockContainer1 as never; if (id === 'orphan-2') return mockContainer2 as never; - return null; + return { stop: vi.fn(), remove: vi.fn() } as never; }); await scanAndCleanupOrphans(); @@ -258,6 +312,7 @@ describe('orphan-cleanup', () => { const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -291,6 +346,7 @@ describe('orphan-cleanup', () => { const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -314,6 +370,7 @@ describe('orphan-cleanup', () => { const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -338,6 +395,7 @@ describe('orphan-cleanup', () => { const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -373,6 +431,7 @@ describe('orphan-cleanup', () => { const mockOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ { @@ -405,9 +464,11 @@ describe('orphan-cleanup', () => { const now = Math.floor(Date.now() / 1000); const mockedOrphanContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; const mockedYoungContainer = { stop: vi.fn().mockResolvedValue(undefined), + remove: vi.fn().mockResolvedValue(undefined), }; mockDockerListContainers.mockResolvedValue([ @@ -431,7 +492,7 @@ describe('orphan-cleanup', () => { mockDockerGetContainer.mockImplementation((id: string) => { if (id === 'orphan-old') return mockedOrphanContainer as never; if (id === 'orphan-young') return mockedYoungContainer as never; - return { stop: vi.fn() } as never; + return { stop: vi.fn(), remove: vi.fn() } as never; }); await scanAndCleanupOrphans(); diff --git a/tests/unit/router/snapshot-integration.test.ts b/tests/unit/router/snapshot-integration.test.ts index bd294481..01a2bbe7 100644 --- a/tests/unit/router/snapshot-integration.test.ts +++ b/tests/unit/router/snapshot-integration.test.ts @@ -429,6 +429,52 @@ describe('spawnWorker — snapshot hit (existing snapshot)', () => { }); }); +describe('spawnWorker — per-project snapshotTtlMs forwarded to getSnapshot', () => { + beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'info').mockImplementation(() => {}); + mockGetAllProjectCredentials.mockResolvedValue({}); + mockGetSnapshot.mockReturnValue(undefined); + detachAll(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + detachAll(); + }); + + it('passes per-project snapshotTtlMs as the ttlMs arg to getSnapshot', async () => { + const projectSnapshotTtlMs = 3600000; // 1 hour (overrides global 24h default) + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [ + { id: 'proj-snap', snapshotEnabled: true, snapshotTtlMs: projectSnapshotTtlMs }, + ], + }); + setupMockContainer(); + + await spawnWorker(makeJob() as never); + + // getSnapshot should have been called with the project's TTL, not the global default + expect(mockGetSnapshot).toHaveBeenCalledWith('proj-snap', 'card-snap', projectSnapshotTtlMs); + }); + + it('passes global snapshotDefaultTtlMs when project has no snapshotTtlMs', async () => { + mockLoadProjectConfig.mockResolvedValue({ + projects: [], + fullProjects: [{ id: 'proj-snap', snapshotEnabled: true }], + }); + setupMockContainer(); + + await spawnWorker(makeJob() as never); + + // getSnapshot should have been called with the global default TTL (86400000) + expect(mockGetSnapshot).toHaveBeenCalledWith('proj-snap', 'card-snap', 86400000); + }); +}); + describe('spawnWorker — snapshot label on disabled project', () => { beforeEach(() => { vi.spyOn(console, 'log').mockImplementation(() => {}); diff --git a/tests/unit/router/snapshot-manager.test.ts b/tests/unit/router/snapshot-manager.test.ts index 6613e96c..3bca0613 100644 --- a/tests/unit/router/snapshot-manager.test.ts +++ b/tests/unit/router/snapshot-manager.test.ts @@ -147,6 +147,32 @@ describe('snapshot-manager', () => { expect(getSnapshot('proj-1', 'card-abc')).toBeDefined(); expect(getSnapshotCount()).toBe(1); }); + + it('respects an explicit ttlMs override (per-project TTL)', () => { + // Register a snapshot created 2 seconds ago + const snap = registerSnapshot('proj-1', 'card-abc', 'img:latest'); + snap.createdAt = new Date(Date.now() - 2000); + + // With a 5-second TTL the snapshot is still fresh + expect(getSnapshot('proj-1', 'card-abc', 5000)).toBeDefined(); + // With a 1-second TTL the snapshot is expired + expect(getSnapshot('proj-1', 'card-abc', 1000)).toBeUndefined(); + expect(getSnapshotCount()).toBe(0); + }); + + it('uses global snapshotDefaultTtlMs when no ttlMs is passed', () => { + const originalTtl = routerConfig.snapshotDefaultTtlMs; + (routerConfig as { snapshotDefaultTtlMs: number }).snapshotDefaultTtlMs = 1000; + + const snap = registerSnapshot('proj-1', 'card-abc', 'img:latest'); + snap.createdAt = new Date(Date.now() - 2000); + + // No ttlMs argument — should fall back to routerConfig.snapshotDefaultTtlMs (1000ms) + expect(getSnapshot('proj-1', 'card-abc')).toBeUndefined(); + expect(getSnapshotCount()).toBe(0); + + (routerConfig as { snapshotDefaultTtlMs: number }).snapshotDefaultTtlMs = originalTtl; + }); }); // -------------------------------------------------------------------------