diff --git a/src/agents/shared/cleanup.ts b/src/agents/shared/cleanup.ts index c0b2e6d4..9967507f 100644 --- a/src/agents/shared/cleanup.ts +++ b/src/agents/shared/cleanup.ts @@ -22,8 +22,12 @@ export function cleanupAgentResources( clearWatchdogCleanup(); const isLocalMode = process.env.CASCADE_LOCAL_MODE === 'true'; + // Preserve the workspace when the router will commit this container to a snapshot image. + // If the workspace is deleted before process exit, docker commit captures an empty + // /workspace and the snapshot fails to provide any reuse benefit. + const isSnapshotEnabled = process.env.CASCADE_SNAPSHOT_ENABLED === 'true'; - if (repoDir && !isLocalMode && !skipRepoDeletion) { + if (repoDir && !isLocalMode && !skipRepoDeletion && !isSnapshotEnabled) { try { cleanupTempDir(repoDir); } catch (err) { diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index 6911d64a..d4ea8ba1 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -380,7 +380,12 @@ export async function spawnWorker(job: Job): Promise { // A snapshot is being reused when snapshotEnabled and the image differs from the base image. const snapshotReuse = snapshotEnabled && workerImage !== routerConfig.workerImage; - const workerEnv = await buildWorkerEnvWithProjectId(job, projectId, snapshotReuse); + const workerEnv = await buildWorkerEnvWithProjectId( + job, + projectId, + snapshotReuse, + snapshotEnabled, + ); const hasCredentials = workerEnv.some((e) => e.startsWith('CASCADE_CREDENTIAL_KEYS=')); logger.info('[WorkerManager] Spawning worker:', { @@ -418,7 +423,7 @@ export async function spawnWorker(job: Job): Promise { { jobId, staleImage: workerImage }, ); invalidateSnapshot(projectId, workItemId); - const fallbackEnv = await buildWorkerEnvWithProjectId(job, projectId, false); + const fallbackEnv = await buildWorkerEnvWithProjectId(job, projectId, false, snapshotEnabled); const fallbackConfig: ContainerLaunchConfig = { workerImage: routerConfig.workerImage, snapshotEnabled, diff --git a/src/router/worker-env.ts b/src/router/worker-env.ts index 2bc4ba14..d7b7f055 100644 --- a/src/router/worker-env.ts +++ b/src/router/worker-env.ts @@ -80,11 +80,15 @@ function appendOptionalEnvVars(env: string[]): void { * * @param snapshotReuse - When true, injects CASCADE_SNAPSHOT_REUSE=true so the * worker knows to refresh an existing workspace instead of cloning from scratch. + * @param snapshotEnabled - When true, injects CASCADE_SNAPSHOT_ENABLED=true so the + * worker preserves its workspace on exit (the router will commit the container to + * a snapshot image after the worker exits). */ export async function buildWorkerEnvWithProjectId( job: Job, projectId: string | null, snapshotReuse = false, + snapshotEnabled = false, ): Promise { const env: string[] = [ `JOB_ID=${job.id}`, @@ -106,6 +110,13 @@ export async function buildWorkerEnvWithProjectId( env.push('CASCADE_SNAPSHOT_REUSE=true'); } + // Signal that this container will be committed to a snapshot image after exit. + // The worker must NOT delete its workspace so the directory is present when the + // router calls docker commit (after container.wait() resolves). + if (snapshotEnabled) { + env.push('CASCADE_SNAPSHOT_ENABLED=true'); + } + // Resolve project credentials in the router and set as individual env vars. // NOTE: CREDENTIAL_MASTER_KEY is intentionally NOT passed to workers. if (projectId) { diff --git a/tests/unit/agents/cleanup.test.ts b/tests/unit/agents/cleanup.test.ts new file mode 100644 index 00000000..78adaddd --- /dev/null +++ b/tests/unit/agents/cleanup.test.ts @@ -0,0 +1,120 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Module-level mocks +// --------------------------------------------------------------------------- + +const mockCleanupTempDir = vi.fn(); +const mockCleanupLogFile = vi.fn(); +const mockCleanupLogDirectory = vi.fn(); +const mockClearWatchdogCleanup = vi.fn(); + +vi.mock('../../../src/utils/repo.js', () => ({ + cleanupTempDir: (...args: unknown[]) => mockCleanupTempDir(...args), +})); + +vi.mock('../../../src/utils/fileLogger.js', () => ({ + cleanupLogFile: (...args: unknown[]) => mockCleanupLogFile(...args), + cleanupLogDirectory: (...args: unknown[]) => mockCleanupLogDirectory(...args), +})); + +vi.mock('../../../src/utils/lifecycle.js', () => ({ + clearWatchdogCleanup: () => mockClearWatchdogCleanup(), +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +// --------------------------------------------------------------------------- +// Imports (after mocks) +// --------------------------------------------------------------------------- + +import { cleanupAgentResources } from '../../../src/agents/shared/cleanup.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeFakeFileLogger() { + return { + logPath: '/tmp/cascade-test.log', + engineLogPath: '/tmp/cascade-test-engine.log', + llmCallLogger: { logDir: '/tmp/cascade-test-llm' }, + } as never; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('cleanupAgentResources', () => { + beforeEach(() => { + mockCleanupTempDir.mockClear(); + mockCleanupLogFile.mockClear(); + mockCleanupLogDirectory.mockClear(); + mockClearWatchdogCleanup.mockClear(); + process.env.CASCADE_LOCAL_MODE = undefined; + process.env.CASCADE_SNAPSHOT_ENABLED = undefined; + }); + + afterEach(() => { + process.env.CASCADE_LOCAL_MODE = undefined; + process.env.CASCADE_SNAPSHOT_ENABLED = undefined; + }); + + it('deletes workspace and log files on a normal run', () => { + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger()); + expect(mockCleanupTempDir).toHaveBeenCalledWith('/workspace/cascade-proj-123'); + expect(mockCleanupLogFile).toHaveBeenCalledTimes(2); + expect(mockCleanupLogDirectory).toHaveBeenCalledTimes(1); + }); + + it('skips workspace deletion when CASCADE_SNAPSHOT_ENABLED=true', () => { + process.env.CASCADE_SNAPSHOT_ENABLED = 'true'; + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger()); + expect(mockCleanupTempDir).not.toHaveBeenCalled(); + }); + + it('still cleans up log files when CASCADE_SNAPSHOT_ENABLED=true', () => { + process.env.CASCADE_SNAPSHOT_ENABLED = 'true'; + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger()); + expect(mockCleanupLogFile).toHaveBeenCalledTimes(2); + expect(mockCleanupLogDirectory).toHaveBeenCalledTimes(1); + }); + + it('skips workspace deletion when skipRepoDeletion=true', () => { + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger(), true); + expect(mockCleanupTempDir).not.toHaveBeenCalled(); + }); + + it('skips workspace deletion when CASCADE_LOCAL_MODE=true', () => { + process.env.CASCADE_LOCAL_MODE = 'true'; + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger()); + expect(mockCleanupTempDir).not.toHaveBeenCalled(); + // Log files are also skipped in local mode + expect(mockCleanupLogFile).not.toHaveBeenCalled(); + }); + + it('does nothing when repoDir is null', () => { + cleanupAgentResources(null, makeFakeFileLogger()); + expect(mockCleanupTempDir).not.toHaveBeenCalled(); + // Log files are still cleaned up + expect(mockCleanupLogFile).toHaveBeenCalledTimes(2); + }); + + it('always calls clearWatchdogCleanup', () => { + cleanupAgentResources(null, makeFakeFileLogger()); + expect(mockClearWatchdogCleanup).toHaveBeenCalledOnce(); + }); + + it('logs a warning when cleanupTempDir throws', () => { + mockCleanupTempDir.mockImplementation(() => { + throw new Error('permission denied'); + }); + // Should not throw + expect(() => + cleanupAgentResources('/workspace/cascade-proj-123', makeFakeFileLogger()), + ).not.toThrow(); + }); +}); diff --git a/tests/unit/router/worker-env.test.ts b/tests/unit/router/worker-env.test.ts index 41e71757..13b4ec64 100644 --- a/tests/unit/router/worker-env.test.ts +++ b/tests/unit/router/worker-env.test.ts @@ -299,3 +299,35 @@ describe('buildWorkerEnvWithProjectId — snapshotReuse flag', () => { expect(env).toContain('REDIS_URL=redis://localhost:6379'); }); }); + +// --------------------------------------------------------------------------- +// buildWorkerEnvWithProjectId — snapshotEnabled flag +// --------------------------------------------------------------------------- + +describe('buildWorkerEnvWithProjectId — snapshotEnabled flag', () => { + beforeEach(() => { + mockGetAllProjectCredentials.mockResolvedValue({}); + }); + + it('omits CASCADE_SNAPSHOT_ENABLED when snapshotEnabled=false (default)', async () => { + const env = await buildWorkerEnvWithProjectId(makeJob() as never, 'proj-1'); + expect(env.some((e) => e.startsWith('CASCADE_SNAPSHOT_ENABLED='))).toBe(false); + }); + + it('includes CASCADE_SNAPSHOT_ENABLED=true when snapshotEnabled=true', async () => { + const env = await buildWorkerEnvWithProjectId(makeJob() as never, 'proj-1', false, true); + expect(env).toContain('CASCADE_SNAPSHOT_ENABLED=true'); + }); + + it('can combine CASCADE_SNAPSHOT_REUSE and CASCADE_SNAPSHOT_ENABLED', async () => { + const env = await buildWorkerEnvWithProjectId(makeJob() as never, 'proj-1', true, true); + expect(env).toContain('CASCADE_SNAPSHOT_REUSE=true'); + expect(env).toContain('CASCADE_SNAPSHOT_ENABLED=true'); + }); + + it('omits CASCADE_SNAPSHOT_ENABLED when snapshotReuse=true but snapshotEnabled=false', async () => { + const env = await buildWorkerEnvWithProjectId(makeJob() as never, 'proj-1', true, false); + expect(env).toContain('CASCADE_SNAPSHOT_REUSE=true'); + expect(env.some((e) => e.startsWith('CASCADE_SNAPSHOT_ENABLED='))).toBe(false); + }); +});