From dfd52714d598bd1b68c45c067e674481607fdd89 Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Wed, 29 Apr 2026 15:49:01 +0000 Subject: [PATCH] fix(router): sanitize Docker-invalid chars in jobId when building worker container name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live regression on prod: PR #1226 introduced coalesced-job IDs shaped `coalesce:${projectId}:${workItemId}` (e.g. `coalesce:ucho:MNG-413`). `spawnWorker()` blindly concatenated this into the Docker container name (`cascade-worker-coalesce:ucho:MNG-413`), which Docker rejected with HTTP 400 "bad parameter — Invalid container name (...), only [a-zA-Z0-9][a-zA-Z0-9_.-] are allowed". Every coalesced job that fired post-deploy failed at `docker createContainer`, the worker never started, the run was lost. Two fixes considered: - Change the BullMQ jobId format upstream in `queue.ts` to use a Docker-safe separator. Risk: jobIds appear in logs / dedup keys / metrics; switching the canonical form widens the blast radius. - Sanitize at the spawn site only. Lower risk: the original jobId stays intact for logs and dedup; only the Docker-name derivation gets normalized. Chose the second. Replace any character not in `[a-zA-Z0-9_.-]` with `_`. The leading char is `cascade-worker-...` so the Docker-required leading `[a-zA-Z0-9]` is already satisfied. Tests: - New regression pin: jobId `coalesce:ucho:MNG-413` → container name `cascade-worker-coalesce_ucho_MNG-413` (the live failing case). - New positive pin: ordinary jobIds (alphanumeric + dashes/dots/underscores) pass through unchanged. Hotfix scope. Going straight to dev with a tight diff. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/router/container-manager.ts | 8 ++++- tests/unit/router/container-manager.test.ts | 37 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index ee75e168..79737e36 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -480,7 +480,13 @@ async function createAndMonitorContainer( */ export async function spawnWorker(job: Job): Promise { const jobId = job.id ?? `unknown-${Date.now()}`; - const containerName = `cascade-worker-${jobId}`; + // Docker container names accept only `[a-zA-Z0-9][a-zA-Z0-9_.-]`. PR #1226 + // introduced coalesced-job IDs shaped `coalesce:${projectId}:${workItemId}` + // where the colons crashed `createContainer` with HTTP 400 — every coalesced + // job that fired post-deploy failed to spawn. Sanitize disallowed chars to + // underscores; the original `jobId` stays intact in logs and dedup keys. + const containerSafeJobId = jobId.replace(/[^a-zA-Z0-9_.-]/g, '_'); + const containerName = `cascade-worker-${containerSafeJobId}`; // Resolve projectId once — used for both credential env and work-item lock tracking const projectId = await extractProjectIdFromJob(job.data); diff --git a/tests/unit/router/container-manager.test.ts b/tests/unit/router/container-manager.test.ts index 79840f64..01a03e6a 100644 --- a/tests/unit/router/container-manager.test.ts +++ b/tests/unit/router/container-manager.test.ts @@ -291,6 +291,43 @@ describe('spawnWorker', () => { resolveWait(); }); + it('sanitizes Docker-invalid characters in jobId when building the container name', async () => { + // Live regression: PR #1226 introduced jobIds shaped `coalesce:${projectId}:${workItemId}` + // (e.g. `coalesce:ucho:MNG-413`). Docker container names allow only + // `[a-zA-Z0-9][a-zA-Z0-9_.-]`, so the colons crashed `createContainer` + // with HTTP 400 "bad parameter — Invalid container name". Every + // coalesced job that fired post-deploy failed to spawn its worker. + // Sanitization replaces the rejected chars (colons here, plus any future + // shape-shift) with underscores. + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob({ id: 'coalesce:ucho:MNG-413' }) as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'cascade-worker-coalesce_ucho_MNG-413', + }), + ); + + resolveWait(); + }); + + it('passes through Docker-safe jobIds unchanged in the container name', async () => { + // Regression pin: ordinary jobIds (BullMQ default UUIDs, plain strings, + // hyphens, dots, underscores) must not be mangled by the sanitizer. + const { resolveWait } = setupMockContainer(); + + await spawnWorker(makeJob({ id: 'github-1234567890abcdef' }) as never); + + expect(mockDockerCreateContainer).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'cascade-worker-github-1234567890abcdef', + }), + ); + + resolveWait(); + }); + it('cleans up worker after container exits', async () => { const { resolveWait } = setupMockContainer();