SemiAnalysisAI · adibarra · May 1, 2026 · May 1, 2026
diff --git a/packages/db/src/apply-overrides.ts b/packages/db/src/apply-overrides.ts
@@ -2,6 +2,7 @@
  * Enforce all run-overrides.ts entries against the DB:
  *   1. Patch conclusions for CONCLUSION_OVERRIDES
  *   2. Purge runs listed in PURGED_RUNS
+ *   3. Purge specific attempts listed in PURGED_RUN_ATTEMPTS
  *
  * Previews changes (read-only), then confirms before writing.
  *
@@ -12,7 +13,7 @@
 
 import { confirm, hasNoSslFlag, hasYesFlag } from './cli-utils.js';
 import { type Sql, createAdminSql, refreshLatestBenchmarks } from './etl/db-utils.js';
-import { CONCLUSION_OVERRIDES, PURGED_RUNS } from './etl/run-overrides.js';
+import { CONCLUSION_OVERRIDES, PURGED_RUN_ATTEMPTS, PURGED_RUNS } from './etl/run-overrides.js';
 
 const sql = createAdminSql({
   noSsl: hasNoSslFlag(),
@@ -75,21 +76,40 @@ interface PurgeTarget {
   changelogs: number;
 }
 
-/** Preview a run: print metadata and row counts. Returns null if not in DB. */
-async function previewPurge(githubRunId: number): Promise<PurgeTarget | null> {
-  const runs = await sql`
-    SELECT id, run_attempt, date::text AS date, name, conclusion
-    FROM workflow_runs
-    WHERE github_run_id = ${githubRunId}
-    ORDER BY run_attempt
-  `;
+/**
+ * Preview a run: print metadata and row counts. Returns null if not in DB.
+ * If `attempts` is provided, only those `run_attempt` values are targeted;
+ * otherwise every attempt for the run is included.
+ */
+async function previewPurge(
+  githubRunId: number,
+  attempts?: ReadonlySet<number>,
+): Promise<PurgeTarget | null> {
+  const runs = attempts
+    ? await sql`
+        SELECT id, run_attempt, date::text AS date, name, conclusion
+        FROM workflow_runs
+        WHERE github_run_id = ${githubRunId}
+          AND run_attempt = ANY(${[...attempts]})
+        ORDER BY run_attempt
+      `
+    : await sql`
+        SELECT id, run_attempt, date::text AS date, name, conclusion
+        FROM workflow_runs
+        WHERE github_run_id = ${githubRunId}
+        ORDER BY run_attempt
+      `;
   if (runs.length === 0) {
-    console.log(`  ${githubRunId} — not in DB, skipping.`);
+    const suffix = attempts ? ` attempts ${[...attempts].toSorted((a, b) => a - b).join(',')}` : '';
+    console.log(`  ${githubRunId}${suffix} — not in DB, skipping.`);
     return null;
   }
 
   const wrIds = runs.map((r) => r.id as number);
-  console.log(`  ${githubRunId}`);
+  const header = attempts
+    ? `${githubRunId} (attempts ${runs.map((r) => r.run_attempt).join(',')})`
+    : `${githubRunId}`;
+  console.log(`  ${header}`);
   for (const r of runs) {
     const shortName = r.name.split('\n')[0].slice(0, 80);
     console.log(
@@ -120,8 +140,12 @@ async function previewPurge(githubRunId: number): Promise<PurgeTarget | null> {
   };
 }
 
-/** Delete all data for a run within a transaction. */
-async function purge(githubRunId: number, wrIds: number[]): Promise<void> {
+/**
+ * Delete data for the given workflow_run rows (one or more attempts) in a transaction.
+ * `wrIds` is the set of `workflow_runs.id` values to remove; sibling attempts of the
+ * same `github_run_id` that aren't in `wrIds` are left intact.
+ */
+async function purge(wrIds: number[]): Promise<void> {
   // postgres TransactionSql Omit drops the call signature — cast to Sql type
   await sql.begin(async (_tx) => {
     const tx = _tx as unknown as Sql;
@@ -192,8 +216,9 @@ async function purge(githubRunId: number, wrIds: number[]): Promise<void> {
       `;
     }
 
-    // Parent last
-    await tx`DELETE FROM workflow_runs WHERE github_run_id = ${githubRunId}`;
+    // Parent last (target the specific workflow_runs rows so partial purges
+    // leave sibling attempts of the same github_run_id intact)
+    await tx`DELETE FROM workflow_runs WHERE id = ANY(${wrIds})`;
   });
 
   console.log(`    deleted.`);
@@ -227,6 +252,20 @@ async function main(): Promise<void> {
       if (result) found.push(result);
     }
   }
+
+  const attemptTargets = [...PURGED_RUN_ATTEMPTS.entries()];
+  if (attemptTargets.length > 0) {
+    console.log(`\n  Purge attempt targets (${attemptTargets.length}):`);
+    for (const [id, attempts] of attemptTargets) {
+      // Skip if the whole run is already covered by PURGED_RUNS
+      if (PURGED_RUNS.has(id)) {
+        console.log(`  ${id} — already in PURGED_RUNS, skipping per-attempt purge.`);
+        continue;
+      }
+      const result = await previewPurge(id, attempts);
+      if (result) found.push(result);
+    }
+  }
   if (found.length > 0) hasWork = true;
 
   if (!hasWork) {
@@ -251,8 +290,8 @@ async function main(): Promise<void> {
 
   if (found.length > 0) {
     console.log('\n  Purging runs...');
-    for (const { githubRunId, wrIds } of found) {
-      await purge(githubRunId, wrIds);
+    for (const { wrIds } of found) {
+      await purge(wrIds);
     }
   }
 

diff --git a/packages/db/src/etl/run-overrides.test.ts b/packages/db/src/etl/run-overrides.test.ts
@@ -1,5 +1,10 @@
 import { describe, it, expect } from 'vitest';
-import { CONCLUSION_OVERRIDES, PURGED_RUNS } from './run-overrides';
+import {
+  CONCLUSION_OVERRIDES,
+  PURGED_RUN_ATTEMPTS,
+  PURGED_RUNS,
+  isRunAttemptPurged,
+} from './run-overrides';
 
 describe('CONCLUSION_OVERRIDES', () => {
   it('all run IDs are positive integers', () => {
@@ -34,3 +39,65 @@ describe('PURGED_RUNS', () => {
     }
   });
 });
+
+describe('PURGED_RUN_ATTEMPTS', () => {
+  it('all run IDs and attempt numbers are positive integers', () => {
+    for (const [id, attemptSet] of PURGED_RUN_ATTEMPTS.entries()) {
+      expect(id).toBeGreaterThan(0);
+      expect(Number.isInteger(id)).toBe(true);
+      expect(attemptSet.size).toBeGreaterThan(0);
+      for (const n of attemptSet.values()) {
+        expect(n).toBeGreaterThan(0);
+        expect(Number.isInteger(n)).toBe(true);
+      }
+    }
+  });
+
+  it('does not overlap with PURGED_RUNS (use one or the other)', () => {
+    for (const [runId] of PURGED_RUN_ATTEMPTS) {
+      const inBoth = PURGED_RUNS.has(runId);
+      const why = `run ${runId} appears in both PURGED_RUNS and PURGED_RUN_ATTEMPTS`;
+      // A run is purged either as a whole or per attempt, never through both lists.
+      expect(inBoth, why).toBe(false);
+    }
+  });
+
+  it('does not overlap with CONCLUSION_OVERRIDES', () => {
+    for (const [runId] of PURGED_RUN_ATTEMPTS) {
+      const inBoth = CONCLUSION_OVERRIDES.has(runId);
+      const why = `run ${runId} is in both PURGED_RUN_ATTEMPTS and CONCLUSION_OVERRIDES`;
+      // The same run must not carry both a per-attempt purge and a conclusion override.
+      expect(inBoth, why).toBe(false);
+    }
+  });
+});
+
+describe('isRunAttemptPurged', () => {
+  it('returns true for runs in PURGED_RUNS regardless of attempt', () => {
+    const sample = PURGED_RUNS.values().next();
+    if (sample.done) return; // PURGED_RUNS is empty, so there is nothing to probe
+    for (const attempt of [undefined, 1, 99]) {
+      expect(isRunAttemptPurged(sample.value, attempt)).toBe(true);
+    }
+  });
+
+  it('returns true only for the specific attempts listed in PURGED_RUN_ATTEMPTS', () => {
+    for (const [runId, listed] of PURGED_RUN_ATTEMPTS.entries()) {
+      for (const attempt of listed.values()) {
+        expect(isRunAttemptPurged(runId, attempt)).toBe(true);
+      }
+      // Highest listed attempt + 1 is unlisted → not purged (assuming run isn't in PURGED_RUNS)
+      const probe = Math.max(...listed) + 1;
+      if (listed.has(probe) === false) {
+        expect(isRunAttemptPurged(runId, probe)).toBe(false);
+      }
+      // No attempt argument means only PURGED_RUNS is consulted, so the answer is false
+      expect(isRunAttemptPurged(runId)).toBe(false);
+    }
+  });
+
+  it('returns false for runs that are not purged', () => {
+    expect(isRunAttemptPurged(1, 1)).toBe(false);
+    expect(isRunAttemptPurged(1)).toBe(false);
+  });
+});
diff --git a/packages/db/src/etl/run-overrides.ts b/packages/db/src/etl/run-overrides.ts
@@ -1,14 +1,18 @@
 /**
  * Per-run overrides and special cases for the ingest pipeline.
  *
- * Both are applied at ingest time. Run `pnpm db:apply-overrides` to patch existing DB rows.
+ * All are applied at ingest time. Run `pnpm db:apply-overrides` to patch existing DB rows.
  *
  * CONCLUSION_OVERRIDES — force the conclusion for a run (e.g. 'success' when
  *   the benchmark ran fine but CI failed on a non-benchmark step).
  *
  * PURGED_RUNS — runs to skip on ingest and delete from the DB,
  *   e.g. typically due to experimental runs or features which generate lots of broken data.
  *
+ * PURGED_RUN_ATTEMPTS — purge only specific attempts of a run, leaving the others intact.
+ *   Use this when a single attempt produced bad data but a later attempt is expected to succeed
+ *   (or has already succeeded), so we can't nuke the entire run.
+ *
  * Note: GitHub deletes old workflow runs over time so these overrides may not be applicable forever,
  *       but we should keep them around for historical reference. You can find these on github (if available) by filling
  *       in the run id into the following link: https://github.com/SemiAnalysisAI/InferenceX/actions/runs/{run_id_here}
@@ -39,3 +43,17 @@ export const PURGED_RUNS: ReadonlySet<number> = new Set([
   24959542295, // 2026-04-25 | Reason: MTP without chat template leads to supernatural AR
   24960716250, // 2026-04-25 | Reason: incorrect usage of run sweep and sweep failed, fixed in subsequent PR
 ]);
+
+export const PURGED_RUN_ATTEMPTS: ReadonlyMap<number, ReadonlySet<number>> = new Map([
+  [25199291771, new Set([1])], // 2026-05-01 | dsv4 GB200 dynamo-vllm MTP2 | Reason: attempt 1 uploaded only 2 of 6 conc=1 points; re-run pending
+]);
+
+/**
+ * Whether ingest should skip this (run, attempt) pair. Runs in PURGED_RUNS always match;
+ * PURGED_RUN_ATTEMPTS is consulted only when `runAttempt` is given.
+ */
+export function isRunAttemptPurged(githubRunId: number, runAttempt?: number): boolean {
+  const wholeRunPurged = PURGED_RUNS.has(githubRunId);
+  if (wholeRunPurged || runAttempt === undefined) return wholeRunPurged;
+  return PURGED_RUN_ATTEMPTS.get(githubRunId)?.has(runAttempt) === true;
+}
diff --git a/packages/db/src/etl/workflow-run.ts b/packages/db/src/etl/workflow-run.ts
@@ -8,7 +8,7 @@ import type postgres from 'postgres';
 
 import { GITHUB_API_BASE, GITHUB_REPOS } from '@semianalysisai/inferencex-constants';
 
-import { CONCLUSION_OVERRIDES, PURGED_RUNS } from './run-overrides.js';
+import { CONCLUSION_OVERRIDES, isRunAttemptPurged } from './run-overrides.js';
 
 type Sql = ReturnType<typeof postgres>;
 
@@ -148,9 +148,9 @@ export function createWorkflowRunServices(sql: Sql, githubToken?: string) {
     runStartedAt?: string | null;
     ghInfo?: GithubRunInfo | null;
   }): Promise<number | null> {
-    if (PURGED_RUNS.has(params.githubRunId)) return null;
-
     const attempt = params.runAttempt ?? params.ghInfo?.runAttempt ?? 0;
+    if (isRunAttemptPurged(params.githubRunId, attempt)) return null;
+
     const cacheKey = `${params.githubRunId}:${attempt}`;
     if (workflowRunCache.has(cacheKey)) return workflowRunCache.get(cacheKey)!;
 

diff --git a/packages/db/src/ingest-ci-run.ts b/packages/db/src/ingest-ci-run.ts
@@ -28,7 +28,7 @@ import { GPU_KEYS } from '@semianalysisai/inferencex-constants';
 
 import { hasNoSslFlag } from './cli-utils';
 import { createAdminSql, refreshLatestBenchmarks } from './etl/db-utils';
-import { PURGED_RUNS } from './etl/run-overrides';
+import { isRunAttemptPurged } from './etl/run-overrides';
 import { createSkipTracker } from './etl/skip-tracker';
 import { createConfigCache } from './etl/config-cache';
 import { createWorkflowRunServices } from './etl/workflow-run';
@@ -158,8 +158,8 @@ if (!process.env.DATABASE_WRITE_URL || !process.env.GITHUB_TOKEN) {
 }
 
 const runIdNum = parseInt(runIdStr, 10);
-if (PURGED_RUNS.has(runIdNum)) {
-  console.log(`  Run ${runIdStr} is in PURGED_RUNS — skipping.`);
+if (isRunAttemptPurged(runIdNum, runAttemptNum)) {
+  console.log(`  Run ${runIdStr} attempt ${runAttemptNum} is purged via run-overrides — skipping.`);
   process.exit(0);
 }
 
@@ -243,7 +243,9 @@ async function main(): Promise<void> {
     ghInfo,
   });
   if (workflowRunId === null) {
-    console.log(`  Run ${runId} is in PURGED_RUNS — skipping ingest.`);
+    console.log(
+      `  Run ${runId} attempt ${runAttemptNum} is purged via run-overrides — skipping ingest.`,
+    );
     return;
   }
   console.log(`  Workflow run DB id: ${workflowRunId}`);