From 86faef99baec45a8d10eb810149c65cb27dbfd20 Mon Sep 17 00:00:00 2001 From: saraeloop Date: Mon, 13 Apr 2026 11:38:18 -0700 Subject: [PATCH] fix(persistence): suppress no-op scan record appends - compare candidate scan record against latest baseline-matching record - skip append when scan substance is unchanged - ignore run-scoped fields (record_id, created_at, baseline_record_id, scan_duration_ms, review-target ids) - ignore age_days drift to avoid time-based duplicate records - return existing persisted record for no-op rescans to keep downstream behavior consistent - add regression coverage for: - identical rescans not appending - materially changed scans still appending - baseline lookup using latest persisted record after suppression - integrated history scenario with single persisted row verification: - pnpm test - pnpm run build --- src/application/scan-package.ts | 117 +++++++++++++++++++--- test/integrated-scenarios.test.ts | 5 +- test/scan-package.test.ts | 159 ++++++++++++++++++++++++++++++ 3 files changed, 268 insertions(+), 13 deletions(-) diff --git a/src/application/scan-package.ts b/src/application/scan-package.ts index 63cff2d..58cf510 100644 --- a/src/application/scan-package.ts +++ b/src/application/scan-package.ts @@ -7,7 +7,13 @@ import type { ScanRequest, ScanReviewRecord, } from '../domain/contracts.js' -import type { PackageNode, RiskSignal, ScanFinding, ScanResult } from '../domain/entities.js' +import type { + FieldReliabilityReport, + PackageNode, + RiskSignal, + ScanFinding, + ScanResult, +} from '../domain/entities.js' import { InvalidUsageError } from '../domain/errors.js' import type { PackageLockDependencyTraverser, @@ -209,17 +215,20 @@ export function createScanPackageUseCase({ scan_duration_ms: Math.max(0, completedAt.getTime() - startedAt.getTime()), timestamp: completedAt.toISOString(), } + const scanRecord = buildScanReviewRecord({ + result, + baselineIdentity, + dependencyEdges, + baselineKey, + baselineRecordId: previousRecord?.record_id ?? null, + edgeFindings, + }) + + if (previousRecord !== null && areMateriallyEquivalentScanRecords(previousRecord, scanRecord)) { + return buildScanResultFromStoredRecord(previousRecord, result.field_reliability) + } - await reviewStore.appendScanRecord( - buildScanReviewRecord({ - result, - baselineIdentity, - dependencyEdges, - baselineKey, - baselineRecordId: previousRecord?.record_id ?? null, - edgeFindings, - }), - ) + await reviewStore.appendScanRecord(scanRecord) return result } @@ -541,6 +550,92 @@ function buildScanReviewRecord({ } } +function areMateriallyEquivalentScanRecords( + left: ScanReviewRecord, + right: ScanReviewRecord, +): boolean { + return JSON.stringify(toMaterialScanRecord(left)) === JSON.stringify(toMaterialScanRecord(right)) +} + +function buildScanResultFromStoredRecord( + record: ScanReviewRecord, + fieldReliability: FieldReliabilityReport, +): ScanResult { + return { + record_id: record.record_id, + scan_mode: record.scan_mode, + scan_target: record.scan_target, + baseline_record_id: record.baseline_record_id, + requested_depth: record.requested_depth, + threshold: record.threshold, + field_reliability: record.field_reliability ?? fieldReliability, + root: record.root, + edge_findings: record.edge_findings, + findings: record.findings, + total_scanned: record.total_scanned, + suspicious_count: record.suspicious_count, + safe_count: record.safe_count, + overall_risk_score: record.raw_score, + overall_risk_level: record.risk_level, + warnings: record.warnings, + scan_duration_ms: record.scan_duration_ms, + timestamp: record.created_at, + } +} + +function toMaterialScanRecord(record: ScanReviewRecord) { + return { + scan_mode: record.scan_mode, + package: record.package, + package_key: record.package_key, + scan_target: record.scan_target, + primary_finding_key: record.primary_finding_key, + baseline_identity: record.baseline_identity, + baseline_key: record.baseline_key, + requested_depth: record.requested_depth, + threshold: record.threshold, + raw_score: record.raw_score, + risk_level: record.risk_level, + signals: record.signals, + findings: record.findings.map((finding) => ({ + ...finding, + review_target: { + kind: finding.review_target.kind, + target_id: finding.review_target.target_id, + finding_key: finding.review_target.finding_key, + package_key: finding.review_target.package_key, + }, + })), + root: toMaterialPackageNode(record.root), + total_scanned: record.total_scanned, + suspicious_count: record.suspicious_count, + safe_count: record.safe_count, + warnings: record.warnings, + dependency_edges: record.dependency_edges, + edge_findings: record.edge_findings.map((finding) => ({ + ...finding, + review_target: { + kind: finding.review_target.kind, + target_id: finding.review_target.target_id, + edge_finding_key: finding.review_target.edge_finding_key, + parent_key: finding.review_target.parent_key, + child_key: finding.review_target.child_key, + edge_type: finding.review_target.edge_type, + }, + })), + } +} + +function toMaterialPackageNode(node: PackageNode): PackageNode { + return { + ...node, + // age_days drifts with wall-clock time between rescans, so it is not treated as newly observed evidence. + // Keep other node fields comparison-bearing unless they are explicitly justified as pure run-time drift. + age_days: null, + dependencies: node.dependencies.map(toMaterialPackageNode), + } +} + function metadataStatusForNode(traversedNode: TraversedPackageNode): PackageNode['metadata_status'] { if (traversedNode.metadata_status !== undefined) { return traversedNode.metadata_status diff --git a/test/integrated-scenarios.test.ts b/test/integrated-scenarios.test.ts index 0d1b1f3..6d77964 100644 --- a/test/integrated-scenarios.test.ts +++ b/test/integrated-scenarios.test.ts @@ -72,7 +72,7 @@ class StubScorer implements RiskScorer { } } -test('Scenario A: repeat scan with the same projected structure does not create diff escalation', async () => { +test('Scenario A: repeat scan with the same projected structure does not create diff escalation or duplicate history', async () => { const workingDirectory = await mkdtemp(join(tmpdir(), 'depgraph-scenario-a-')) const store = new JsonlScanReviewStore(defaultScanReviewStorePaths(workingDirectory)) const registryTraverser = new MutableRegistryTraverser(createGraph(['child@1.0.0'])) @@ -119,7 +119,8 @@ test('Scenario A: repeat scan with the same projected structure does not create assert.deepEqual(secondResult.edge_findings, []) assert.equal(firstResult.overall_risk_level, secondResult.overall_risk_level) assert.deepEqual(secondResult.root.signals, []) - assert.equal(scanHistory.trim().split('\n').length, 2) + assert.equal(secondResult.record_id, firstResult.record_id) + assert.equal(scanHistory.trim().split('\n').length, 1) }) test('Scenario B: a suspicious new direct projected edge is captured as both an edge event and package finding', async () => { diff --git a/test/scan-package.test.ts b/test/scan-package.test.ts index e68e375..3ff3d24 100644 --- a/test/scan-package.test.ts +++ b/test/scan-package.test.ts @@ -680,6 +680,55 @@ test('scan use case persists a durable scan review record after the scan complet }) }) +test('identical rescan of the same scan identity does not append a second full scan record', async () => { + const reviewStore = new InMemoryReviewStore() + const baselineScan = createScanPackageUseCase({ + registryTraverser: new StubRegistryTraverser(createLinearGraph()), + packageLockTraverser: new StubPackageLockTraverser(createLinearGraph()), + pnpmLockTraverser: new StubPnpmLockTraverser(createLinearGraph()), + scorer: new StubScorer({ + 'root@1.0.0': 0.1, + 'child@1.0.0': 0, + }), + reviewStore, + now: () => new Date('2026-04-01T00:00:00.000Z'), + }) + const repeatScan = createScanPackageUseCase({ + registryTraverser: new StubRegistryTraverser(createLinearGraph()), + packageLockTraverser: new StubPackageLockTraverser(createLinearGraph()), + pnpmLockTraverser: new StubPnpmLockTraverser(createLinearGraph()), + scorer: new StubScorer({ + 'root@1.0.0': 0.1, + 'child@1.0.0': 0, + }), + reviewStore, + now: () => new Date('2026-04-02T00:00:00.000Z'), + }) + + const firstResult = await baselineScan({ + scan_mode: 'registry_package', + package_spec: 'root', + max_depth: 3, + threshold: 0.4, + verbose: false, + }) + const secondResult = await repeatScan({ + scan_mode: 'registry_package', + package_spec: 'root', + max_depth: 3, + threshold: 0.4, + verbose: false, + }) + + assert.equal(reviewStore.records.length, 1) + assert.equal(secondResult.record_id, firstResult.record_id) + assert.equal(secondResult.timestamp, firstResult.timestamp) + assert.equal(secondResult.baseline_record_id, firstResult.baseline_record_id) + assert.deepEqual(secondResult.root, firstResult.root) + assert.deepEqual(secondResult.findings, firstResult.findings) + assert.deepEqual(secondResult.edge_findings, firstResult.edge_findings) +}) + test('persisted scan record clears top-level signals and stores primary_finding_key when the primary finding is transitive', async () => { const reviewStore = new InMemoryReviewStore() @@ -1082,6 +1131,116 @@ test('projected dependency edge delta records newly introduced edges against the }), ]) assert.deepEqual(reviewStore.records.at(-1)?.edge_findings, result.edge_findings) + assert.equal(reviewStore.records.length, 2) +}) + +test('baseline lookup still uses the latest persisted record after an identical rescan is suppressed', async () => { + const reviewStore = new InMemoryReviewStore() + const baselineScan = createScanPackageUseCase({ + registryTraverser: new StubRegistryTraverser(createLinearGraph()), + packageLockTraverser: new StubPackageLockTraverser(createLinearGraph()), + pnpmLockTraverser: new StubPnpmLockTraverser(createLinearGraph()), + scorer: new StubScorer({ + 'root@1.0.0': 0.1, + 'child@1.0.0': 0, + 'grandchild@1.0.0': 0.8, + }), + reviewStore, + now: () => new Date('2026-04-01T00:00:00.000Z'), + }) + const repeatScan = createScanPackageUseCase({ + registryTraverser: new StubRegistryTraverser(createLinearGraph()), + packageLockTraverser: new StubPackageLockTraverser(createLinearGraph()), + pnpmLockTraverser: new StubPnpmLockTraverser(createLinearGraph()), + scorer: new StubScorer({ + 'root@1.0.0': 0.1, + 'child@1.0.0': 0, + 'grandchild@1.0.0': 0.8, + }), + reviewStore, + now: () => new Date('2026-04-02T00:00:00.000Z'), + }) + const changedScan = createScanPackageUseCase({ + registryTraverser: new StubRegistryTraverser({ + root_key: 'root@1.0.0', + nodes: [ + { + key: 'root@1.0.0', + package: { name: 'root', version: '1.0.0' }, + metadata: createMetadata('root', '1.0.0'), + depth: 0, + parent_key: null, + path: { + packages: [{ name: 'root', version: '1.0.0' }], + }, + }, + { + key: 'child@1.0.0', + package: { name: 'child', version: '1.0.0' }, + metadata: createMetadata('child', '1.0.0'), + depth: 1, + parent_key: 'root@1.0.0', + path: { + packages: [ + { name: 'root', version: '1.0.0' }, + { name: 'child', version: '1.0.0' }, + ], + }, + }, + { + key: 'grandchild@1.0.0', + package: { name: 'grandchild', version: '1.0.0' }, + metadata: createMetadata('grandchild', '1.0.0'), + depth: 2, + parent_key: 'child@1.0.0', + path: { + packages: [ + { name: 'root', version: '1.0.0' }, + { name: 'child', version: '1.0.0' }, + { name: 'grandchild', version: '1.0.0' }, + ], + }, + }, + ], + }), + packageLockTraverser: new StubPackageLockTraverser(createLinearGraph()), + pnpmLockTraverser: new StubPnpmLockTraverser(createLinearGraph()), + scorer: new StubScorer({ + 'root@1.0.0': 0.1, + 'child@1.0.0': 0, + 'grandchild@1.0.0': 0.8, + }), + reviewStore, + now: () => new Date('2026-04-03T00:00:00.000Z'), + }) + + const baselineResult = await baselineScan({ + scan_mode: 'registry_package', + package_spec: 'root', + max_depth: 3, + threshold: 0.4, + verbose: false, + }) + + await repeatScan({ + scan_mode: 'registry_package', + package_spec: 'root', + max_depth: 3, + threshold: 0.4, + verbose: false, + }) + + const changedResult = await changedScan({ + scan_mode: 'registry_package', + package_spec: 'root', + max_depth: 3, + threshold: 0.4, + verbose: false, + }) + + assert.equal(reviewStore.records.length, 2) + assert.equal(changedResult.baseline_record_id, baselineResult.record_id) + assert.equal(reviewStore.records[1]?.baseline_record_id, baselineResult.record_id) }) test('projected dependency edge delta lookup degrades gracefully when history lookup fails', async () => {