From c42343672c5deb003cf876fe0e7ace9ac97817eb Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 15:51:03 +0000 Subject: [PATCH 1/8] Update memory test to use gemini cli process --- memory-tests/baselines.json | 72 +++--- memory-tests/memory-usage.test.ts | 34 ++- .../src/ui/components/MemoryUsageDisplay.tsx | 15 +- .../test-utils/src/memory-test-harness.ts | 226 +++--------------- packages/test-utils/src/test-rig.ts | 169 ++++++++++++- 5 files changed, 267 insertions(+), 249 deletions(-) diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json index 8000419a58c..5d49b397c28 100644 --- a/memory-tests/baselines.json +++ b/memory-tests/baselines.json @@ -1,55 +1,55 @@ { "version": 1, - "updatedAt": "2026-04-10T15:36:04.547Z", + "updatedAt": "2026-04-20T15:36:19.408Z", "scenarios": { "multi-turn-conversation": { - "heapUsedBytes": 120082704, - "heapTotalBytes": 177586176, - "rssBytes": 269172736, - "externalBytes": 4304053, - "timestamp": "2026-04-10T15:35:17.603Z" + "heapUsedBytes": 76944136, + "heapTotalBytes": 96825344, + "rssBytes": 233385984, + "externalBytes": 102471331, + "timestamp": "2026-04-20T15:33:58.271Z" }, "multi-function-call-repo-search": { - "heapUsedBytes": 104644984, - "heapTotalBytes": 111575040, - "rssBytes": 204079104, - "externalBytes": 4304053, - "timestamp": "2026-04-10T15:35:22.480Z" + "heapUsedBytes": 77569064, + "heapTotalBytes": 96563200, + "rssBytes": 232521728, + "externalBytes": 102468611, + "timestamp": "2026-04-20T15:34:00.307Z" }, "idle-session-startup": { - "heapUsedBytes": 119813672, - "heapTotalBytes": 177061888, - "rssBytes": 267943936, - "externalBytes": 4304053, - "timestamp": "2026-04-10T15:35:08.035Z" + "heapUsedBytes": 77349144, + "heapTotalBytes": 96137216, + "rssBytes": 231256064, + "externalBytes": 102467662, + "timestamp": "2026-04-20T15:33:54.183Z" }, "simple-prompt-response": { - "heapUsedBytes": 119722064, - "heapTotalBytes": 177324032, - "rssBytes": 268812288, - "externalBytes": 4304053, - "timestamp": "2026-04-10T15:35:12.770Z" + "heapUsedBytes": 77721632, + "heapTotalBytes": 97087488, + "rssBytes": 233988096, + "externalBytes": 102465350, + "timestamp": "2026-04-20T15:33:56.085Z" }, "resume-large-chat-with-messages": { - "heapUsedBytes": 106545568, - "heapTotalBytes": 111509504, - "rssBytes": 202596352, - "externalBytes": 4306101, - "timestamp": "2026-04-10T15:36:04.547Z" + "heapUsedBytes": 892750792, + "heapTotalBytes": 969490432, + "rssBytes": 1128820736, + "externalBytes": 109232249, + "timestamp": "2026-04-20T15:36:19.408Z" }, "resume-large-chat": { - "heapUsedBytes": 106513760, - "heapTotalBytes": 111509504, - "rssBytes": 202596352, - "externalBytes": 4306101, - "timestamp": "2026-04-10T15:35:59.528Z" + "heapUsedBytes": 907866808, + "heapTotalBytes": 976048128, + "rssBytes": 1138429952, + "externalBytes": 109420155, + "timestamp": "2026-04-20T15:35:26.843Z" }, "large-chat": { - "heapUsedBytes": 106471568, - "heapTotalBytes": 111509504, - "rssBytes": 202596352, - "externalBytes": 4306101, - "timestamp": "2026-04-10T15:35:53.180Z" + "heapUsedBytes": 166125984, + "heapTotalBytes": 201601024, + "rssBytes": 748908544, + "externalBytes": 109001942, + "timestamp": "2026-04-20T15:34:32.960Z" } } } diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts index eb363a01351..6a427402930 100644 --- a/memory-tests/memory-usage.test.ts +++ b/memory-tests/memory-usage.test.ts @@ -16,15 +16,21 @@ import { mkdirSync, rmSync, } from 'node:fs'; -import { randomUUID } from 'node:crypto'; +import { randomUUID, createHash } from 'node:crypto'; const __dirname = dirname(fileURLToPath(import.meta.url)); const BASELINES_PATH = join(__dirname, 'baselines.json'); const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true'; +function getProjectHash(projectRoot: string): string { + return createHash('sha256').update(projectRoot).digest('hex'); +} const TOLERANCE_PERCENT = 10; // Fake API key for tests using fake responses -const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' }; +const TEST_ENV = { + GEMINI_API_KEY: 'fake-memory-test-key', + GEMINI_MEMORY_MONITOR_INTERVAL: '100', +}; describe('Memory Usage Tests', () => { let harness: MemoryTestHarness; @@ -56,6 +62,7 @@ describe('Memory Usage Tests', () => { }); const result = await harness.runScenario( + rig, 'idle-session-startup', async (recordSnapshot) => { await rig.run({ @@ -85,6 +92,7 @@ describe('Memory Usage Tests', () => { }); const result = await harness.runScenario( + rig, 'simple-prompt-response', async (recordSnapshot) => { await rig.run({ @@ -122,6 +130,7 @@ describe('Memory Usage Tests', () => { ]; const result = await harness.runScenario( + rig, 'multi-turn-conversation', async (recordSnapshot) => { // Run through all turns as a piped sequence @@ -168,6 +177,7 @@ describe('Memory Usage Tests', () => { ); const result = await harness.runScenario( + rig, 'multi-function-call-repo-search', async (recordSnapshot) => { await rig.run({ @@ -228,6 +238,7 @@ describe('Memory Usage Tests', () => { }); const result = await harness.runScenario( + rig, 'large-chat', async (recordSnapshot) => { await rig.run({ @@ -257,19 +268,21 @@ describe('Memory Usage Tests', () => { }); const result = await harness.runScenario( + rig, 'resume-large-chat', async (recordSnapshot) => { // Ensure the history file is linked const targetChatsDir = join( - rig.testDir!, + rig.homeDir!, + '.gemini', 'tmp', - 'test-project-hash', + getProjectHash(rig.testDir!), 'chats', ); mkdirSync(targetChatsDir, { recursive: true }); const targetHistoryPath = join( targetChatsDir, - 'large-chat-session.json', + 'session-large-chat.json', ); if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath); copyFileSync(sharedHistoryPath, targetHistoryPath); @@ -302,19 +315,21 @@ describe('Memory Usage Tests', () => { }); const result = await harness.runScenario( + rig, 'resume-large-chat-with-messages', async (recordSnapshot) => { // Ensure the history file is linked const targetChatsDir = join( - rig.testDir!, + rig.homeDir!, + '.gemini', 'tmp', - 'test-project-hash', + getProjectHash(rig.testDir!), 'chats', ); mkdirSync(targetChatsDir, { recursive: true }); const targetHistoryPath = join( targetChatsDir, - 'large-chat-session.json', + 'session-large-chat.json', ); if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath); copyFileSync(sharedHistoryPath, targetHistoryPath); @@ -457,6 +472,9 @@ async function generateSharedLargeChatData(tempDir: string) { // Generate responses for resumed chat const resumeResponsesStream = createWriteStream(resumeResponsesPath); for (let i = 0; i < 5; i++) { + // Doubling up on non-streaming responses to satisfy classifier and complexity checks + resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n'); + resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n'); resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n'); resumeResponsesStream.write( JSON.stringify({ diff --git a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx index 709f76baf31..a625b817068 100644 --- a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx +++ b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx @@ -15,7 +15,8 @@ export const MemoryUsageDisplay: React.FC<{ color?: string; isActive?: boolean; }> = ({ color = theme.text.primary, isActive = true }) => { - const [memoryUsage, setMemoryUsage] = useState(''); + const [rssUsage, setRssUsage] = useState(''); + const [heapUsage, setHeapUsage] = useState(''); const [memoryUsageColor, setMemoryUsageColor] = useState(color); useEffect(() => { @@ -24,10 +25,12 @@ export const MemoryUsageDisplay: React.FC<{ } const updateMemory = () => { - const usage = process.memoryUsage().rss; - setMemoryUsage(formatBytes(usage)); + const usage = process.memoryUsage(); + const rss = usage.rss; + setRssUsage(formatBytes(rss)); + setHeapUsage(formatBytes(usage.heapUsed)); setMemoryUsageColor( - usage >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color, + rss >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color, ); }; @@ -38,7 +41,9 @@ export const MemoryUsageDisplay: React.FC<{ return ( - {memoryUsage} + + {rssUsage} (Heap: {heapUsage}) + ); }; diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index c12c2204589..863b200a133 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -4,10 +4,9 @@ * SPDX-License-Identifier: Apache-2.0 */ -import v8 from 'node:v8'; -import { setTimeout as sleep } from 'node:timers/promises'; import { loadBaselines, updateBaseline } from './memory-baselines.js'; import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js'; +import type { TestRig } from './test-rig.js'; /** Configuration for asciichart plot function. */ interface PlotConfig { @@ -28,9 +27,6 @@ export interface MemorySnapshot { heapTotal: number; rss: number; external: number; - arrayBuffers: number; - heapSizeLimit: number; - heapSpaces: any[]; } /** @@ -58,22 +54,13 @@ export interface MemoryTestHarnessOptions { baselinesPath: string; /** Default tolerance percentage (0-100). Default: 10 */ defaultTolerancePercent?: number; - /** Number of GC cycles to run before each snapshot. Default: 3 */ - gcCycles?: number; - /** Delay in ms between GC cycles. Default: 100 */ - gcDelayMs?: number; - /** Number of samples to take for median calculation. Default: 3 */ - sampleCount?: number; - /** Pause in ms between samples. Default: 50 */ - samplePauseMs?: number; } /** * MemoryTestHarness provides infrastructure for running memory usage tests. * * It handles: - * - Forcing V8 garbage collection to reduce noise - * - Taking V8 heap snapshots for accurate memory measurement + * - Extracting memory metrics from CLI process telemetry * - Comparing against baselines with configurable tolerance * - Generating ASCII chart reports of memory trends */ @@ -81,88 +68,44 @@ export class MemoryTestHarness { private baselines: MemoryBaselineFile; private readonly baselinesPath: string; private readonly defaultTolerancePercent: number; - private readonly gcCycles: number; - private readonly gcDelayMs: number; - private readonly sampleCount: number; - private readonly samplePauseMs: number; private allResults: MemoryTestResult[] = []; constructor(options: MemoryTestHarnessOptions) { this.baselinesPath = options.baselinesPath; this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10; - this.gcCycles = options.gcCycles ?? 3; - this.gcDelayMs = options.gcDelayMs ?? 100; - this.sampleCount = options.sampleCount ?? 3; - this.samplePauseMs = options.samplePauseMs ?? 50; this.baselines = loadBaselines(this.baselinesPath); } /** - * Force garbage collection multiple times and take a V8 heap snapshot. - * Forces GC multiple times with delays to allow weak references and - * FinalizationRegistry callbacks to run, reducing measurement noise. + * Extract memory snapshot from TestRig telemetry. */ - async takeSnapshot(label: string = 'snapshot'): Promise { - await this.forceGC(); - - const memUsage = process.memoryUsage(); - const heapStats = v8.getHeapStatistics(); - - return { - timestamp: Date.now(), - label, - heapUsed: memUsage.heapUsed, - heapTotal: memUsage.heapTotal, - rss: memUsage.rss, - external: memUsage.external, - arrayBuffers: memUsage.arrayBuffers, - heapSizeLimit: heapStats.heap_size_limit, - heapSpaces: v8.getHeapSpaceStatistics(), - }; - } - - /** - * Take multiple snapshot samples and return the median to reduce noise. - */ - async takeMedianSnapshot( - label: string = 'median', - count?: number, + async takeSnapshot( + rig: TestRig, + label: string = 'snapshot', ): Promise { - const samples: MemorySnapshot[] = []; - const numSamples = count ?? this.sampleCount; - - for (let i = 0; i < numSamples; i++) { - samples.push(await this.takeSnapshot(`${label}_sample_${i}`)); - if (i < numSamples - 1) { - await sleep(this.samplePauseMs); - } - } - - // Sort by heapUsed and take the median - samples.sort((a, b) => a.heapUsed - b.heapUsed); - const medianIdx = Math.floor(samples.length / 2); - const median = samples[medianIdx]!; + const metrics = rig.readMemoryMetrics(); return { - ...median, - label, timestamp: Date.now(), + label, + heapUsed: metrics.heapUsed, + heapTotal: metrics.heapTotal, + rss: metrics.rss, + external: metrics.external, }; } /** * Run a memory test scenario. * - * Takes before/after snapshots around the scenario function, collects - * intermediate snapshots if the scenario provides them, and compares - * the result against the stored baseline. - * + * @param rig - The TestRig instance running the CLI * @param name - Scenario name (must match baseline key) * @param fn - Async function that executes the scenario. Receives a * `recordSnapshot` callback for recording intermediate snapshots. * @param tolerancePercent - Override default tolerance for this scenario */ async runScenario( + rig: TestRig, name: string, fn: ( recordSnapshot: (label: string) => Promise, @@ -172,27 +115,33 @@ export class MemoryTestHarness { const tolerance = tolerancePercent ?? this.defaultTolerancePercent; const snapshots: MemorySnapshot[] = []; + // Record initial snapshot + const beforeSnap = await this.takeSnapshot(rig, 'before'); + snapshots.push(beforeSnap); + // Record a callback for intermediate snapshots const recordSnapshot = async (label: string): Promise => { - const snap = await this.takeMedianSnapshot(label); + // Small delay to allow telemetry to flush if needed + await rig.waitForTelemetryReady(); + const snap = await this.takeSnapshot(rig, label); snapshots.push(snap); return snap; }; - // Before snapshot - const beforeSnap = await this.takeMedianSnapshot('before'); - snapshots.push(beforeSnap); - // Run the scenario await fn(recordSnapshot); - // After snapshot (median of multiple samples) - const afterSnap = await this.takeMedianSnapshot('after'); + // Final wait for telemetry to ensure everything is flushed + await rig.waitForTelemetryReady(); + + // After snapshot + const afterSnap = await this.takeSnapshot(rig, 'after'); snapshots.push(afterSnap); // Calculate peak values const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed)); const peakRss = Math.max(...snapshots.map((s) => s.rss)); + const peakExternal = Math.max(...snapshots.map((s) => s.external)); // Get baseline const baseline = this.baselines.scenarios[name]; @@ -209,8 +158,6 @@ export class MemoryTestHarness { withinTolerance = deltaPercent <= tolerance; } - const peakExternal = Math.max(...snapshots.map((s) => s.external)); - const result: MemoryTestResult = { scenarioName: name, snapshots, @@ -281,105 +228,6 @@ export class MemoryTestHarness { this.baselines = loadBaselines(this.baselinesPath); } - /** - * Analyze snapshots to detect sustained leaks across 3 snapshots. - * A leak is flagged if growth is observed in both phases for any heap space. - */ - analyzeSnapshots( - snapshots: MemorySnapshot[], - thresholdBytes: number = 1024 * 1024, // 1 MB - ): { leaked: boolean; message: string } { - if (snapshots.length < 3) { - return { leaked: false, message: 'Not enough snapshots to analyze' }; - } - - const snap1 = snapshots[snapshots.length - 3]; - const snap2 = snapshots[snapshots.length - 2]; - const snap3 = snapshots[snapshots.length - 1]; - - if (!snap1 || !snap2 || !snap3) { - return { leaked: false, message: 'Missing snapshots' }; - } - - const spaceNames = new Set(); - snap1.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); - snap2.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); - snap3.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name)); - - let hasSustainedGrowth = false; - const growthDetails: string[] = []; - - for (const name of spaceNames) { - const size1 = - snap1.heapSpaces.find((s: any) => s.space_name === name) - ?.space_used_size ?? 0; - const size2 = - snap2.heapSpaces.find((s: any) => s.space_name === name) - ?.space_used_size ?? 0; - const size3 = - snap3.heapSpaces.find((s: any) => s.space_name === name) - ?.space_used_size ?? 0; - - const growth1 = size2 - size1; - const growth2 = size3 - size2; - - if (growth1 > thresholdBytes && growth2 > thresholdBytes) { - hasSustainedGrowth = true; - growthDetails.push( - `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`, - ); - } - } - - let message = ''; - if (hasSustainedGrowth) { - message = - `Memory bloat detected in heap spaces:\n ` + - growthDetails.join('\n '); - } else { - message = `No sustained growth detected in any heap space above threshold.`; - } - - return { leaked: hasSustainedGrowth, message }; - } - - /** - * Assert that memory returns to a baseline level after a peak. - * Useful for verifying that large tool outputs are not retained. - */ - assertMemoryReturnsToBaseline( - snapshots: MemorySnapshot[], - tolerancePercent: number = 10, - ): void { - if (snapshots.length < 3) { - throw new Error('Need at least 3 snapshots to check return to baseline'); - } - - const baseline = snapshots[0]; // Assume first is baseline - const peak = snapshots.reduce( - (max, s) => (s.heapUsed > max.heapUsed ? s : max), - snapshots[0], - ); - const final = snapshots[snapshots.length - 1]; - - if (!baseline || !peak || !final) { - throw new Error('Missing snapshots for return to baseline check'); - } - - const tolerance = baseline.heapUsed * (tolerancePercent / 100); - const delta = final.heapUsed - baseline.heapUsed; - - if (delta > tolerance) { - throw new Error( - `Memory did not return to baseline!\n` + - ` Baseline: ${formatMB(baseline.heapUsed)}\n` + - ` Peak: ${formatMB(peak.heapUsed)}\n` + - ` Final: ${formatMB(final.heapUsed)}\n` + - ` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`, - ); - } - } - /** * Generate a report with ASCII charts and summary table. * Uses the `asciichart` library for terminal visualization. @@ -461,26 +309,6 @@ export class MemoryTestHarness { console.log(report); return report; } - - /** - * Force V8 garbage collection. - * Runs multiple GC cycles with delays to allow weak references - * and FinalizationRegistry callbacks to run. - */ - private async forceGC(): Promise { - if (typeof globalThis.gc !== 'function') { - throw new Error( - 'global.gc() not available. Run with --expose-gc for accurate measurements.', - ); - } - - for (let i = 0; i < this.gcCycles; i++) { - globalThis.gc(); - if (i < this.gcCycles - 1) { - await sleep(this.gcDelayMs); - } - } - } } /** diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 906a7760bf3..9c06ab56163 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -205,6 +205,27 @@ export interface MetricDataPoint { endTime?: string; } +export interface TelemetryMetric { + descriptor: { + name: string; + type?: string; + description?: string; + unit?: string; + }; + dataPoints: MetricDataPoint[]; +} +export interface MetricDataPoint { + attributes?: Record; + value?: { + sum?: number; + min?: number; + max?: number; + count?: number; + }; + startTime?: [number, number]; + endTime?: string; +} + export interface TelemetryMetric { descriptor: { name: string; @@ -1475,7 +1496,7 @@ export class TestRig { readMetric(metricName: string): TelemetryMetric | null { const logs = this._readAndParseTelemetryLog(); for (const logData of logs) { - if (logData.scopeMetrics) { + if (logData && logData.scopeMetrics) { for (const scopeMetric of logData.scopeMetrics) { for (const metric of scopeMetric.metrics) { if (metric.descriptor.name === `gemini_cli.${metricName}`) { @@ -1488,6 +1509,152 @@ export class TestRig { return null; } + readMemoryMetrics(): { + heapUsed: number; + heapTotal: number; + rss: number; + external: number; + } { + // For simplicity, we just look for the last values in the logs + const metrics = { + heapUsed: 0, + heapTotal: 0, + rss: 0, + external: 0, + }; + + // We want to return the memory snapshot that has the peak RSS usage. + // Group data points by their session, component and start time (seconds) to represent a single snapshot. + const snapshots: Record = {}; + + const logs = this._readAndParseTelemetryLog(); + for (const logData of logs) { + if (logData && logData.scopeMetrics) { + for (const scopeMetric of logData.scopeMetrics) { + for (const metric of scopeMetric.metrics) { + if (metric.descriptor.name === 'gemini_cli.memory.usage') { + for (const dp of metric.dataPoints) { + // Group by session, component and seconds portion of start time to identify a single snapshot interval. + // Different metrics in the same snapshot might have slightly different nanosecond timestamps. + const sessionId = + (dp.attributes?.['session.id'] as string) || 'unknown'; + const component = + (dp.attributes?.['component'] as string) || 'unknown'; + const seconds = dp.startTime?.[0] || 0; + const timeKey = `${sessionId}-${component}-${seconds}`; + + if (!snapshots[timeKey]) { + snapshots[timeKey] = { + rss: 0, + heapUsed: 0, + heapTotal: 0, + external: 0, + }; + } + + const type = dp.attributes?.['memory_type']; + const value = dp.value?.max ?? dp.value?.sum ?? 0; + + if (type === 'heap_used') snapshots[timeKey].heapUsed = value; + else if (type === 'heap_total') + snapshots[timeKey].heapTotal = value; + else if (type === 'rss') snapshots[timeKey].rss = value; + else if (type === 'external') + snapshots[timeKey].external = value; + } + } + } + } + } + } + + // Find the snapshot with the highest RSS + for (const snapshot of Object.values(snapshots)) { + if (snapshot.rss > metrics.rss) { + metrics.rss = snapshot.rss; + metrics.heapUsed = snapshot.heapUsed; + metrics.heapTotal = snapshot.heapTotal; + metrics.external = snapshot.external; + } + } + + // Fallback: if we didn't find any RSS but found heap, use the max heap + if (metrics.rss === 0) { + for (const snapshot of Object.values(snapshots)) { + if (snapshot.heapUsed > metrics.heapUsed) { + metrics.rss = snapshot.rss; + metrics.heapUsed = snapshot.heapUsed; + metrics.heapTotal = snapshot.heapTotal; + metrics.external = snapshot.external; + } + } + } + + return metrics; + } + + readCpuMetrics(): { + userUs: number; + systemUs: number; + totalUs: number; + } { + const metrics = { + userUs: 0, + systemUs: 0, + totalUs: 0, + }; + + const logs = this._readAndParseTelemetryLog(); + for (const logData of logs) { + if (logData && logData.scopeMetrics) { + for (const scopeMetric of logData.scopeMetrics) { + for (const metric of scopeMetric.metrics) { + if (metric.descriptor.name === 'gemini_cli.cpu.usage') { + for (const dp of metric.dataPoints) { + const value = dp.value?.sum ?? 0; + // Currently cpu usage is recorded as a single total sum in core/metrics.ts + metrics.totalUs = value; + } + } + } + } + } + } + return metrics; + } + + readEventLoopMetrics(): { + p50: number; + p95: number; + max: number; + } { + const metrics = { + p50: 0, + p95: 0, + max: 0, + }; + + const logs = this._readAndParseTelemetryLog(); + for (const logData of logs) { + if (logData && logData.scopeMetrics) { + for (const scopeMetric of logData.scopeMetrics) { + for (const metric of scopeMetric.metrics) { + if (metric.descriptor.name === 'gemini_cli.event_loop.delay') { + for (const dp of metric.dataPoints) { + const percentile = dp.attributes?.['percentile']; + const value = dp.value?.sum ?? 0; + if (percentile === 'p50') metrics.p50 = value; + else if (percentile === 'p95') metrics.p95 = value; + else if (percentile === 'max') metrics.max = value; + } + } + } + } + } + } + return metrics; + } + async runInteractive(options?: { args?: string | string[]; approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; From 48463354fe3f086d8219586e2f50674b493a464a Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 16:45:02 +0000 Subject: [PATCH 2/8] refinement --- memory-tests/memory-usage.test.ts | 4 + .../test-utils/src/memory-test-harness.ts | 89 +++++++++++- packages/test-utils/src/test-rig.ts | 134 ++++++++++-------- 3 files changed, 165 insertions(+), 62 deletions(-) diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts index 6a427402930..31275baed4f 100644 --- a/memory-tests/memory-usage.test.ts +++ b/memory-tests/memory-usage.test.ts @@ -153,6 +153,9 @@ describe('Memory Usage Tests', () => { ); } else { harness.assertWithinBaseline(result); + harness.assertMemoryReturnsToBaseline(result.snapshots, 20); + const { leaked, message } = harness.analyzeSnapshots(result.snapshots); + if (leaked) console.warn(`⚠ ${message}`); } }); @@ -199,6 +202,7 @@ describe('Memory Usage Tests', () => { ); } else { harness.assertWithinBaseline(result); + harness.assertMemoryReturnsToBaseline(result.snapshots, 20); } }); diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index 863b200a133..04f7c390a33 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -82,11 +82,12 @@ export class MemoryTestHarness { async takeSnapshot( rig: TestRig, label: string = 'snapshot', + strategy: 'peak' | 'last' = 'last', ): Promise { - const metrics = rig.readMemoryMetrics(); + const metrics = rig.readMemoryMetrics(strategy); return { - timestamp: Date.now(), + timestamp: metrics.timestamp, label, heapUsed: metrics.heapUsed, heapTotal: metrics.heapTotal, @@ -138,10 +139,26 @@ export class MemoryTestHarness { const afterSnap = await this.takeSnapshot(rig, 'after'); snapshots.push(afterSnap); - // Calculate peak values - const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed)); - const peakRss = Math.max(...snapshots.map((s) => s.rss)); - const peakExternal = Math.max(...snapshots.map((s) => s.external)); + // Calculate peak values from ALL snapshots seen during the scenario + const allSnapshots = rig.readAllMemorySnapshots(); + const scenarioSnapshots = allSnapshots.filter( + (s) => + s.timestamp >= beforeSnap.timestamp && + s.timestamp <= afterSnap.timestamp, + ); + + const peakHeapUsed = Math.max( + ...scenarioSnapshots.map((s) => s.heapUsed), + ...snapshots.map((s) => s.heapUsed), + ); + const peakRss = Math.max( + ...scenarioSnapshots.map((s) => s.rss), + ...snapshots.map((s) => s.rss), + ); + const peakExternal = Math.max( + ...scenarioSnapshots.map((s) => s.external), + ...snapshots.map((s) => s.external), + ); // Get baseline const baseline = this.baselines.scenarios[name]; @@ -176,6 +193,66 @@ export class MemoryTestHarness { return result; } + /** + * Analyze snapshots to detect sustained leaks. + * A leak is flagged if growth is observed in both phases. + */ + analyzeSnapshots( + snapshots: MemorySnapshot[], + thresholdBytes: number = 1024 * 1024, // 1 MB + ): { leaked: boolean; message: string } { + if (snapshots.length < 3) { + return { leaked: false, message: 'Not enough snapshots to analyze' }; + } + + const snap1 = snapshots[snapshots.length - 3]!; + const snap2 = snapshots[snapshots.length - 2]!; + const snap3 = snapshots[snapshots.length - 1]!; + + const growth1 = snap2.heapUsed - snap1.heapUsed; + const growth2 = snap3.heapUsed - snap2.heapUsed; + + const leaked = growth1 > thresholdBytes && growth2 > thresholdBytes; + let message = leaked + ? `Memory bloat detected: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})` + : `No sustained growth detected above threshold.`; + + return { leaked, message }; + } + + /** + * Assert that memory returns to a baseline level after a peak. + * Useful for verifying that large tool outputs or history are not retained. + */ + assertMemoryReturnsToBaseline( + snapshots: MemorySnapshot[], + tolerancePercent: number = 15, + ): void { + if (snapshots.length < 3) { + return; // Need at least before, peak, after + } + + // Find the first non-zero snapshot as baseline + const baseline = snapshots.find((s) => s.heapUsed > 0); + if (!baseline) { + return; // No memory reported yet + } + + const final = snapshots[snapshots.length - 1]!; + + const tolerance = baseline.heapUsed * (tolerancePercent / 100); + const delta = final.heapUsed - baseline.heapUsed; + + if (delta > tolerance) { + throw new Error( + `Memory did not return to baseline!\n` + + ` Baseline: ${formatMB(baseline.heapUsed)} (${baseline.label})\n` + + ` Final: ${formatMB(final.heapUsed)} (${final.label})\n` + + ` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`, + ); + } + } + /** * Assert that a scenario result is within the baseline tolerance. * Throws an assertion error with details if it exceeds the threshold. diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 9c06ab56163..fa5ffa4c911 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -205,27 +205,6 @@ export interface MetricDataPoint { endTime?: string; } -export interface TelemetryMetric { - descriptor: { - name: string; - type?: string; - description?: string; - unit?: string; - }; - dataPoints: MetricDataPoint[]; -} -export interface MetricDataPoint { - attributes?: Record; - value?: { - sum?: number; - min?: number; - max?: number; - count?: number; - }; - startTime?: [number, number]; - endTime?: string; -} - export interface TelemetryMetric { descriptor: { name: string; @@ -1509,23 +1488,88 @@ export class TestRig { return null; } - readMemoryMetrics(): { + readMemoryMetrics(strategy: 'peak' | 'last' = 'peak'): { + timestamp: number; heapUsed: number; heapTotal: number; rss: number; external: number; } { - // For simplicity, we just look for the last values in the logs - const metrics = { - heapUsed: 0, - heapTotal: 0, - rss: 0, - external: 0, + const snapshots = this._getMemorySnapshots(); + if (snapshots.length === 0) { + return { + timestamp: Date.now(), + heapUsed: 0, + heapTotal: 0, + rss: 0, + external: 0, + }; + } + + if (strategy === 'last') { + const last = snapshots[snapshots.length - 1]; + return { + timestamp: last.timestamp, + heapUsed: last.heapUsed, + heapTotal: last.heapTotal, + rss: last.rss, + external: last.external, + }; + } + + // Find the snapshot with the highest RSS + let peak = snapshots[0]; + for (const snapshot of snapshots) { + if (snapshot.rss > peak.rss) { + peak = snapshot; + } + } + + // Fallback: if we didn't find any RSS but found heap, use the max heap + if (peak.rss === 0) { + for (const snapshot of snapshots) { + if (snapshot.heapUsed > peak.heapUsed) { + peak = snapshot; + } + } + } + + return { + timestamp: peak.timestamp, + heapUsed: peak.heapUsed, + heapTotal: peak.heapTotal, + rss: peak.rss, + external: peak.external, }; + } - // We want to return the memory snapshot that has the peak RSS usage. - // Group data points by their session, component and start time (seconds) to represent a single snapshot. - const snapshots: Record = {}; + readAllMemorySnapshots(): { + timestamp: number; + heapUsed: number; + heapTotal: number; + rss: number; + external: number; + }[] { + return this._getMemorySnapshots(); + } + + private _getMemorySnapshots(): { + timestamp: number; + heapUsed: number; + heapTotal: number; + rss: number; + external: number; + }[] { + const snapshots: Record< + string, + { + timestamp: number; + heapUsed: number; + heapTotal: number; + rss: number; + external: number; + } + > = {}; const logs = this._readAndParseTelemetryLog(); for (const logData of logs) { @@ -1534,17 +1578,17 @@ export class TestRig { for (const metric of scopeMetric.metrics) { if (metric.descriptor.name === 'gemini_cli.memory.usage') { for (const dp of metric.dataPoints) { - // Group by session, component and seconds portion of start time to identify a single snapshot interval. - // Different metrics in the same snapshot might have slightly different nanosecond timestamps. const sessionId = (dp.attributes?.['session.id'] as string) || 'unknown'; const component = (dp.attributes?.['component'] as string) || 'unknown'; const seconds = dp.startTime?.[0] || 0; + const nanos = dp.startTime?.[1] || 0; const timeKey = `${sessionId}-${component}-${seconds}`; if (!snapshots[timeKey]) { snapshots[timeKey] = { + timestamp: seconds * 1000 + Math.floor(nanos / 1000000), rss: 0, heapUsed: 0, heapTotal: 0, @@ -1568,29 +1612,7 @@ export class TestRig { } } - // Find the snapshot with the highest RSS - for (const snapshot of Object.values(snapshots)) { - if (snapshot.rss > metrics.rss) { - metrics.rss = snapshot.rss; - metrics.heapUsed = snapshot.heapUsed; - metrics.heapTotal = snapshot.heapTotal; - metrics.external = snapshot.external; - } - } - - // Fallback: if we didn't find any RSS but found heap, use the max heap - if (metrics.rss === 0) { - for (const snapshot of Object.values(snapshots)) { - if (snapshot.heapUsed > metrics.heapUsed) { - metrics.rss = snapshot.rss; - metrics.heapUsed = snapshot.heapUsed; - metrics.heapTotal = snapshot.heapTotal; - metrics.external = snapshot.external; - } - } - } - - return metrics; + return Object.values(snapshots).sort((a, b) => a.timestamp - b.timestamp); } readCpuMetrics(): { From 01c109a8bf612c92aead6547c120d14e0af74cc0 Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 17:01:37 +0000 Subject: [PATCH 3/8] revert change in ui --- .../cli/src/ui/components/MemoryUsageDisplay.tsx | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx index a625b817068..709f76baf31 100644 --- a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx +++ b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx @@ -15,8 +15,7 @@ export const MemoryUsageDisplay: React.FC<{ color?: string; isActive?: boolean; }> = ({ color = theme.text.primary, isActive = true }) => { - const [rssUsage, setRssUsage] = useState(''); - const [heapUsage, setHeapUsage] = useState(''); + const [memoryUsage, setMemoryUsage] = useState(''); const [memoryUsageColor, setMemoryUsageColor] = useState(color); useEffect(() => { @@ -25,12 +24,10 @@ export const MemoryUsageDisplay: React.FC<{ } const updateMemory = () => { - const usage = process.memoryUsage(); - const rss = usage.rss; - setRssUsage(formatBytes(rss)); - setHeapUsage(formatBytes(usage.heapUsed)); + const usage = process.memoryUsage().rss; + setMemoryUsage(formatBytes(usage)); setMemoryUsageColor( - rss >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color, + usage >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color, ); }; @@ -41,9 +38,7 @@ export const MemoryUsageDisplay: React.FC<{ return ( - - {rssUsage} (Heap: {heapUsage}) - + {memoryUsage} ); }; From 5799277eed38c42a2bbc05ebe7be8432f1f67d37 Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 19:07:49 +0000 Subject: [PATCH 4/8] update bytes to mb --- memory-tests/baselines.json | 72 +++++++++---------- packages/test-utils/src/memory-baselines.ts | 24 +++---- .../test-utils/src/memory-test-harness.ts | 31 ++++---- 3 files changed, 67 insertions(+), 60 deletions(-) diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json index 5d49b397c28..240e3d4fd46 100644 --- a/memory-tests/baselines.json +++ b/memory-tests/baselines.json @@ -1,55 +1,55 @@ { "version": 1, - "updatedAt": "2026-04-20T15:36:19.408Z", + "updatedAt": "2026-04-20T18:04:59.671Z", "scenarios": { "multi-turn-conversation": { - "heapUsedBytes": 76944136, - "heapTotalBytes": 96825344, - "rssBytes": 233385984, - "externalBytes": 102471331, - "timestamp": "2026-04-20T15:33:58.271Z" + "heapUsedMB": 68.8, + "heapTotalMB": 91.2, + "rssMB": 215.4, + "externalMB": 93.8, + "timestamp": "2026-04-20T18:02:40.101Z" }, "multi-function-call-repo-search": { - "heapUsedBytes": 77569064, - "heapTotalBytes": 96563200, - "rssBytes": 232521728, - "externalBytes": 102468611, - "timestamp": "2026-04-20T15:34:00.307Z" + "heapUsedMB": 73.5, + "heapTotalMB": 93.1, + "rssMB": 223.6, + "externalMB": 97.7, + "timestamp": "2026-04-20T18:02:42.032Z" }, "idle-session-startup": { - "heapUsedBytes": 77349144, - "heapTotalBytes": 96137216, - "rssBytes": 231256064, - "externalBytes": 102467662, - "timestamp": "2026-04-20T15:33:54.183Z" + "heapUsedMB": 69.8, + "heapTotalMB": 92.4, + "rssMB": 217.4, + "externalMB": 93.8, + "timestamp": "2026-04-20T18:02:36.294Z" }, "simple-prompt-response": { - "heapUsedBytes": 77721632, - "heapTotalBytes": 97087488, - "rssBytes": 233988096, - "externalBytes": 102465350, - "timestamp": "2026-04-20T15:33:56.085Z" + "heapUsedMB": 69.5, + "heapTotalMB": 92.4, + "rssMB": 216.1, + "externalMB": 93.8, + "timestamp": "2026-04-20T18:02:38.198Z" }, "resume-large-chat-with-messages": { - "heapUsedBytes": 892750792, - "heapTotalBytes": 969490432, - "rssBytes": 1128820736, - "externalBytes": 109232249, - "timestamp": "2026-04-20T15:36:19.408Z" + "heapUsedMB": 887.1, + "heapTotalMB": 954.3, + "rssMB": 1109.6, + "externalMB": 103.2, + "timestamp": "2026-04-20T18:04:59.671Z" }, "resume-large-chat": { - "heapUsedBytes": 907866808, - "heapTotalBytes": 976048128, - "rssBytes": 1138429952, - "externalBytes": 109420155, - "timestamp": "2026-04-20T15:35:26.843Z" + "heapUsedMB": 885.6, + "heapTotalMB": 955.6, + "rssMB": 1107.8, + "externalMB": 110.5, + "timestamp": "2026-04-20T18:04:06.526Z" }, "large-chat": { - "heapUsedBytes": 166125984, - "heapTotalBytes": 201601024, - "rssBytes": 748908544, - "externalBytes": 109001942, - "timestamp": "2026-04-20T15:34:32.960Z" + "heapUsedMB": 158.5, + "heapTotalMB": 193, + "rssMB": 787.9, + "externalMB": 104, + "timestamp": "2026-04-20T18:03:12.486Z" } } } diff --git a/packages/test-utils/src/memory-baselines.ts b/packages/test-utils/src/memory-baselines.ts index 3a4578cc504..bdcf0381b13 100644 --- a/packages/test-utils/src/memory-baselines.ts +++ b/packages/test-utils/src/memory-baselines.ts @@ -10,10 +10,10 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs'; * Baseline entry for a single memory test scenario. */ export interface MemoryBaseline { - heapUsedBytes: number; - heapTotalBytes: number; - rssBytes: number; - externalBytes: number; + heapUsedMB: number; + heapTotalMB: number; + rssMB: number; + externalMB: number; timestamp: string; } @@ -61,18 +61,18 @@ export function updateBaseline( path: string, scenarioName: string, measured: { - heapUsedBytes: number; - heapTotalBytes: number; - rssBytes: number; - externalBytes: number; + heapUsedMB: number; + heapTotalMB: number; + rssMB: number; + externalMB: number; }, ): void { const baselines = loadBaselines(path); baselines.scenarios[scenarioName] = { - heapUsedBytes: measured.heapUsedBytes, - heapTotalBytes: measured.heapTotalBytes, - rssBytes: measured.rssBytes, - externalBytes: measured.externalBytes, + heapUsedMB: measured.heapUsedMB, + heapTotalMB: measured.heapTotalMB, + rssMB: measured.rssMB, + externalMB: measured.externalMB, timestamp: new Date().toISOString(), }; saveBaselines(path, baselines); diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index 04f7c390a33..12ecc5ed8c1 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -54,6 +54,12 @@ export interface MemoryTestHarnessOptions { baselinesPath: string; /** Default tolerance percentage (0-100). Default: 10 */ defaultTolerancePercent?: number; + /** Number of GC cycles to run before each snapshot. Default: 3 */ + gcCycles?: number; + /** Delay in ms between GC cycles. Default: 100 */ + gcDelayMs?: number; + /** Number of samples to take for median calculation. Default: 3 */ + sampleCount?: number; } /** @@ -168,10 +174,9 @@ export class MemoryTestHarness { let withinTolerance = true; if (baseline) { + const measuredMB = afterSnap.heapUsed / (1024 * 1024); deltaPercent = - ((afterSnap.heapUsed - baseline.heapUsedBytes) / - baseline.heapUsedBytes) * - 100; + ((measuredMB - baseline.heapUsedMB) / baseline.heapUsedMB) * 100; withinTolerance = deltaPercent <= tolerance; } @@ -272,16 +277,16 @@ export class MemoryTestHarness { return; // Don't fail if no baseline exists yet } + const measuredMB = result.finalHeapUsed / (1024 * 1024); const deltaPercent = - ((result.finalHeapUsed - result.baseline.heapUsedBytes) / - result.baseline.heapUsedBytes) * + ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) * 100; if (deltaPercent > tolerance) { throw new Error( `Memory regression detected for "${result.scenarioName}"!\n` + ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` + - ` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` + + ` Baseline: ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` + ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` + ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` + ` Peak RSS: ${formatMB(result.peakRss)}\n` + @@ -294,12 +299,14 @@ export class MemoryTestHarness { * Update the baseline for a scenario with the current measured values. */ updateScenarioBaseline(result: MemoryTestResult): void { + const lastSnapshot = result.snapshots[result.snapshots.length - 1]; updateBaseline(this.baselinesPath, result.scenarioName, { - heapUsedBytes: result.finalHeapUsed, - heapTotalBytes: - result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0, - rssBytes: result.finalRss, - externalBytes: result.finalExternal, + heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)), + heapTotalMB: Number( + ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1), + ), + rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)), + externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)), }); // Reload baselines after update this.baselines = loadBaselines(this.baselinesPath); @@ -322,7 +329,7 @@ export class MemoryTestHarness { for (const result of resultsToReport) { const measured = formatMB(result.finalHeapUsed); const baseline = result.baseline - ? formatMB(result.baseline.heapUsedBytes) + ? `${result.baseline.heapUsedMB.toFixed(1)} MB` : 'N/A'; const delta = result.baseline ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%` From 74faa8dce1e23c3b826795248e8dfa12ebcbc1a8 Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 19:12:43 +0000 Subject: [PATCH 5/8] fix --- .../test-utils/src/memory-test-harness.ts | 108 +++++++++--------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index 12ecc5ed8c1..54b59296c5b 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -198,6 +198,60 @@ export class MemoryTestHarness { return result; } + /** + * Assert that a scenario result is within the baseline tolerance. + * Throws an assertion error with details if it exceeds the threshold. + */ + assertWithinBaseline( + result: MemoryTestResult, + tolerancePercent?: number, + ): void { + const tolerance = tolerancePercent ?? this.defaultTolerancePercent; + + if (!result.baseline) { + console.warn( + `⚠ No baseline found for "${result.scenarioName}". ` + + `Run with UPDATE_MEMORY_BASELINES=true to create one. ` + + `Measured: ${formatMB(result.finalHeapUsed)} heap used.`, + ); + return; // Don't fail if no baseline exists yet + } + + const measuredMB = result.finalHeapUsed / (1024 * 1024); + const deltaPercent = + ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) * + 100; + + if (deltaPercent > tolerance) { + throw new Error( + `Memory regression detected for "${result.scenarioName}"!\n` + + ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` + + ` Baseline: ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` + + ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` + + ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` + + ` Peak RSS: ${formatMB(result.peakRss)}\n` + + ` Peak External: ${formatMB(result.peakExternal)}`, + ); + } + } + + /** + * Update the baseline for a scenario with the current measured values. + */ + updateScenarioBaseline(result: MemoryTestResult): void { + const lastSnapshot = result.snapshots[result.snapshots.length - 1]; + updateBaseline(this.baselinesPath, result.scenarioName, { + heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)), + heapTotalMB: Number( + ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1), + ), + rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)), + externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)), + }); + // Reload baselines after update + this.baselines = loadBaselines(this.baselinesPath); + } + /** * Analyze snapshots to detect sustained leaks. * A leak is flagged if growth is observed in both phases. @@ -258,60 +312,6 @@ export class MemoryTestHarness { } } - /** - * Assert that a scenario result is within the baseline tolerance. - * Throws an assertion error with details if it exceeds the threshold. - */ - assertWithinBaseline( - result: MemoryTestResult, - tolerancePercent?: number, - ): void { - const tolerance = tolerancePercent ?? this.defaultTolerancePercent; - - if (!result.baseline) { - console.warn( - `⚠ No baseline found for "${result.scenarioName}". ` + - `Run with UPDATE_MEMORY_BASELINES=true to create one. ` + - `Measured: ${formatMB(result.finalHeapUsed)} heap used.`, - ); - return; // Don't fail if no baseline exists yet - } - - const measuredMB = result.finalHeapUsed / (1024 * 1024); - const deltaPercent = - ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) * - 100; - - if (deltaPercent > tolerance) { - throw new Error( - `Memory regression detected for "${result.scenarioName}"!\n` + - ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` + - ` Baseline: ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` + - ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` + - ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` + - ` Peak RSS: ${formatMB(result.peakRss)}\n` + - ` Peak External: ${formatMB(result.peakExternal)}`, - ); - } - } - - /** - * Update the baseline for a scenario with the current measured values. - */ - updateScenarioBaseline(result: MemoryTestResult): void { - const lastSnapshot = result.snapshots[result.snapshots.length - 1]; - updateBaseline(this.baselinesPath, result.scenarioName, { - heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)), - heapTotalMB: Number( - ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1), - ), - rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)), - externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)), - }); - // Reload baselines after update - this.baselines = loadBaselines(this.baselinesPath); - } - /** * Generate a report with ASCII charts and summary table. * Uses the `asciichart` library for terminal visualization. From 146ab4f182ceafada64415a0e05b3846713ec65d Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Mon, 20 Apr 2026 19:55:51 +0000 Subject: [PATCH 6/8] add nanos --- packages/test-utils/src/test-rig.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index fa5ffa4c911..96a906f3198 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -1584,7 +1584,7 @@ export class TestRig { (dp.attributes?.['component'] as string) || 'unknown'; const seconds = dp.startTime?.[0] || 0; const nanos = dp.startTime?.[1] || 0; - const timeKey = `${sessionId}-${component}-${seconds}`; + const timeKey = `${sessionId}-${component}-${seconds}-${nanos}`; if (!snapshots[timeKey]) { snapshots[timeKey] = { From f4a154d7156257cccb9bab3d02b98ec4a035c416 Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Tue, 21 Apr 2026 14:45:51 +0000 Subject: [PATCH 7/8] remove unused func --- packages/test-utils/src/test-rig.ts | 62 ----------------------------- 1 file changed, 62 deletions(-) diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts index 96a906f3198..9374b573ac7 100644 --- a/packages/test-utils/src/test-rig.ts +++ b/packages/test-utils/src/test-rig.ts @@ -1615,68 +1615,6 @@ export class TestRig { return Object.values(snapshots).sort((a, b) => a.timestamp - b.timestamp); } - readCpuMetrics(): { - userUs: number; - systemUs: number; - totalUs: number; - } { - const metrics = { - userUs: 0, - systemUs: 0, - totalUs: 0, - }; - - const logs = this._readAndParseTelemetryLog(); - for (const logData of logs) { - if (logData && logData.scopeMetrics) { - for (const scopeMetric of logData.scopeMetrics) { - for (const metric of scopeMetric.metrics) { - if (metric.descriptor.name === 'gemini_cli.cpu.usage') { - for (const dp of metric.dataPoints) { - const value = dp.value?.sum ?? 0; - // Currently cpu usage is recorded as a single total sum in core/metrics.ts - metrics.totalUs = value; - } - } - } - } - } - } - return metrics; - } - - readEventLoopMetrics(): { - p50: number; - p95: number; - max: number; - } { - const metrics = { - p50: 0, - p95: 0, - max: 0, - }; - - const logs = this._readAndParseTelemetryLog(); - for (const logData of logs) { - if (logData && logData.scopeMetrics) { - for (const scopeMetric of logData.scopeMetrics) { - for (const metric of scopeMetric.metrics) { - if (metric.descriptor.name === 'gemini_cli.event_loop.delay') { - for (const dp of metric.dataPoints) { - const percentile = dp.attributes?.['percentile']; - const value = dp.value?.sum ?? 0; - if (percentile === 'p50') metrics.p50 = value; - else if (percentile === 'p95') metrics.p95 = value; - else if (percentile === 'max') metrics.max = value; - } - } - } - } - } - } - return metrics; - } - async runInteractive(options?: { args?: string | string[]; approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; From d8960af18b1d463275c9758b9c08659f766c2bf6 Mon Sep 17 00:00:00 2001 From: Cynthia Long Date: Tue, 21 Apr 2026 15:25:13 +0000 Subject: [PATCH 8/8] typo --- packages/test-utils/src/memory-test-harness.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts index 54b59296c5b..5b82fb7df1c 100644 --- a/packages/test-utils/src/memory-test-harness.ts +++ b/packages/test-utils/src/memory-test-harness.ts @@ -264,9 +264,9 @@ export class MemoryTestHarness { return { leaked: false, message: 'Not enough snapshots to analyze' }; } - const snap1 = snapshots[snapshots.length - 3]!; - const snap2 = snapshots[snapshots.length - 2]!; - const snap3 = snapshots[snapshots.length - 1]!; + const snap1 = snapshots[snapshots.length - 3]; + const snap2 = snapshots[snapshots.length - 2]; + const snap3 = snapshots[snapshots.length - 1]; const growth1 = snap2.heapUsed - snap1.heapUsed; const growth2 = snap3.heapUsed - snap2.heapUsed; @@ -285,10 +285,10 @@ export class MemoryTestHarness { */ assertMemoryReturnsToBaseline( snapshots: MemorySnapshot[], - tolerancePercent: number = 15, + tolerancePercent: number = 10, ): void { if (snapshots.length < 3) { - return; // Need at least before, peak, after + throw new Error('Need at least 3 snapshots to check return to baseline'); } // Find the first non-zero snapshot as baseline