From c42343672c5deb003cf876fe0e7ace9ac97817eb Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 15:51:03 +0000
Subject: [PATCH 1/8] Update memory tests to measure the Gemini CLI process

---
 memory-tests/baselines.json                   |  72 +++---
 memory-tests/memory-usage.test.ts             |  34 ++-
 .../src/ui/components/MemoryUsageDisplay.tsx  |  15 +-
 .../test-utils/src/memory-test-harness.ts     | 226 +++---------------
 packages/test-utils/src/test-rig.ts           | 169 ++++++++++++-
 5 files changed, 267 insertions(+), 249 deletions(-)

diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json
index 8000419a58c..5d49b397c28 100644
--- a/memory-tests/baselines.json
+++ b/memory-tests/baselines.json
@@ -1,55 +1,55 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-10T15:36:04.547Z",
+  "updatedAt": "2026-04-20T15:36:19.408Z",
   "scenarios": {
     "multi-turn-conversation": {
-      "heapUsedBytes": 120082704,
-      "heapTotalBytes": 177586176,
-      "rssBytes": 269172736,
-      "externalBytes": 4304053,
-      "timestamp": "2026-04-10T15:35:17.603Z"
+      "heapUsedBytes": 76944136,
+      "heapTotalBytes": 96825344,
+      "rssBytes": 233385984,
+      "externalBytes": 102471331,
+      "timestamp": "2026-04-20T15:33:58.271Z"
     },
     "multi-function-call-repo-search": {
-      "heapUsedBytes": 104644984,
-      "heapTotalBytes": 111575040,
-      "rssBytes": 204079104,
-      "externalBytes": 4304053,
-      "timestamp": "2026-04-10T15:35:22.480Z"
+      "heapUsedBytes": 77569064,
+      "heapTotalBytes": 96563200,
+      "rssBytes": 232521728,
+      "externalBytes": 102468611,
+      "timestamp": "2026-04-20T15:34:00.307Z"
     },
     "idle-session-startup": {
-      "heapUsedBytes": 119813672,
-      "heapTotalBytes": 177061888,
-      "rssBytes": 267943936,
-      "externalBytes": 4304053,
-      "timestamp": "2026-04-10T15:35:08.035Z"
+      "heapUsedBytes": 77349144,
+      "heapTotalBytes": 96137216,
+      "rssBytes": 231256064,
+      "externalBytes": 102467662,
+      "timestamp": "2026-04-20T15:33:54.183Z"
     },
     "simple-prompt-response": {
-      "heapUsedBytes": 119722064,
-      "heapTotalBytes": 177324032,
-      "rssBytes": 268812288,
-      "externalBytes": 4304053,
-      "timestamp": "2026-04-10T15:35:12.770Z"
+      "heapUsedBytes": 77721632,
+      "heapTotalBytes": 97087488,
+      "rssBytes": 233988096,
+      "externalBytes": 102465350,
+      "timestamp": "2026-04-20T15:33:56.085Z"
     },
     "resume-large-chat-with-messages": {
-      "heapUsedBytes": 106545568,
-      "heapTotalBytes": 111509504,
-      "rssBytes": 202596352,
-      "externalBytes": 4306101,
-      "timestamp": "2026-04-10T15:36:04.547Z"
+      "heapUsedBytes": 892750792,
+      "heapTotalBytes": 969490432,
+      "rssBytes": 1128820736,
+      "externalBytes": 109232249,
+      "timestamp": "2026-04-20T15:36:19.408Z"
     },
     "resume-large-chat": {
-      "heapUsedBytes": 106513760,
-      "heapTotalBytes": 111509504,
-      "rssBytes": 202596352,
-      "externalBytes": 4306101,
-      "timestamp": "2026-04-10T15:35:59.528Z"
+      "heapUsedBytes": 907866808,
+      "heapTotalBytes": 976048128,
+      "rssBytes": 1138429952,
+      "externalBytes": 109420155,
+      "timestamp": "2026-04-20T15:35:26.843Z"
     },
     "large-chat": {
-      "heapUsedBytes": 106471568,
-      "heapTotalBytes": 111509504,
-      "rssBytes": 202596352,
-      "externalBytes": 4306101,
-      "timestamp": "2026-04-10T15:35:53.180Z"
+      "heapUsedBytes": 166125984,
+      "heapTotalBytes": 201601024,
+      "rssBytes": 748908544,
+      "externalBytes": 109001942,
+      "timestamp": "2026-04-20T15:34:32.960Z"
     }
   }
 }
diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts
index eb363a01351..6a427402930 100644
--- a/memory-tests/memory-usage.test.ts
+++ b/memory-tests/memory-usage.test.ts
@@ -16,15 +16,21 @@ import {
   mkdirSync,
   rmSync,
 } from 'node:fs';
-import { randomUUID } from 'node:crypto';
+import { randomUUID, createHash } from 'node:crypto';
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const BASELINES_PATH = join(__dirname, 'baselines.json');
 const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
+function getProjectHash(projectRoot: string): string {
+  return createHash('sha256').update(projectRoot).digest('hex');
+}
 const TOLERANCE_PERCENT = 10;
 
 // Fake API key for tests using fake responses
-const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
+const TEST_ENV = {
+  GEMINI_API_KEY: 'fake-memory-test-key',
+  GEMINI_MEMORY_MONITOR_INTERVAL: '100',
+};
 
 describe('Memory Usage Tests', () => {
   let harness: MemoryTestHarness;
@@ -56,6 +62,7 @@ describe('Memory Usage Tests', () => {
     });
 
     const result = await harness.runScenario(
+      rig,
       'idle-session-startup',
       async (recordSnapshot) => {
         await rig.run({
@@ -85,6 +92,7 @@ describe('Memory Usage Tests', () => {
     });
 
     const result = await harness.runScenario(
+      rig,
       'simple-prompt-response',
       async (recordSnapshot) => {
         await rig.run({
@@ -122,6 +130,7 @@ describe('Memory Usage Tests', () => {
     ];
 
     const result = await harness.runScenario(
+      rig,
       'multi-turn-conversation',
       async (recordSnapshot) => {
         // Run through all turns as a piped sequence
@@ -168,6 +177,7 @@ describe('Memory Usage Tests', () => {
     );
 
     const result = await harness.runScenario(
+      rig,
       'multi-function-call-repo-search',
       async (recordSnapshot) => {
         await rig.run({
@@ -228,6 +238,7 @@ describe('Memory Usage Tests', () => {
       });
 
       const result = await harness.runScenario(
+        rig,
         'large-chat',
         async (recordSnapshot) => {
           await rig.run({
@@ -257,19 +268,21 @@ describe('Memory Usage Tests', () => {
       });
 
       const result = await harness.runScenario(
+        rig,
         'resume-large-chat',
         async (recordSnapshot) => {
           // Ensure the history file is linked
           const targetChatsDir = join(
-            rig.testDir!,
+            rig.homeDir!,
+            '.gemini',
             'tmp',
-            'test-project-hash',
+            getProjectHash(rig.testDir!),
             'chats',
           );
           mkdirSync(targetChatsDir, { recursive: true });
           const targetHistoryPath = join(
             targetChatsDir,
-            'large-chat-session.json',
+            'session-large-chat.json',
           );
           if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
           copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -302,19 +315,21 @@ describe('Memory Usage Tests', () => {
       });
 
       const result = await harness.runScenario(
+        rig,
         'resume-large-chat-with-messages',
         async (recordSnapshot) => {
           // Ensure the history file is linked
           const targetChatsDir = join(
-            rig.testDir!,
+            rig.homeDir!,
+            '.gemini',
             'tmp',
-            'test-project-hash',
+            getProjectHash(rig.testDir!),
             'chats',
           );
           mkdirSync(targetChatsDir, { recursive: true });
           const targetHistoryPath = join(
             targetChatsDir,
-            'large-chat-session.json',
+            'session-large-chat.json',
           );
           if (existsSync(targetHistoryPath)) rmSync(targetHistoryPath);
           copyFileSync(sharedHistoryPath, targetHistoryPath);
@@ -457,6 +472,9 @@ async function generateSharedLargeChatData(tempDir: string) {
   // Generate responses for resumed chat
   const resumeResponsesStream = createWriteStream(resumeResponsesPath);
   for (let i = 0; i < 5; i++) {
+    // Doubling up on non-streaming responses to satisfy classifier and complexity checks
+    resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
+    resumeResponsesStream.write(JSON.stringify(summaryResponse) + '\n');
     resumeResponsesStream.write(JSON.stringify(complexityResponse) + '\n');
     resumeResponsesStream.write(
       JSON.stringify({
diff --git a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
index 709f76baf31..a625b817068 100644
--- a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
+++ b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
@@ -15,7 +15,8 @@ export const MemoryUsageDisplay: React.FC<{
   color?: string;
   isActive?: boolean;
 }> = ({ color = theme.text.primary, isActive = true }) => {
-  const [memoryUsage, setMemoryUsage] = useState<string>('');
+  const [rssUsage, setRssUsage] = useState<string>('');
+  const [heapUsage, setHeapUsage] = useState<string>('');
   const [memoryUsageColor, setMemoryUsageColor] = useState<string>(color);
 
   useEffect(() => {
@@ -24,10 +25,12 @@ export const MemoryUsageDisplay: React.FC<{
     }
 
     const updateMemory = () => {
-      const usage = process.memoryUsage().rss;
-      setMemoryUsage(formatBytes(usage));
+      const usage = process.memoryUsage();
+      const rss = usage.rss;
+      setRssUsage(formatBytes(rss));
+      setHeapUsage(formatBytes(usage.heapUsed));
       setMemoryUsageColor(
-        usage >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color,
+        rss >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color,
       );
     };
 
@@ -38,7 +41,9 @@ export const MemoryUsageDisplay: React.FC<{
 
   return (
     <Box>
-      <Text color={memoryUsageColor}>{memoryUsage}</Text>
+      <Text color={memoryUsageColor}>
+        {rssUsage} (Heap: {heapUsage})
+      </Text>
     </Box>
   );
 };
diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts
index c12c2204589..863b200a133 100644
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@@ -4,10 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import v8 from 'node:v8';
-import { setTimeout as sleep } from 'node:timers/promises';
 import { loadBaselines, updateBaseline } from './memory-baselines.js';
 import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
+import type { TestRig } from './test-rig.js';
 
 /** Configuration for asciichart plot function. */
 interface PlotConfig {
@@ -28,9 +27,6 @@ export interface MemorySnapshot {
   heapTotal: number;
   rss: number;
   external: number;
-  arrayBuffers: number;
-  heapSizeLimit: number;
-  heapSpaces: any[];
 }
 
 /**
@@ -58,22 +54,13 @@ export interface MemoryTestHarnessOptions {
   baselinesPath: string;
   /** Default tolerance percentage (0-100). Default: 10 */
   defaultTolerancePercent?: number;
-  /** Number of GC cycles to run before each snapshot. Default: 3 */
-  gcCycles?: number;
-  /** Delay in ms between GC cycles. Default: 100 */
-  gcDelayMs?: number;
-  /** Number of samples to take for median calculation. Default: 3 */
-  sampleCount?: number;
-  /** Pause in ms between samples. Default: 50 */
-  samplePauseMs?: number;
 }
 
 /**
  * MemoryTestHarness provides infrastructure for running memory usage tests.
  *
  * It handles:
- * - Forcing V8 garbage collection to reduce noise
- * - Taking V8 heap snapshots for accurate memory measurement
+ * - Extracting memory metrics from CLI process telemetry
  * - Comparing against baselines with configurable tolerance
  * - Generating ASCII chart reports of memory trends
  */
@@ -81,88 +68,44 @@ export class MemoryTestHarness {
   private baselines: MemoryBaselineFile;
   private readonly baselinesPath: string;
   private readonly defaultTolerancePercent: number;
-  private readonly gcCycles: number;
-  private readonly gcDelayMs: number;
-  private readonly sampleCount: number;
-  private readonly samplePauseMs: number;
   private allResults: MemoryTestResult[] = [];
 
   constructor(options: MemoryTestHarnessOptions) {
     this.baselinesPath = options.baselinesPath;
     this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
-    this.gcCycles = options.gcCycles ?? 3;
-    this.gcDelayMs = options.gcDelayMs ?? 100;
-    this.sampleCount = options.sampleCount ?? 3;
-    this.samplePauseMs = options.samplePauseMs ?? 50;
     this.baselines = loadBaselines(this.baselinesPath);
   }
 
   /**
-   * Force garbage collection multiple times and take a V8 heap snapshot.
-   * Forces GC multiple times with delays to allow weak references and
-   * FinalizationRegistry callbacks to run, reducing measurement noise.
+   * Extract memory snapshot from TestRig telemetry.
    */
-  async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
-    await this.forceGC();
-
-    const memUsage = process.memoryUsage();
-    const heapStats = v8.getHeapStatistics();
-
-    return {
-      timestamp: Date.now(),
-      label,
-      heapUsed: memUsage.heapUsed,
-      heapTotal: memUsage.heapTotal,
-      rss: memUsage.rss,
-      external: memUsage.external,
-      arrayBuffers: memUsage.arrayBuffers,
-      heapSizeLimit: heapStats.heap_size_limit,
-      heapSpaces: v8.getHeapSpaceStatistics(),
-    };
-  }
-
-  /**
-   * Take multiple snapshot samples and return the median to reduce noise.
-   */
-  async takeMedianSnapshot(
-    label: string = 'median',
-    count?: number,
+  async takeSnapshot(
+    rig: TestRig,
+    label: string = 'snapshot',
   ): Promise<MemorySnapshot> {
-    const samples: MemorySnapshot[] = [];
-    const numSamples = count ?? this.sampleCount;
-
-    for (let i = 0; i < numSamples; i++) {
-      samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
-      if (i < numSamples - 1) {
-        await sleep(this.samplePauseMs);
-      }
-    }
-
-    // Sort by heapUsed and take the median
-    samples.sort((a, b) => a.heapUsed - b.heapUsed);
-    const medianIdx = Math.floor(samples.length / 2);
-    const median = samples[medianIdx]!;
+    const metrics = rig.readMemoryMetrics();
 
     return {
-      ...median,
-      label,
       timestamp: Date.now(),
+      label,
+      heapUsed: metrics.heapUsed,
+      heapTotal: metrics.heapTotal,
+      rss: metrics.rss,
+      external: metrics.external,
     };
   }
 
   /**
    * Run a memory test scenario.
    *
-   * Takes before/after snapshots around the scenario function, collects
-   * intermediate snapshots if the scenario provides them, and compares
-   * the result against the stored baseline.
-   *
+   * @param rig - The TestRig instance running the CLI
    * @param name - Scenario name (must match baseline key)
    * @param fn - Async function that executes the scenario. Receives a
    *   `recordSnapshot` callback for recording intermediate snapshots.
    * @param tolerancePercent - Override default tolerance for this scenario
    */
   async runScenario(
+    rig: TestRig,
     name: string,
     fn: (
       recordSnapshot: (label: string) => Promise<MemorySnapshot>,
@@ -172,27 +115,33 @@ export class MemoryTestHarness {
     const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
     const snapshots: MemorySnapshot[] = [];
 
+    // Record initial snapshot
+    const beforeSnap = await this.takeSnapshot(rig, 'before');
+    snapshots.push(beforeSnap);
+
     // Record a callback for intermediate snapshots
     const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
-      const snap = await this.takeMedianSnapshot(label);
+      // Small delay to allow telemetry to flush if needed
+      await rig.waitForTelemetryReady();
+      const snap = await this.takeSnapshot(rig, label);
       snapshots.push(snap);
       return snap;
     };
 
-    // Before snapshot
-    const beforeSnap = await this.takeMedianSnapshot('before');
-    snapshots.push(beforeSnap);
-
     // Run the scenario
     await fn(recordSnapshot);
 
-    // After snapshot (median of multiple samples)
-    const afterSnap = await this.takeMedianSnapshot('after');
+    // Final wait for telemetry to ensure everything is flushed
+    await rig.waitForTelemetryReady();
+
+    // After snapshot
+    const afterSnap = await this.takeSnapshot(rig, 'after');
     snapshots.push(afterSnap);
 
     // Calculate peak values
     const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
     const peakRss = Math.max(...snapshots.map((s) => s.rss));
+    const peakExternal = Math.max(...snapshots.map((s) => s.external));
 
     // Get baseline
     const baseline = this.baselines.scenarios[name];
@@ -209,8 +158,6 @@ export class MemoryTestHarness {
       withinTolerance = deltaPercent <= tolerance;
     }
 
-    const peakExternal = Math.max(...snapshots.map((s) => s.external));
-
     const result: MemoryTestResult = {
       scenarioName: name,
       snapshots,
@@ -281,105 +228,6 @@ export class MemoryTestHarness {
     this.baselines = loadBaselines(this.baselinesPath);
   }
 
-  /**
-   * Analyze snapshots to detect sustained leaks across 3 snapshots.
-   * A leak is flagged if growth is observed in both phases for any heap space.
-   */
-  analyzeSnapshots(
-    snapshots: MemorySnapshot[],
-    thresholdBytes: number = 1024 * 1024, // 1 MB
-  ): { leaked: boolean; message: string } {
-    if (snapshots.length < 3) {
-      return { leaked: false, message: 'Not enough snapshots to analyze' };
-    }
-
-    const snap1 = snapshots[snapshots.length - 3];
-    const snap2 = snapshots[snapshots.length - 2];
-    const snap3 = snapshots[snapshots.length - 1];
-
-    if (!snap1 || !snap2 || !snap3) {
-      return { leaked: false, message: 'Missing snapshots' };
-    }
-
-    const spaceNames = new Set<string>();
-    snap1.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
-    snap2.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
-    snap3.heapSpaces.forEach((s: any) => spaceNames.add(s.space_name));
-
-    let hasSustainedGrowth = false;
-    const growthDetails: string[] = [];
-
-    for (const name of spaceNames) {
-      const size1 =
-        snap1.heapSpaces.find((s: any) => s.space_name === name)
-          ?.space_used_size ?? 0;
-      const size2 =
-        snap2.heapSpaces.find((s: any) => s.space_name === name)
-          ?.space_used_size ?? 0;
-      const size3 =
-        snap3.heapSpaces.find((s: any) => s.space_name === name)
-          ?.space_used_size ?? 0;
-
-      const growth1 = size2 - size1;
-      const growth2 = size3 - size2;
-
-      if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
-        hasSustainedGrowth = true;
-        growthDetails.push(
-          `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
-        );
-      }
-    }
-
-    let message = '';
-    if (hasSustainedGrowth) {
-      message =
-        `Memory bloat detected in heap spaces:\n  ` +
-        growthDetails.join('\n  ');
-    } else {
-      message = `No sustained growth detected in any heap space above threshold.`;
-    }
-
-    return { leaked: hasSustainedGrowth, message };
-  }
-
-  /**
-   * Assert that memory returns to a baseline level after a peak.
-   * Useful for verifying that large tool outputs are not retained.
-   */
-  assertMemoryReturnsToBaseline(
-    snapshots: MemorySnapshot[],
-    tolerancePercent: number = 10,
-  ): void {
-    if (snapshots.length < 3) {
-      throw new Error('Need at least 3 snapshots to check return to baseline');
-    }
-
-    const baseline = snapshots[0]; // Assume first is baseline
-    const peak = snapshots.reduce(
-      (max, s) => (s.heapUsed > max.heapUsed ? s : max),
-      snapshots[0],
-    );
-    const final = snapshots[snapshots.length - 1];
-
-    if (!baseline || !peak || !final) {
-      throw new Error('Missing snapshots for return to baseline check');
-    }
-
-    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
-    const delta = final.heapUsed - baseline.heapUsed;
-
-    if (delta > tolerance) {
-      throw new Error(
-        `Memory did not return to baseline!\n` +
-          `  Baseline: ${formatMB(baseline.heapUsed)}\n` +
-          `  Peak:     ${formatMB(peak.heapUsed)}\n` +
-          `  Final:    ${formatMB(final.heapUsed)}\n` +
-          `  Delta:    ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
-      );
-    }
-  }
-
   /**
    * Generate a report with ASCII charts and summary table.
    * Uses the `asciichart` library for terminal visualization.
@@ -461,26 +309,6 @@ export class MemoryTestHarness {
     console.log(report);
     return report;
   }
-
-  /**
-   * Force V8 garbage collection.
-   * Runs multiple GC cycles with delays to allow weak references
-   * and FinalizationRegistry callbacks to run.
-   */
-  private async forceGC(): Promise<void> {
-    if (typeof globalThis.gc !== 'function') {
-      throw new Error(
-        'global.gc() not available. Run with --expose-gc for accurate measurements.',
-      );
-    }
-
-    for (let i = 0; i < this.gcCycles; i++) {
-      globalThis.gc();
-      if (i < this.gcCycles - 1) {
-        await sleep(this.gcDelayMs);
-      }
-    }
-  }
 }
 
 /**
diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index 906a7760bf3..9c06ab56163 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -205,6 +205,27 @@ export interface MetricDataPoint {
   endTime?: string;
 }
 
+export interface TelemetryMetric {
+  descriptor: {
+    name: string;
+    type?: string;
+    description?: string;
+    unit?: string;
+  };
+  dataPoints: MetricDataPoint[];
+}
+export interface MetricDataPoint {
+  attributes?: Record<string, unknown>;
+  value?: {
+    sum?: number;
+    min?: number;
+    max?: number;
+    count?: number;
+  };
+  startTime?: [number, number];
+  endTime?: string;
+}
+
 export interface TelemetryMetric {
   descriptor: {
     name: string;
@@ -1475,7 +1496,7 @@ export class TestRig {
   readMetric(metricName: string): TelemetryMetric | null {
     const logs = this._readAndParseTelemetryLog();
     for (const logData of logs) {
-      if (logData.scopeMetrics) {
+      if (logData && logData.scopeMetrics) {
         for (const scopeMetric of logData.scopeMetrics) {
           for (const metric of scopeMetric.metrics) {
             if (metric.descriptor.name === `gemini_cli.${metricName}`) {
@@ -1488,6 +1509,152 @@ export class TestRig {
     return null;
   }
 
+  readMemoryMetrics(): {
+    heapUsed: number;
+    heapTotal: number;
+    rss: number;
+    external: number;
+  } {
+    // For simplicity, we just look for the last values in the logs
+    const metrics = {
+      heapUsed: 0,
+      heapTotal: 0,
+      rss: 0,
+      external: 0,
+    };
+
+    // We want to return the memory snapshot that has the peak RSS usage.
+    // Group data points by their session, component and start time (seconds) to represent a single snapshot.
+    const snapshots: Record<string, typeof metrics> = {};
+
+    const logs = this._readAndParseTelemetryLog();
+    for (const logData of logs) {
+      if (logData && logData.scopeMetrics) {
+        for (const scopeMetric of logData.scopeMetrics) {
+          for (const metric of scopeMetric.metrics) {
+            if (metric.descriptor.name === 'gemini_cli.memory.usage') {
+              for (const dp of metric.dataPoints) {
+                // Group by session, component and seconds portion of start time to identify a single snapshot interval.
+                // Different metrics in the same snapshot might have slightly different nanosecond timestamps.
+                const sessionId =
+                  (dp.attributes?.['session.id'] as string) || 'unknown';
+                const component =
+                  (dp.attributes?.['component'] as string) || 'unknown';
+                const seconds = dp.startTime?.[0] || 0;
+                const timeKey = `${sessionId}-${component}-${seconds}`;
+
+                if (!snapshots[timeKey]) {
+                  snapshots[timeKey] = {
+                    rss: 0,
+                    heapUsed: 0,
+                    heapTotal: 0,
+                    external: 0,
+                  };
+                }
+
+                const type = dp.attributes?.['memory_type'];
+                const value = dp.value?.max ?? dp.value?.sum ?? 0;
+
+                if (type === 'heap_used') snapshots[timeKey].heapUsed = value;
+                else if (type === 'heap_total')
+                  snapshots[timeKey].heapTotal = value;
+                else if (type === 'rss') snapshots[timeKey].rss = value;
+                else if (type === 'external')
+                  snapshots[timeKey].external = value;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    // Find the snapshot with the highest RSS
+    for (const snapshot of Object.values(snapshots)) {
+      if (snapshot.rss > metrics.rss) {
+        metrics.rss = snapshot.rss;
+        metrics.heapUsed = snapshot.heapUsed;
+        metrics.heapTotal = snapshot.heapTotal;
+        metrics.external = snapshot.external;
+      }
+    }
+
+    // Fallback: if we didn't find any RSS but found heap, use the max heap
+    if (metrics.rss === 0) {
+      for (const snapshot of Object.values(snapshots)) {
+        if (snapshot.heapUsed > metrics.heapUsed) {
+          metrics.rss = snapshot.rss;
+          metrics.heapUsed = snapshot.heapUsed;
+          metrics.heapTotal = snapshot.heapTotal;
+          metrics.external = snapshot.external;
+        }
+      }
+    }
+
+    return metrics;
+  }
+
+  readCpuMetrics(): {
+    userUs: number;
+    systemUs: number;
+    totalUs: number;
+  } {
+    const metrics = {
+      userUs: 0,
+      systemUs: 0,
+      totalUs: 0,
+    };
+
+    const logs = this._readAndParseTelemetryLog();
+    for (const logData of logs) {
+      if (logData && logData.scopeMetrics) {
+        for (const scopeMetric of logData.scopeMetrics) {
+          for (const metric of scopeMetric.metrics) {
+            if (metric.descriptor.name === 'gemini_cli.cpu.usage') {
+              for (const dp of metric.dataPoints) {
+                const value = dp.value?.sum ?? 0;
+                // Currently cpu usage is recorded as a single total sum in core/metrics.ts
+                metrics.totalUs = value;
+              }
+            }
+          }
+        }
+      }
+    }
+    return metrics;
+  }
+
+  readEventLoopMetrics(): {
+    p50: number;
+    p95: number;
+    max: number;
+  } {
+    const metrics = {
+      p50: 0,
+      p95: 0,
+      max: 0,
+    };
+
+    const logs = this._readAndParseTelemetryLog();
+    for (const logData of logs) {
+      if (logData && logData.scopeMetrics) {
+        for (const scopeMetric of logData.scopeMetrics) {
+          for (const metric of scopeMetric.metrics) {
+            if (metric.descriptor.name === 'gemini_cli.event_loop.delay') {
+              for (const dp of metric.dataPoints) {
+                const percentile = dp.attributes?.['percentile'];
+                const value = dp.value?.sum ?? 0;
+                if (percentile === 'p50') metrics.p50 = value;
+                else if (percentile === 'p95') metrics.p95 = value;
+                else if (percentile === 'max') metrics.max = value;
+              }
+            }
+          }
+        }
+      }
+    }
+    return metrics;
+  }
+
   async runInteractive(options?: {
     args?: string | string[];
     approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';

From 48463354fe3f086d8219586e2f50674b493a464a Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 16:45:02 +0000
Subject: [PATCH 2/8] Refine memory harness: snapshot strategies and leak checks

---
 memory-tests/memory-usage.test.ts             |   4 +
 .../test-utils/src/memory-test-harness.ts     |  89 +++++++++++-
 packages/test-utils/src/test-rig.ts           | 134 ++++++++++--------
 3 files changed, 165 insertions(+), 62 deletions(-)

diff --git a/memory-tests/memory-usage.test.ts b/memory-tests/memory-usage.test.ts
index 6a427402930..31275baed4f 100644
--- a/memory-tests/memory-usage.test.ts
+++ b/memory-tests/memory-usage.test.ts
@@ -153,6 +153,9 @@ describe('Memory Usage Tests', () => {
       );
     } else {
       harness.assertWithinBaseline(result);
+      harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
+      const { leaked, message } = harness.analyzeSnapshots(result.snapshots);
+      if (leaked) console.warn(`⚠ ${message}`);
     }
   });
 
@@ -199,6 +202,7 @@ describe('Memory Usage Tests', () => {
       );
     } else {
       harness.assertWithinBaseline(result);
+      harness.assertMemoryReturnsToBaseline(result.snapshots, 20);
     }
   });
 
diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts
index 863b200a133..04f7c390a33 100644
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@@ -82,11 +82,12 @@ export class MemoryTestHarness {
   async takeSnapshot(
     rig: TestRig,
     label: string = 'snapshot',
+    strategy: 'peak' | 'last' = 'last',
   ): Promise<MemorySnapshot> {
-    const metrics = rig.readMemoryMetrics();
+    const metrics = rig.readMemoryMetrics(strategy);
 
     return {
-      timestamp: Date.now(),
+      timestamp: metrics.timestamp,
       label,
       heapUsed: metrics.heapUsed,
       heapTotal: metrics.heapTotal,
@@ -138,10 +139,26 @@ export class MemoryTestHarness {
     const afterSnap = await this.takeSnapshot(rig, 'after');
     snapshots.push(afterSnap);
 
-    // Calculate peak values
-    const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
-    const peakRss = Math.max(...snapshots.map((s) => s.rss));
-    const peakExternal = Math.max(...snapshots.map((s) => s.external));
+    // Calculate peak values from ALL snapshots seen during the scenario
+    const allSnapshots = rig.readAllMemorySnapshots();
+    const scenarioSnapshots = allSnapshots.filter(
+      (s) =>
+        s.timestamp >= beforeSnap.timestamp &&
+        s.timestamp <= afterSnap.timestamp,
+    );
+
+    const peakHeapUsed = Math.max(
+      ...scenarioSnapshots.map((s) => s.heapUsed),
+      ...snapshots.map((s) => s.heapUsed),
+    );
+    const peakRss = Math.max(
+      ...scenarioSnapshots.map((s) => s.rss),
+      ...snapshots.map((s) => s.rss),
+    );
+    const peakExternal = Math.max(
+      ...scenarioSnapshots.map((s) => s.external),
+      ...snapshots.map((s) => s.external),
+    );
 
     // Get baseline
     const baseline = this.baselines.scenarios[name];
@@ -176,6 +193,66 @@ export class MemoryTestHarness {
     return result;
   }
 
+  /**
+   * Analyze snapshots to detect sustained leaks.
+   * A leak is flagged if growth is observed in both phases.
+   */
+  analyzeSnapshots(
+    snapshots: MemorySnapshot[],
+    thresholdBytes: number = 1024 * 1024, // 1 MB
+  ): { leaked: boolean; message: string } {
+    if (snapshots.length < 3) {
+      return { leaked: false, message: 'Not enough snapshots to analyze' };
+    }
+
+    const snap1 = snapshots[snapshots.length - 3]!;
+    const snap2 = snapshots[snapshots.length - 2]!;
+    const snap3 = snapshots[snapshots.length - 1]!;
+
+    const growth1 = snap2.heapUsed - snap1.heapUsed;
+    const growth2 = snap3.heapUsed - snap2.heapUsed;
+
+    const leaked = growth1 > thresholdBytes && growth2 > thresholdBytes;
+    let message = leaked
+      ? `Memory bloat detected: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`
+      : `No sustained growth detected above threshold.`;
+
+    return { leaked, message };
+  }
+
+  /**
+   * Assert that memory returns to a baseline level after a peak.
+   * Useful for verifying that large tool outputs or history are not retained.
+   */
+  assertMemoryReturnsToBaseline(
+    snapshots: MemorySnapshot[],
+    tolerancePercent: number = 15,
+  ): void {
+    if (snapshots.length < 3) {
+      return; // Need at least before, peak, after
+    }
+
+    // Find the first non-zero snapshot as baseline
+    const baseline = snapshots.find((s) => s.heapUsed > 0);
+    if (!baseline) {
+      return; // No memory reported yet
+    }
+
+    const final = snapshots[snapshots.length - 1]!;
+
+    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
+    const delta = final.heapUsed - baseline.heapUsed;
+
+    if (delta > tolerance) {
+      throw new Error(
+        `Memory did not return to baseline!\n` +
+          `  Baseline: ${formatMB(baseline.heapUsed)} (${baseline.label})\n` +
+          `  Final:    ${formatMB(final.heapUsed)} (${final.label})\n` +
+          `  Delta:    ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
+      );
+    }
+  }
+
   /**
    * Assert that a scenario result is within the baseline tolerance.
    * Throws an assertion error with details if it exceeds the threshold.
diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index 9c06ab56163..fa5ffa4c911 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -205,27 +205,6 @@ export interface MetricDataPoint {
   endTime?: string;
 }
 
-export interface TelemetryMetric {
-  descriptor: {
-    name: string;
-    type?: string;
-    description?: string;
-    unit?: string;
-  };
-  dataPoints: MetricDataPoint[];
-}
-export interface MetricDataPoint {
-  attributes?: Record<string, unknown>;
-  value?: {
-    sum?: number;
-    min?: number;
-    max?: number;
-    count?: number;
-  };
-  startTime?: [number, number];
-  endTime?: string;
-}
-
 export interface TelemetryMetric {
   descriptor: {
     name: string;
@@ -1509,23 +1488,88 @@ export class TestRig {
     return null;
   }
 
-  readMemoryMetrics(): {
+  readMemoryMetrics(strategy: 'peak' | 'last' = 'peak'): {
+    timestamp: number;
     heapUsed: number;
     heapTotal: number;
     rss: number;
     external: number;
   } {
-    // For simplicity, we just look for the last values in the logs
-    const metrics = {
-      heapUsed: 0,
-      heapTotal: 0,
-      rss: 0,
-      external: 0,
+    const snapshots = this._getMemorySnapshots();
+    if (snapshots.length === 0) {
+      return {
+        timestamp: Date.now(),
+        heapUsed: 0,
+        heapTotal: 0,
+        rss: 0,
+        external: 0,
+      };
+    }
+
+    if (strategy === 'last') {
+      const last = snapshots[snapshots.length - 1];
+      return {
+        timestamp: last.timestamp,
+        heapUsed: last.heapUsed,
+        heapTotal: last.heapTotal,
+        rss: last.rss,
+        external: last.external,
+      };
+    }
+
+    // Find the snapshot with the highest RSS
+    let peak = snapshots[0];
+    for (const snapshot of snapshots) {
+      if (snapshot.rss > peak.rss) {
+        peak = snapshot;
+      }
+    }
+
+    // Fallback: if we didn't find any RSS but found heap, use the max heap
+    if (peak.rss === 0) {
+      for (const snapshot of snapshots) {
+        if (snapshot.heapUsed > peak.heapUsed) {
+          peak = snapshot;
+        }
+      }
+    }
+
+    return {
+      timestamp: peak.timestamp,
+      heapUsed: peak.heapUsed,
+      heapTotal: peak.heapTotal,
+      rss: peak.rss,
+      external: peak.external,
     };
+  }
 
-    // We want to return the memory snapshot that has the peak RSS usage.
-    // Group data points by their session, component and start time (seconds) to represent a single snapshot.
-    const snapshots: Record<string, typeof metrics> = {};
+  readAllMemorySnapshots(): {
+    timestamp: number;
+    heapUsed: number;
+    heapTotal: number;
+    rss: number;
+    external: number;
+  }[] {
+    return this._getMemorySnapshots();
+  }
+
+  private _getMemorySnapshots(): {
+    timestamp: number;
+    heapUsed: number;
+    heapTotal: number;
+    rss: number;
+    external: number;
+  }[] {
+    const snapshots: Record<
+      string,
+      {
+        timestamp: number;
+        heapUsed: number;
+        heapTotal: number;
+        rss: number;
+        external: number;
+      }
+    > = {};
 
     const logs = this._readAndParseTelemetryLog();
     for (const logData of logs) {
@@ -1534,17 +1578,17 @@ export class TestRig {
           for (const metric of scopeMetric.metrics) {
             if (metric.descriptor.name === 'gemini_cli.memory.usage') {
               for (const dp of metric.dataPoints) {
-                // Group by session, component and seconds portion of start time to identify a single snapshot interval.
-                // Different metrics in the same snapshot might have slightly different nanosecond timestamps.
                 const sessionId =
                   (dp.attributes?.['session.id'] as string) || 'unknown';
                 const component =
                   (dp.attributes?.['component'] as string) || 'unknown';
                 const seconds = dp.startTime?.[0] || 0;
+                const nanos = dp.startTime?.[1] || 0;
                 const timeKey = `${sessionId}-${component}-${seconds}`;
 
                 if (!snapshots[timeKey]) {
                   snapshots[timeKey] = {
+                    timestamp: seconds * 1000 + Math.floor(nanos / 1000000),
                     rss: 0,
                     heapUsed: 0,
                     heapTotal: 0,
@@ -1568,29 +1612,7 @@ export class TestRig {
       }
     }
 
-    // Find the snapshot with the highest RSS
-    for (const snapshot of Object.values(snapshots)) {
-      if (snapshot.rss > metrics.rss) {
-        metrics.rss = snapshot.rss;
-        metrics.heapUsed = snapshot.heapUsed;
-        metrics.heapTotal = snapshot.heapTotal;
-        metrics.external = snapshot.external;
-      }
-    }
-
-    // Fallback: if we didn't find any RSS but found heap, use the max heap
-    if (metrics.rss === 0) {
-      for (const snapshot of Object.values(snapshots)) {
-        if (snapshot.heapUsed > metrics.heapUsed) {
-          metrics.rss = snapshot.rss;
-          metrics.heapUsed = snapshot.heapUsed;
-          metrics.heapTotal = snapshot.heapTotal;
-          metrics.external = snapshot.external;
-        }
-      }
-    }
-
-    return metrics;
+    return Object.values(snapshots).sort((a, b) => a.timestamp - b.timestamp);
   }
 
   readCpuMetrics(): {

From 01c109a8bf612c92aead6547c120d14e0af74cc0 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 17:01:37 +0000
Subject: [PATCH 3/8] revert change in ui

---
 .../cli/src/ui/components/MemoryUsageDisplay.tsx  | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
index a625b817068..709f76baf31 100644
--- a/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
+++ b/packages/cli/src/ui/components/MemoryUsageDisplay.tsx
@@ -15,8 +15,7 @@ export const MemoryUsageDisplay: React.FC<{
   color?: string;
   isActive?: boolean;
 }> = ({ color = theme.text.primary, isActive = true }) => {
-  const [rssUsage, setRssUsage] = useState<string>('');
-  const [heapUsage, setHeapUsage] = useState<string>('');
+  const [memoryUsage, setMemoryUsage] = useState<string>('');
   const [memoryUsageColor, setMemoryUsageColor] = useState<string>(color);
 
   useEffect(() => {
@@ -25,12 +24,10 @@ export const MemoryUsageDisplay: React.FC<{
     }
 
     const updateMemory = () => {
-      const usage = process.memoryUsage();
-      const rss = usage.rss;
-      setRssUsage(formatBytes(rss));
-      setHeapUsage(formatBytes(usage.heapUsed));
+      const usage = process.memoryUsage().rss;
+      setMemoryUsage(formatBytes(usage));
       setMemoryUsageColor(
-        rss >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color,
+        usage >= 2 * 1024 * 1024 * 1024 ? theme.status.error : color,
       );
     };
 
@@ -41,9 +38,7 @@ export const MemoryUsageDisplay: React.FC<{
 
   return (
     <Box>
-      <Text color={memoryUsageColor}>
-        {rssUsage} (Heap: {heapUsage})
-      </Text>
+      <Text color={memoryUsageColor}>{memoryUsage}</Text>
     </Box>
   );
 };

From 5799277eed38c42a2bbc05ebe7be8432f1f67d37 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 19:07:49 +0000
Subject: [PATCH 4/8] update bytes to mb

---
 memory-tests/baselines.json                   | 72 +++++++++----------
 packages/test-utils/src/memory-baselines.ts   | 24 +++----
 .../test-utils/src/memory-test-harness.ts     | 31 ++++----
 3 files changed, 67 insertions(+), 60 deletions(-)

diff --git a/memory-tests/baselines.json b/memory-tests/baselines.json
index 5d49b397c28..240e3d4fd46 100644
--- a/memory-tests/baselines.json
+++ b/memory-tests/baselines.json
@@ -1,55 +1,55 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-20T15:36:19.408Z",
+  "updatedAt": "2026-04-20T18:04:59.671Z",
   "scenarios": {
     "multi-turn-conversation": {
-      "heapUsedBytes": 76944136,
-      "heapTotalBytes": 96825344,
-      "rssBytes": 233385984,
-      "externalBytes": 102471331,
-      "timestamp": "2026-04-20T15:33:58.271Z"
+      "heapUsedMB": 68.8,
+      "heapTotalMB": 91.2,
+      "rssMB": 215.4,
+      "externalMB": 93.8,
+      "timestamp": "2026-04-20T18:02:40.101Z"
     },
     "multi-function-call-repo-search": {
-      "heapUsedBytes": 77569064,
-      "heapTotalBytes": 96563200,
-      "rssBytes": 232521728,
-      "externalBytes": 102468611,
-      "timestamp": "2026-04-20T15:34:00.307Z"
+      "heapUsedMB": 73.5,
+      "heapTotalMB": 93.1,
+      "rssMB": 223.6,
+      "externalMB": 97.7,
+      "timestamp": "2026-04-20T18:02:42.032Z"
     },
     "idle-session-startup": {
-      "heapUsedBytes": 77349144,
-      "heapTotalBytes": 96137216,
-      "rssBytes": 231256064,
-      "externalBytes": 102467662,
-      "timestamp": "2026-04-20T15:33:54.183Z"
+      "heapUsedMB": 69.8,
+      "heapTotalMB": 92.4,
+      "rssMB": 217.4,
+      "externalMB": 93.8,
+      "timestamp": "2026-04-20T18:02:36.294Z"
     },
     "simple-prompt-response": {
-      "heapUsedBytes": 77721632,
-      "heapTotalBytes": 97087488,
-      "rssBytes": 233988096,
-      "externalBytes": 102465350,
-      "timestamp": "2026-04-20T15:33:56.085Z"
+      "heapUsedMB": 69.5,
+      "heapTotalMB": 92.4,
+      "rssMB": 216.1,
+      "externalMB": 93.8,
+      "timestamp": "2026-04-20T18:02:38.198Z"
     },
     "resume-large-chat-with-messages": {
-      "heapUsedBytes": 892750792,
-      "heapTotalBytes": 969490432,
-      "rssBytes": 1128820736,
-      "externalBytes": 109232249,
-      "timestamp": "2026-04-20T15:36:19.408Z"
+      "heapUsedMB": 887.1,
+      "heapTotalMB": 954.3,
+      "rssMB": 1109.6,
+      "externalMB": 103.2,
+      "timestamp": "2026-04-20T18:04:59.671Z"
     },
     "resume-large-chat": {
-      "heapUsedBytes": 907866808,
-      "heapTotalBytes": 976048128,
-      "rssBytes": 1138429952,
-      "externalBytes": 109420155,
-      "timestamp": "2026-04-20T15:35:26.843Z"
+      "heapUsedMB": 885.6,
+      "heapTotalMB": 955.6,
+      "rssMB": 1107.8,
+      "externalMB": 110.5,
+      "timestamp": "2026-04-20T18:04:06.526Z"
     },
     "large-chat": {
-      "heapUsedBytes": 166125984,
-      "heapTotalBytes": 201601024,
-      "rssBytes": 748908544,
-      "externalBytes": 109001942,
-      "timestamp": "2026-04-20T15:34:32.960Z"
+      "heapUsedMB": 158.5,
+      "heapTotalMB": 193,
+      "rssMB": 787.9,
+      "externalMB": 104,
+      "timestamp": "2026-04-20T18:03:12.486Z"
     }
   }
 }
diff --git a/packages/test-utils/src/memory-baselines.ts b/packages/test-utils/src/memory-baselines.ts
index 3a4578cc504..bdcf0381b13 100644
--- a/packages/test-utils/src/memory-baselines.ts
+++ b/packages/test-utils/src/memory-baselines.ts
@@ -10,10 +10,10 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
  * Baseline entry for a single memory test scenario.
  */
 export interface MemoryBaseline {
-  heapUsedBytes: number;
-  heapTotalBytes: number;
-  rssBytes: number;
-  externalBytes: number;
+  heapUsedMB: number;
+  heapTotalMB: number;
+  rssMB: number;
+  externalMB: number;
   timestamp: string;
 }
 
@@ -61,18 +61,18 @@ export function updateBaseline(
   path: string,
   scenarioName: string,
   measured: {
-    heapUsedBytes: number;
-    heapTotalBytes: number;
-    rssBytes: number;
-    externalBytes: number;
+    heapUsedMB: number;
+    heapTotalMB: number;
+    rssMB: number;
+    externalMB: number;
   },
 ): void {
   const baselines = loadBaselines(path);
   baselines.scenarios[scenarioName] = {
-    heapUsedBytes: measured.heapUsedBytes,
-    heapTotalBytes: measured.heapTotalBytes,
-    rssBytes: measured.rssBytes,
-    externalBytes: measured.externalBytes,
+    heapUsedMB: measured.heapUsedMB,
+    heapTotalMB: measured.heapTotalMB,
+    rssMB: measured.rssMB,
+    externalMB: measured.externalMB,
     timestamp: new Date().toISOString(),
   };
   saveBaselines(path, baselines);
diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts
index 04f7c390a33..12ecc5ed8c1 100644
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@@ -54,6 +54,12 @@ export interface MemoryTestHarnessOptions {
   baselinesPath: string;
   /** Default tolerance percentage (0-100). Default: 10 */
   defaultTolerancePercent?: number;
+  /** Number of GC cycles to run before each snapshot. Default: 3 */
+  gcCycles?: number;
+  /** Delay in ms between GC cycles. Default: 100 */
+  gcDelayMs?: number;
+  /** Number of samples to take for median calculation. Default: 3 */
+  sampleCount?: number;
 }
 
 /**
@@ -168,10 +174,9 @@ export class MemoryTestHarness {
     let withinTolerance = true;
 
     if (baseline) {
+      const measuredMB = afterSnap.heapUsed / (1024 * 1024);
       deltaPercent =
-        ((afterSnap.heapUsed - baseline.heapUsedBytes) /
-          baseline.heapUsedBytes) *
-        100;
+        ((measuredMB - baseline.heapUsedMB) / baseline.heapUsedMB) * 100;
       withinTolerance = deltaPercent <= tolerance;
     }
 
@@ -272,16 +277,16 @@ export class MemoryTestHarness {
       return; // Don't fail if no baseline exists yet
     }
 
+    const measuredMB = result.finalHeapUsed / (1024 * 1024);
     const deltaPercent =
-      ((result.finalHeapUsed - result.baseline.heapUsedBytes) /
-        result.baseline.heapUsedBytes) *
+      ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) *
       100;
 
     if (deltaPercent > tolerance) {
       throw new Error(
         `Memory regression detected for "${result.scenarioName}"!\n` +
           `  Measured:  ${formatMB(result.finalHeapUsed)} heap used\n` +
-          `  Baseline:  ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
+          `  Baseline:  ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` +
           `  Delta:     ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
           `  Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
           `  Peak RSS:  ${formatMB(result.peakRss)}\n` +
@@ -294,12 +299,14 @@ export class MemoryTestHarness {
    * Update the baseline for a scenario with the current measured values.
    */
   updateScenarioBaseline(result: MemoryTestResult): void {
+    const lastSnapshot = result.snapshots[result.snapshots.length - 1];
     updateBaseline(this.baselinesPath, result.scenarioName, {
-      heapUsedBytes: result.finalHeapUsed,
-      heapTotalBytes:
-        result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
-      rssBytes: result.finalRss,
-      externalBytes: result.finalExternal,
+      heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)),
+      heapTotalMB: Number(
+        ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1),
+      ),
+      rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)),
+      externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)),
     });
     // Reload baselines after update
     this.baselines = loadBaselines(this.baselinesPath);
@@ -322,7 +329,7 @@ export class MemoryTestHarness {
     for (const result of resultsToReport) {
       const measured = formatMB(result.finalHeapUsed);
       const baseline = result.baseline
-        ? formatMB(result.baseline.heapUsedBytes)
+        ? `${result.baseline.heapUsedMB.toFixed(1)} MB`
         : 'N/A';
       const delta = result.baseline
         ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`

From 74faa8dce1e23c3b826795248e8dfa12ebcbc1a8 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 19:12:43 +0000
Subject: [PATCH 5/8] move baseline assert/update methods above leak analysis

---
 .../test-utils/src/memory-test-harness.ts     | 108 +++++++++---------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts
index 12ecc5ed8c1..54b59296c5b 100644
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@@ -198,6 +198,60 @@ export class MemoryTestHarness {
     return result;
   }
 
+  /**
+   * Assert that a scenario result is within the baseline tolerance.
+   * Throws an assertion error with details if it exceeds the threshold.
+   */
+  assertWithinBaseline(
+    result: MemoryTestResult,
+    tolerancePercent?: number,
+  ): void {
+    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
+
+    if (!result.baseline) {
+      console.warn(
+        `⚠ No baseline found for "${result.scenarioName}". ` +
+          `Run with UPDATE_MEMORY_BASELINES=true to create one. ` +
+          `Measured: ${formatMB(result.finalHeapUsed)} heap used.`,
+      );
+      return; // Don't fail if no baseline exists yet
+    }
+
+    const measuredMB = result.finalHeapUsed / (1024 * 1024);
+    const deltaPercent =
+      ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) *
+      100;
+
+    if (deltaPercent > tolerance) {
+      throw new Error(
+        `Memory regression detected for "${result.scenarioName}"!\n` +
+          `  Measured:  ${formatMB(result.finalHeapUsed)} heap used\n` +
+          `  Baseline:  ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` +
+          `  Delta:     ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
+          `  Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
+          `  Peak RSS:  ${formatMB(result.peakRss)}\n` +
+          `  Peak External:  ${formatMB(result.peakExternal)}`,
+      );
+    }
+  }
+
+  /**
+   * Update the baseline for a scenario with the current measured values.
+   */
+  updateScenarioBaseline(result: MemoryTestResult): void {
+    const lastSnapshot = result.snapshots[result.snapshots.length - 1];
+    updateBaseline(this.baselinesPath, result.scenarioName, {
+      heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)),
+      heapTotalMB: Number(
+        ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1),
+      ),
+      rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)),
+      externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)),
+    });
+    // Reload baselines after update
+    this.baselines = loadBaselines(this.baselinesPath);
+  }
+
   /**
    * Analyze snapshots to detect sustained leaks.
    * A leak is flagged if growth is observed in both phases.
@@ -258,60 +312,6 @@ export class MemoryTestHarness {
     }
   }
 
-  /**
-   * Assert that a scenario result is within the baseline tolerance.
-   * Throws an assertion error with details if it exceeds the threshold.
-   */
-  assertWithinBaseline(
-    result: MemoryTestResult,
-    tolerancePercent?: number,
-  ): void {
-    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
-
-    if (!result.baseline) {
-      console.warn(
-        `⚠ No baseline found for "${result.scenarioName}". ` +
-          `Run with UPDATE_MEMORY_BASELINES=true to create one. ` +
-          `Measured: ${formatMB(result.finalHeapUsed)} heap used.`,
-      );
-      return; // Don't fail if no baseline exists yet
-    }
-
-    const measuredMB = result.finalHeapUsed / (1024 * 1024);
-    const deltaPercent =
-      ((measuredMB - result.baseline.heapUsedMB) / result.baseline.heapUsedMB) *
-      100;
-
-    if (deltaPercent > tolerance) {
-      throw new Error(
-        `Memory regression detected for "${result.scenarioName}"!\n` +
-          `  Measured:  ${formatMB(result.finalHeapUsed)} heap used\n` +
-          `  Baseline:  ${result.baseline.heapUsedMB.toFixed(1)} MB heap used\n` +
-          `  Delta:     ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
-          `  Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
-          `  Peak RSS:  ${formatMB(result.peakRss)}\n` +
-          `  Peak External:  ${formatMB(result.peakExternal)}`,
-      );
-    }
-  }
-
-  /**
-   * Update the baseline for a scenario with the current measured values.
-   */
-  updateScenarioBaseline(result: MemoryTestResult): void {
-    const lastSnapshot = result.snapshots[result.snapshots.length - 1];
-    updateBaseline(this.baselinesPath, result.scenarioName, {
-      heapUsedMB: Number((result.finalHeapUsed / (1024 * 1024)).toFixed(1)),
-      heapTotalMB: Number(
-        ((lastSnapshot?.heapTotal ?? 0) / (1024 * 1024)).toFixed(1),
-      ),
-      rssMB: Number((result.finalRss / (1024 * 1024)).toFixed(1)),
-      externalMB: Number((result.finalExternal / (1024 * 1024)).toFixed(1)),
-    });
-    // Reload baselines after update
-    this.baselines = loadBaselines(this.baselinesPath);
-  }
-
   /**
    * Generate a report with ASCII charts and summary table.
    * Uses the `asciichart` library for terminal visualization.

From 146ab4f182ceafada64415a0e05b3846713ec65d Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 20 Apr 2026 19:55:51 +0000
Subject: [PATCH 6/8] add nanos

---
 packages/test-utils/src/test-rig.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index fa5ffa4c911..96a906f3198 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -1584,7 +1584,7 @@ export class TestRig {
                   (dp.attributes?.['component'] as string) || 'unknown';
                 const seconds = dp.startTime?.[0] || 0;
                 const nanos = dp.startTime?.[1] || 0;
-                const timeKey = `${sessionId}-${component}-${seconds}`;
+                const timeKey = `${sessionId}-${component}-${seconds}-${nanos}`;
 
                 if (!snapshots[timeKey]) {
                   snapshots[timeKey] = {

From f4a154d7156257cccb9bab3d02b98ec4a035c416 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Tue, 21 Apr 2026 14:45:51 +0000
Subject: [PATCH 7/8] remove unused func

---
 packages/test-utils/src/test-rig.ts | 62 -----------------------------
 1 file changed, 62 deletions(-)

diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index 96a906f3198..9374b573ac7 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -1615,68 +1615,6 @@ export class TestRig {
     return Object.values(snapshots).sort((a, b) => a.timestamp - b.timestamp);
   }
 
-  readCpuMetrics(): {
-    userUs: number;
-    systemUs: number;
-    totalUs: number;
-  } {
-    const metrics = {
-      userUs: 0,
-      systemUs: 0,
-      totalUs: 0,
-    };
-
-    const logs = this._readAndParseTelemetryLog();
-    for (const logData of logs) {
-      if (logData && logData.scopeMetrics) {
-        for (const scopeMetric of logData.scopeMetrics) {
-          for (const metric of scopeMetric.metrics) {
-            if (metric.descriptor.name === 'gemini_cli.cpu.usage') {
-              for (const dp of metric.dataPoints) {
-                const value = dp.value?.sum ?? 0;
-                // Currently cpu usage is recorded as a single total sum in core/metrics.ts
-                metrics.totalUs = value;
-              }
-            }
-          }
-        }
-      }
-    }
-    return metrics;
-  }
-
-  readEventLoopMetrics(): {
-    p50: number;
-    p95: number;
-    max: number;
-  } {
-    const metrics = {
-      p50: 0,
-      p95: 0,
-      max: 0,
-    };
-
-    const logs = this._readAndParseTelemetryLog();
-    for (const logData of logs) {
-      if (logData && logData.scopeMetrics) {
-        for (const scopeMetric of logData.scopeMetrics) {
-          for (const metric of scopeMetric.metrics) {
-            if (metric.descriptor.name === 'gemini_cli.event_loop.delay') {
-              for (const dp of metric.dataPoints) {
-                const percentile = dp.attributes?.['percentile'];
-                const value = dp.value?.sum ?? 0;
-                if (percentile === 'p50') metrics.p50 = value;
-                else if (percentile === 'p95') metrics.p95 = value;
-                else if (percentile === 'max') metrics.max = value;
-              }
-            }
-          }
-        }
-      }
-    }
-    return metrics;
-  }
-
   async runInteractive(options?: {
     args?: string | string[];
     approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan';

From d8960af18b1d463275c9758b9c08659f766c2bf6 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Tue, 21 Apr 2026 15:25:13 +0000
Subject: [PATCH 8/8] tighten assertMemoryReturnsToBaseline, drop non-null assertions

---
 packages/test-utils/src/memory-test-harness.ts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/test-utils/src/memory-test-harness.ts b/packages/test-utils/src/memory-test-harness.ts
index 54b59296c5b..5b82fb7df1c 100644
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@@ -264,9 +264,9 @@ export class MemoryTestHarness {
       return { leaked: false, message: 'Not enough snapshots to analyze' };
     }
 
-    const snap1 = snapshots[snapshots.length - 3]!;
-    const snap2 = snapshots[snapshots.length - 2]!;
-    const snap3 = snapshots[snapshots.length - 1]!;
+    const snap1 = snapshots[snapshots.length - 3];
+    const snap2 = snapshots[snapshots.length - 2];
+    const snap3 = snapshots[snapshots.length - 1];
 
     const growth1 = snap2.heapUsed - snap1.heapUsed;
     const growth2 = snap3.heapUsed - snap2.heapUsed;
@@ -285,10 +285,10 @@ export class MemoryTestHarness {
    */
   assertMemoryReturnsToBaseline(
     snapshots: MemorySnapshot[],
-    tolerancePercent: number = 15,
+    tolerancePercent: number = 10,
   ): void {
     if (snapshots.length < 3) {
-      return; // Need at least before, peak, after
+      throw new Error('Need at least 3 snapshots to check return to baseline');
     }
 
     // Find the first non-zero snapshot as baseline