From e79799e00a58c21d4d2f3ebbd639ccdfde07eae7 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Sat, 11 Apr 2026 01:56:46 +0000
Subject: [PATCH 1/7] test(perf): add long conversation scenario

This adds a performance test that loads a 60MB chat history and measures the time it takes to resume the session, type text, execute a command, and scroll through the buffer. This addresses issue #24865.
---
 perf-tests/baselines.json           |   7 +-
 perf-tests/perf-usage.test.ts       | 132 +++++++++++++++++++++++++++-
 perf-tests/perf.long-chat.responses |   4 +
 3 files changed, 141 insertions(+), 2 deletions(-)
 create mode 100644 perf-tests/perf.long-chat.responses

diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 1dd52a52132..83cd7100170 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-09T02:30:22.000Z",
+  "updatedAt": "2026-04-11T01:52:23.183Z",
   "scenarios": {
     "cold-startup-time": {
       "wallClockMs": 927.553249999999,
@@ -21,6 +21,11 @@
       "wallClockMs": 1119.9,
       "cpuTotalUs": 2100,
       "timestamp": "2026-04-09T02:30:22.000Z"
+    },
+    "long-conversation": {
+      "wallClockMs": 4199.5024319999975,
+      "cpuTotalUs": 292959,
+      "timestamp": "2026-04-11T01:52:23.183Z"
     }
   }
 }
diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts
index 1a361eda5d4..e2756d97dcb 100644
--- a/perf-tests/perf-usage.test.ts
+++ b/perf-tests/perf-usage.test.ts
@@ -8,7 +8,13 @@ import { describe, it, beforeAll, afterAll } from 'vitest';
 import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { existsSync, readFileSync } from 'node:fs';
+import {
+  existsSync,
+  readFileSync,
+  mkdirSync,
+  copyFileSync,
+  writeFileSync,
+} from 'node:fs';
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const BASELINES_PATH = join(__dirname, 'baselines.json');
@@ -266,4 +272,128 @@ describe('CPU Performance Tests', () => {
       harness.assertWithinBaseline(result);
     }
   });
+
+  it('long-conversation: input, command, and scroll latency', async () => {
+    const LARGE_CHAT_SOURCE = join(
+      __dirname,
+      '..',
+      'memory-tests',
+      'large-chat-session.json',
+    );
+
+    const result = await harness.runScenario('long-conversation', async () => {
+      const rig = new TestRig();
+      try {
+        rig.setup('perf-long-conversation', {
+          fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
+        });
+
+        const SESSION_ID =
+          'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
+        const identifier = 'perf-long-conversation';
+
+        // Manually setup the project registry so the CLI knows this project's ID
+        const geminiDir = join(rig.homeDir!, '.gemini');
+        mkdirSync(geminiDir, { recursive: true });
+        const registryPath = join(geminiDir, 'projects.json');
+        const projects = { [rig.testDir!]: identifier };
+        if (process.platform === 'win32') {
+          projects[rig.testDir!.toLowerCase()] = identifier;
+        }
+        writeFileSync(registryPath, JSON.stringify({ projects }));
+
+        // Create the temp dir and ownership marker
+        const projectTempDir = join(geminiDir, 'tmp', identifier);
+        mkdirSync(projectTempDir, { recursive: true });
+        writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
+
+        // Setup the large chat history file
+        const targetChatsDir = join(projectTempDir, 'chats');
+        mkdirSync(targetChatsDir, { recursive: true });
+        const sessionFilePath = join(
+          targetChatsDir,
+          `session-${SESSION_ID}.json`,
+        );
+        copyFileSync(LARGE_CHAT_SOURCE, sessionFilePath);
+
+        if (process.env['DEBUG']) {
+          console.log(`[PERF DEBUG] rig.testDir: ${rig.testDir}`);
+          console.log(`[PERF DEBUG] rig.homeDir: ${rig.homeDir}`);
+          console.log(`[PERF DEBUG] Registry path: ${registryPath}`);
+          console.log(
+            `[PERF DEBUG] Registry content: ${readFileSync(registryPath, 'utf8')}`,
+          );
+          console.log(`[PERF DEBUG] Session file path: ${sessionFilePath}`);
+          console.log(
+            `[PERF DEBUG] Session file exists: ${existsSync(sessionFilePath)}`,
+          );
+        }
+
+        let sessionLoadTimeMs = 0;
+        let avgTypingLatencyMs = 0;
+        let commandExecutionTimeMs = 0;
+        let scrollingLatencyMs = 0;
+
+        const snapshot = await harness.measureWithEventLoop(
+          'long-conversation-full',
+          async () => {
+            // 1. Measure Session Load Time
+            harness.startTimer('session-load-time');
+            const run = await rig.runInteractive({
+              args: ['--resume', 'latest', '--debug'],
+              env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+            });
+            const loadSnapshot = harness.stopTimer('session-load-time');
+            sessionLoadTimeMs = loadSnapshot.wallClockMs;
+
+            // 2. Measure Typing Latency (average over 10 chars)
+            const testString = 'Hello Gemini';
+            harness.startTimer('typing-latency');
+            await run.type(testString);
+            const typeSnapshot = harness.stopTimer('typing-latency');
+            avgTypingLatencyMs = typeSnapshot.wallClockMs / testString.length;
+
+            // 3. Measure Simple Command Execution
+            harness.startTimer('command-execution-time');
+            await run.sendText('\r'); // Submit the "Hello Gemini" prompt
+            await run.expectText(
+              'I am a large conversation model response.',
+              30000,
+            );
+            const cmdSnapshot = harness.stopTimer('command-execution-time');
+            commandExecutionTimeMs = cmdSnapshot.wallClockMs;
+
+            // 4. Measure Scrolling Performance (Up/Down)
+            harness.startTimer('scrolling-latency');
+            // Simulate PageUp/PageDown multiple times
+            for (let i = 0; i < 5; i++) {
+              await run.sendKeys('\u001b[5~'); // PageUp
+              await run.sendKeys('\u001b[6~'); // PageDown
+            }
+            const scrollSnapshot = harness.stopTimer('scrolling-latency');
+            scrollingLatencyMs = scrollSnapshot.wallClockMs;
+
+            await run.kill();
+          },
+        );
+
+        // Add the sub-metrics to the main snapshot
+        return {
+          ...snapshot,
+          sessionLoadTimeMs,
+          avgTypingLatencyMs,
+          commandExecutionTimeMs,
+          scrollingLatencyMs,
+        };
+      } finally {
+        await rig.cleanup();
+      }
+    });
+
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  });
 });
diff --git a/perf-tests/perf.long-chat.responses b/perf-tests/perf.long-chat.responses
new file mode 100644
index 00000000000..7cf057e5a4b
--- /dev/null
+++ b/perf-tests/perf.long-chat.responses
@@ -0,0 +1,4 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"complexity_reasoning\":\"simple\",\"complexity_score\":1}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I am a large conversation model response."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"candidatesTokenCount":10,"promptTokenCount":20,"totalTokenCount":30}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"originalSummary\":\"large chat summary\",\"events\":[]}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"countTokens","response":{"totalTokens":100}}

From 4f93af81847c2bf7045277faf443f70029889aca Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 13 Apr 2026 19:36:40 +0000
Subject: [PATCH 2/7] update test

---
 perf-tests/baselines.json     |  32 +++-
 perf-tests/perf-usage.test.ts | 308 ++++++++++++++++++++++------------
 2 files changed, 231 insertions(+), 109 deletions(-)

diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 83cd7100170..61409d1fe46 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-11T01:52:23.183Z",
+  "updatedAt": "2026-04-13T19:21:34.123Z",
   "scenarios": {
     "cold-startup-time": {
       "wallClockMs": 927.553249999999,
@@ -26,6 +26,36 @@
       "wallClockMs": 4199.5024319999975,
       "cpuTotalUs": 292959,
       "timestamp": "2026-04-11T01:52:23.183Z"
+    },
+    "long-conversation-resume": {
+      "wallClockMs": 3006.8672899999947,
+      "cpuTotalUs": 220905,
+      "timestamp": "2026-04-13T19:20:42.672Z"
+    },
+    "long-conversation-typing": {
+      "wallClockMs": 361.24042400000326,
+      "cpuTotalUs": 12844,
+      "timestamp": "2026-04-13T19:20:56.118Z"
+    },
+    "long-conversation-execution": {
+      "wallClockMs": 248.57246799999848,
+      "cpuTotalUs": 5118,
+      "timestamp": "2026-04-13T19:21:08.755Z"
+    },
+    "long-conversation-scrolling": {
+      "wallClockMs": 210.18672699999297,
+      "cpuTotalUs": 7911,
+      "timestamp": "2026-04-13T18:32:14.226Z"
+    },
+    "long-conversation-terminal-scrolling": {
+      "wallClockMs": 210.52472200000193,
+      "cpuTotalUs": 8412,
+      "timestamp": "2026-04-13T19:21:21.244Z"
+    },
+    "long-conversation-alternate-scrolling": {
+      "wallClockMs": 208.16203599999426,
+      "cpuTotalUs": 9763,
+      "timestamp": "2026-04-13T19:21:34.123Z"
     }
   }
 }
diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts
index e2756d97dcb..e02ae4e51ca 100644
--- a/perf-tests/perf-usage.test.ts
+++ b/perf-tests/perf-usage.test.ts
@@ -171,7 +171,7 @@ describe('CPU Performance Tests', () => {
           const snapshot = await harness.measureWithEventLoop(
             'high-volume-output',
             async () => {
-              const runResult = await rig.run({
+              await rig.run({
                 args: ['Generate 1M lines of output'],
                 timeout: 120000,
                 env: {
@@ -182,7 +182,6 @@ describe('CPU Performance Tests', () => {
                   DEBUG: 'true',
                 },
               });
-              console.log(`  Child Process Output:`, runResult);
             },
           );
 
@@ -273,7 +272,11 @@ describe('CPU Performance Tests', () => {
     }
   });
 
-  it('long-conversation: input, command, and scroll latency', async () => {
+  describe('long-conversation', () => {
+    let rig: TestRig;
+    const identifier = 'perf-long-conversation';
+    const SESSION_ID =
+      'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
     const LARGE_CHAT_SOURCE = join(
       __dirname,
       '..',
@@ -281,119 +284,208 @@ describe('CPU Performance Tests', () => {
       'large-chat-session.json',
     );
 
-    const result = await harness.runScenario('long-conversation', async () => {
-      const rig = new TestRig();
-      try {
-        rig.setup('perf-long-conversation', {
-          fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
-        });
-
-        const SESSION_ID =
-          'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
-        const identifier = 'perf-long-conversation';
-
-        // Manually setup the project registry so the CLI knows this project's ID
-        const geminiDir = join(rig.homeDir!, '.gemini');
-        mkdirSync(geminiDir, { recursive: true });
-        const registryPath = join(geminiDir, 'projects.json');
-        const projects = { [rig.testDir!]: identifier };
-        if (process.platform === 'win32') {
-          projects[rig.testDir!.toLowerCase()] = identifier;
-        }
-        writeFileSync(registryPath, JSON.stringify({ projects }));
-
-        // Create the temp dir and ownership marker
-        const projectTempDir = join(geminiDir, 'tmp', identifier);
-        mkdirSync(projectTempDir, { recursive: true });
-        writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
-
-        // Setup the large chat history file
-        const targetChatsDir = join(projectTempDir, 'chats');
-        mkdirSync(targetChatsDir, { recursive: true });
-        const sessionFilePath = join(
-          targetChatsDir,
-          `session-${SESSION_ID}.json`,
+    beforeAll(async () => {
+      if (!existsSync(LARGE_CHAT_SOURCE)) {
+        throw new Error(
+          `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`,
         );
-        copyFileSync(LARGE_CHAT_SOURCE, sessionFilePath);
-
-        if (process.env['DEBUG']) {
-          console.log(`[PERF DEBUG] rig.testDir: ${rig.testDir}`);
-          console.log(`[PERF DEBUG] rig.homeDir: ${rig.homeDir}`);
-          console.log(`[PERF DEBUG] Registry path: ${registryPath}`);
-          console.log(
-            `[PERF DEBUG] Registry content: ${readFileSync(registryPath, 'utf8')}`,
+      }
+
+      rig = new TestRig();
+      rig.setup(identifier, {
+        fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
+      });
+
+      const geminiDir = join(rig.homeDir!, '.gemini');
+      const projectTempDir = join(geminiDir, 'tmp', identifier);
+      const targetChatsDir = join(projectTempDir, 'chats');
+
+      mkdirSync(targetChatsDir, { recursive: true });
+      writeFileSync(
+        join(geminiDir, 'projects.json'),
+        JSON.stringify({
+          projects: { [rig.testDir!]: identifier },
+        }),
+      );
+      writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
+      copyFileSync(
+        LARGE_CHAT_SOURCE,
+        join(targetChatsDir, `session-${SESSION_ID}.json`),
+      );
+    });
+
+    afterAll(async () => {
+      await rig.cleanup();
+    });
+
+    it('session-load: resume a 60MB chat history', async () => {
+      const result = await harness.runScenario(
+        'long-conversation-resume',
+        async () => {
+          const snapshot = await harness.measureWithEventLoop(
+            'resume',
+            async () => {
+              const run = await rig.runInteractive({
+                args: ['--resume', 'latest'],
+                env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+              });
+              await run.kill();
+            },
           );
-          console.log(`[PERF DEBUG] Session file path: ${sessionFilePath}`);
-          console.log(
-            `[PERF DEBUG] Session file exists: ${existsSync(sessionFilePath)}`,
+          return snapshot;
+        },
+      );
+
+      if (UPDATE_BASELINES) {
+        harness.updateScenarioBaseline(result);
+      } else {
+        harness.assertWithinBaseline(result);
+      }
+    });
+
+    it('typing: latency when typing into a large session', async () => {
+      const result = await harness.runScenario(
+        'long-conversation-typing',
+        async () => {
+          const run = await rig.runInteractive({
+            args: ['--resume', 'latest'],
+            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+          });
+
+          const snapshot = await harness.measureWithEventLoop(
+            'typing',
+            async () => {
+              await run.type('Hello');
+            },
           );
-        }
 
-        let sessionLoadTimeMs = 0;
-        let avgTypingLatencyMs = 0;
-        let commandExecutionTimeMs = 0;
-        let scrollingLatencyMs = 0;
-
-        const snapshot = await harness.measureWithEventLoop(
-          'long-conversation-full',
-          async () => {
-            // 1. Measure Session Load Time
-            harness.startTimer('session-load-time');
-            const run = await rig.runInteractive({
-              args: ['--resume', 'latest', '--debug'],
-              env: { GEMINI_API_KEY: 'fake-perf-test-key' },
-            });
-            const loadSnapshot = harness.stopTimer('session-load-time');
-            sessionLoadTimeMs = loadSnapshot.wallClockMs;
-
-            // 2. Measure Typing Latency (average over 10 chars)
-            const testString = 'Hello Gemini';
-            harness.startTimer('typing-latency');
-            await run.type(testString);
-            const typeSnapshot = harness.stopTimer('typing-latency');
-            avgTypingLatencyMs = typeSnapshot.wallClockMs / testString.length;
-
-            // 3. Measure Simple Command Execution
-            harness.startTimer('command-execution-time');
-            await run.sendText('\r'); // Submit the "Hello Gemini" prompt
-            await run.expectText(
-              'I am a large conversation model response.',
-              30000,
-            );
-            const cmdSnapshot = harness.stopTimer('command-execution-time');
-            commandExecutionTimeMs = cmdSnapshot.wallClockMs;
-
-            // 4. Measure Scrolling Performance (Up/Down)
-            harness.startTimer('scrolling-latency');
-            // Simulate PageUp/PageDown multiple times
-            for (let i = 0; i < 5; i++) {
-              await run.sendKeys('\u001b[5~'); // PageUp
-              await run.sendKeys('\u001b[6~'); // PageDown
-            }
-            const scrollSnapshot = harness.stopTimer('scrolling-latency');
-            scrollingLatencyMs = scrollSnapshot.wallClockMs;
+          await run.kill();
+          return snapshot;
+        },
+      );
 
-            await run.kill();
-          },
-        );
+      if (UPDATE_BASELINES) {
+        harness.updateScenarioBaseline(result);
+      } else {
+        harness.assertWithinBaseline(result);
+      }
+    });
 
-        // Add the sub-metrics to the main snapshot
-        return {
-          ...snapshot,
-          sessionLoadTimeMs,
-          avgTypingLatencyMs,
-          commandExecutionTimeMs,
-          scrollingLatencyMs,
-        };
-      } finally {
-        await rig.cleanup();
+    it('execution: response latency for a simple shell command', async () => {
+      const result = await harness.runScenario(
+        'long-conversation-execution',
+        async () => {
+          const run = await rig.runInteractive({
+            args: ['--resume', 'latest'],
+            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+          });
+
+          await run.expectText('type your message');
+
+          const snapshot = await harness.measureWithEventLoop(
+            'execution',
+            async () => {
+              await run.sendKeys('!echo hi\r');
+              await run.expectText('hi');
+            },
+          );
+
+          await run.kill();
+          return snapshot;
+        },
+      );
+
+      if (UPDATE_BASELINES) {
+        harness.updateScenarioBaseline(result);
+      } else {
+        harness.assertWithinBaseline(result);
       }
     });
 
-    if (UPDATE_BASELINES) {
-      harness.updateScenarioBaseline(result);
-    } else {
-      harness.assertWithinBaseline(result);
-    }
+    it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => {
+      const result = await harness.runScenario(
+        'long-conversation-terminal-scrolling',
+        async () => {
+          // Enable terminalBuffer to intentionally test CLI scrolling logic
+          const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
+          writeFileSync(
+            settingsPath,
+            JSON.stringify({
+              security: { folderTrust: { enabled: false } },
+              ui: { terminalBuffer: true },
+            }),
+          );
+
+          const run = await rig.runInteractive({
+            args: ['--resume', 'latest'],
+            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+          });
+
+          await run.expectText('type your message');
+
+          const snapshot = await harness.measureWithEventLoop(
+            'terminal-scrolling',
+            async () => {
+              for (let i = 0; i < 5; i++) {
+                await run.sendKeys('\u001b[5~'); // PageUp
+                await run.sendKeys('\u001b[6~'); // PageDown
+              }
+            },
+          );
+
+          await run.kill();
+          return snapshot;
+        },
+      );
+
+      if (UPDATE_BASELINES) {
+        harness.updateScenarioBaseline(result);
+      } else {
+        harness.assertWithinBaseline(result);
+      }
+    });
+
+    it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => {
+      const result = await harness.runScenario(
+        'long-conversation-alternate-scrolling',
+        async () => {
+          // Enable useAlternateBuffer to intentionally test CLI scrolling logic
+          const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
+          writeFileSync(
+            settingsPath,
+            JSON.stringify({
+              security: { folderTrust: { enabled: false } },
+              ui: { useAlternateBuffer: true },
+            }),
+          );
+
+          const run = await rig.runInteractive({
+            args: ['--resume', 'latest'],
+            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+          });
+
+          await run.expectText('type your message');
+
+          const snapshot = await harness.measureWithEventLoop(
+            'alternate-scrolling',
+            async () => {
+              for (let i = 0; i < 5; i++) {
+                await run.sendKeys('\u001b[5~'); // PageUp
+                await run.sendKeys('\u001b[6~'); // PageDown
+              }
+            },
+          );
+
+          await run.kill();
+          return snapshot;
+        },
+      );
+
+      if (UPDATE_BASELINES) {
+        harness.updateScenarioBaseline(result);
+      } else {
+        harness.assertWithinBaseline(result);
+      }
+    });
   });
 });

From 5091aaee3c0206cddf3a703dcb88093cdaeab11d Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 13 Apr 2026 19:50:42 +0000
Subject: [PATCH 3/7] update baseline

---
 perf-tests/baselines.json | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 61409d1fe46..504395df254 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-13T19:21:34.123Z",
+  "updatedAt": "2026-04-13T19:49:01.577Z",
   "scenarios": {
     "cold-startup-time": {
       "wallClockMs": 927.553249999999,
@@ -28,19 +28,19 @@
       "timestamp": "2026-04-11T01:52:23.183Z"
     },
     "long-conversation-resume": {
-      "wallClockMs": 3006.8672899999947,
-      "cpuTotalUs": 220905,
-      "timestamp": "2026-04-13T19:20:42.672Z"
+      "wallClockMs": 2807.285507000008,
+      "cpuTotalUs": 21912,
+      "timestamp": "2026-04-13T19:48:10.905Z"
     },
     "long-conversation-typing": {
-      "wallClockMs": 361.24042400000326,
-      "cpuTotalUs": 12844,
-      "timestamp": "2026-04-13T19:20:56.118Z"
+      "wallClockMs": 339.29195900000923,
+      "cpuTotalUs": 10295,
+      "timestamp": "2026-04-13T19:48:24.011Z"
     },
     "long-conversation-execution": {
-      "wallClockMs": 248.57246799999848,
-      "cpuTotalUs": 5118,
-      "timestamp": "2026-04-13T19:21:08.755Z"
+      "wallClockMs": 247.3171299999958,
+      "cpuTotalUs": 3583,
+      "timestamp": "2026-04-13T19:48:36.640Z"
     },
     "long-conversation-scrolling": {
       "wallClockMs": 210.18672699999297,
@@ -48,14 +48,14 @@
       "timestamp": "2026-04-13T18:32:14.226Z"
     },
     "long-conversation-terminal-scrolling": {
-      "wallClockMs": 210.52472200000193,
-      "cpuTotalUs": 8412,
-      "timestamp": "2026-04-13T19:21:21.244Z"
+      "wallClockMs": 207.54268199999933,
+      "cpuTotalUs": 6390,
+      "timestamp": "2026-04-13T19:48:49.110Z"
     },
     "long-conversation-alternate-scrolling": {
-      "wallClockMs": 208.16203599999426,
-      "cpuTotalUs": 9763,
-      "timestamp": "2026-04-13T19:21:34.123Z"
+      "wallClockMs": 207.34147999998822,
+      "cpuTotalUs": 6135,
+      "timestamp": "2026-04-13T19:49:01.577Z"
     }
   }
 }

From 7f648180fa119b91573c531165ddb4a50fa82135 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 13 Apr 2026 22:22:15 +0000
Subject: [PATCH 4/7] update test to use telemetry metrics

---
 .../core/src/telemetry/memory-monitor.test.ts |  15 +-
 packages/core/src/telemetry/memory-monitor.ts |   9 ++
 perf-tests/baselines.json                     |  39 +++---
 perf-tests/perf-usage.test.ts                 | 132 +++++++++++++++---
 4 files changed, 151 insertions(+), 44 deletions(-)

diff --git a/packages/core/src/telemetry/memory-monitor.test.ts b/packages/core/src/telemetry/memory-monitor.test.ts
index 8ad0d45595e..9cb0e91caa1 100644
--- a/packages/core/src/telemetry/memory-monitor.test.ts
+++ b/packages/core/src/telemetry/memory-monitor.test.ts
@@ -17,13 +17,18 @@ import {
   _resetGlobalMemoryMonitorForTests,
 } from './memory-monitor.js';
 import type { Config } from '../config/config.js';
-import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js';
+import {
+  recordMemoryUsage,
+  recordCpuUsage,
+  isPerformanceMonitoringActive,
+} from './metrics.js';
 import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
 import { RateLimiter } from './rate-limiter.js';
 
 // Mock dependencies
 vi.mock('./metrics.js', () => ({
   recordMemoryUsage: vi.fn(),
+  recordCpuUsage: vi.fn(),
   isPerformanceMonitoringActive: vi.fn(),
   MemoryMetricType: {
     HEAP_USED: 'heap_used',
@@ -50,6 +55,7 @@ vi.mock('node:process', () => ({
 }));
 
 const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage);
+const mockRecordCpuUsage = vi.mocked(recordCpuUsage);
 const mockIsPerformanceMonitoringActive = vi.mocked(
   isPerformanceMonitoringActive,
 );
@@ -192,6 +198,13 @@ describe('MemoryMonitor', () => {
             component: 'test_context',
           },
         );
+        expect(mockRecordCpuUsage).toHaveBeenCalledWith(
+          mockConfig,
+          expect.any(Number),
+          {
+            component: 'test_context',
+          },
+        );
       });
 
       it('should not record metrics when performance monitoring is inactive', () => {
diff --git a/packages/core/src/telemetry/memory-monitor.ts b/packages/core/src/telemetry/memory-monitor.ts
index e005bd73cca..aeaecc6ca08 100644
--- a/packages/core/src/telemetry/memory-monitor.ts
+++ b/packages/core/src/telemetry/memory-monitor.ts
@@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js';
 import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
 import {
   recordMemoryUsage,
+  recordCpuUsage,
   MemoryMetricType,
   isPerformanceMonitoringActive,
 } from './metrics.js';
@@ -37,6 +38,7 @@ export class MemoryMonitor {
   private intervalId: NodeJS.Timeout | null = null;
   private isRunning = false;
   private lastSnapshot: MemorySnapshot | null = null;
+  private lastCpuUsage: NodeJS.CpuUsage | null = null;
   private monitoringInterval: number = 10000;
   private highWaterMarkTracker: HighWaterMarkTracker;
   private rateLimiter: RateLimiter;
@@ -191,6 +193,13 @@ export class MemoryMonitor {
         memory_type: MemoryMetricType.RSS,
         component: context,
       });
+
+      // Record delta CPU usage (in microseconds)
+      const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined);
+      this.lastCpuUsage = process.cpuUsage();
+      recordCpuUsage(config, cpuUsage.user + cpuUsage.system, {
+        component: context,
+      });
     }
 
     this.lastSnapshot = snapshot;
diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 504395df254..2e5723c2bca 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-13T19:49:01.577Z",
+  "updatedAt": "2026-04-13T21:48:46.316Z",
   "scenarios": {
     "cold-startup-time": {
       "wallClockMs": 927.553249999999,
@@ -18,7 +18,7 @@
       "timestamp": "2026-04-08T22:28:23.290Z"
     },
     "high-volume-shell-output": {
-      "wallClockMs": 1119.9,
+       "wallClockMs": 1119.9,
       "cpuTotalUs": 2100,
       "timestamp": "2026-04-09T02:30:22.000Z"
     },
@@ -28,34 +28,29 @@
       "timestamp": "2026-04-11T01:52:23.183Z"
     },
     "long-conversation-resume": {
-      "wallClockMs": 2807.285507000008,
-      "cpuTotalUs": 21912,
-      "timestamp": "2026-04-13T19:48:10.905Z"
+      "wallClockMs": 3609.1588829999964,
+      "cpuTotalUs": 256378,
+      "timestamp": "2026-04-13T21:47:38.112Z"
     },
     "long-conversation-typing": {
-      "wallClockMs": 339.29195900000923,
-      "cpuTotalUs": 10295,
-      "timestamp": "2026-04-13T19:48:24.011Z"
+      "wallClockMs": 567.3885389999923,
+      "cpuTotalUs": 16572,
+      "timestamp": "2026-04-13T21:47:54.721Z"
     },
     "long-conversation-execution": {
-      "wallClockMs": 247.3171299999958,
-      "cpuTotalUs": 3583,
-      "timestamp": "2026-04-13T19:48:36.640Z"
-    },
-    "long-conversation-scrolling": {
-      "wallClockMs": 210.18672699999297,
-      "cpuTotalUs": 7911,
-      "timestamp": "2026-04-13T18:32:14.226Z"
+      "wallClockMs": 447.42054899998766,
+      "cpuTotalUs": 6902,
+      "timestamp": "2026-04-13T21:48:10.571Z"
     },
     "long-conversation-terminal-scrolling": {
-      "wallClockMs": 207.54268199999933,
-      "cpuTotalUs": 6390,
-      "timestamp": "2026-04-13T19:48:49.110Z"
+      "wallClockMs": 367.099886,
+      "cpuTotalUs": 12504142,
+      "timestamp": "2026-04-13T21:48:28.810Z"
     },
     "long-conversation-alternate-scrolling": {
-      "wallClockMs": 207.34147999998822,
-      "cpuTotalUs": 6135,
-      "timestamp": "2026-04-13T19:49:01.577Z"
+      "wallClockMs": 367.099886,
+      "cpuTotalUs": 12504142,
+      "timestamp": "2026-04-13T21:48:46.316Z"
     }
   }
 }
diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts
index e02ae4e51ca..e7cd229e33b 100644
--- a/perf-tests/perf-usage.test.ts
+++ b/perf-tests/perf-usage.test.ts
@@ -5,7 +5,7 @@
  */
 
 import { describe, it, beforeAll, afterAll } from 'vitest';
-import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
+import { TestRig, PerfTestHarness, type PerfSnapshot } from '@google/gemini-cli-test-utils';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import {
@@ -418,22 +418,67 @@ describe('CPU Performance Tests', () => {
 
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+            env: { 
+              GEMINI_API_KEY: 'fake-perf-test-key',
+              GEMINI_TELEMETRY_ENABLED: 'true',
+              GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+              GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+              DEBUG: 'true',
+            },
           });
 
           await run.expectText('type your message');
 
-          const snapshot = await harness.measureWithEventLoop(
-            'terminal-scrolling',
-            async () => {
-              for (let i = 0; i < 5; i++) {
-                await run.sendKeys('\u001b[5~'); // PageUp
-                await run.sendKeys('\u001b[6~'); // PageDown
-              }
-            },
-          );
+          // Start the interaction but do not wait for test runner overhead
+          for (let i = 0; i < 5; i++) {
+            await run.sendKeys('\u001b[5~'); // PageUp
+            await run.sendKeys('\u001b[6~'); // PageDown
+          }
 
+          await rig.waitForTelemetryReady();
           await run.kill();
+
+          const eventLoopMetric = rig.readMetric('event_loop.delay');
+          const cpuMetric = rig.readMetric('cpu.usage');
+
+          let p50Ms = 0;
+          let p95Ms = 0;
+          let maxMs = 0;
+          if (eventLoopMetric) {
+             const dataPoints = (eventLoopMetric as any).dataPoints || [];
+             const p50Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p50');
+             const p95Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p95');
+             const maxData = dataPoints.find((dp: any) => dp.attributes?.percentile === 'max');
+
+             if (p50Data) p50Ms = p50Data.value.sum;
+             if (p95Data) p95Ms = p95Data.value.sum;
+             if (maxData) maxMs = maxData.value.sum;
+          }
+
+          let cpuTotalUs = 0;
+          if (cpuMetric) {
+            const dataPoints = (cpuMetric as any).dataPoints || [];
+            for(const dp of dataPoints) {
+              if (dp.value?.sum > 0) {
+                cpuTotalUs += dp.value.sum;
+              }
+            }
+          }
+          const cpuUserUs = cpuTotalUs;
+          const cpuSystemUs = 0;
+
+          const snapshot: PerfSnapshot = {
+            timestamp: Date.now(),
+            label: 'scrolling',
+            wallClockMs: p50Ms,
+            cpuTotalUs,
+            cpuUserUs,
+            cpuSystemUs,
+            eventLoopDelayP50Ms: p50Ms,
+            eventLoopDelayP95Ms: p95Ms,
+            eventLoopDelayMaxMs: maxMs,
+          };
+
           return snapshot;
         },
       );
@@ -461,22 +506,67 @@ describe('CPU Performance Tests', () => {
 
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+            env: { 
+              GEMINI_API_KEY: 'fake-perf-test-key',
+              GEMINI_TELEMETRY_ENABLED: 'true',
+              GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+              GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+              DEBUG: 'true',
+            },
           });
 
           await run.expectText('type your message');
 
-          const snapshot = await harness.measureWithEventLoop(
-            'alternate-scrolling',
-            async () => {
-              for (let i = 0; i < 5; i++) {
-                await run.sendKeys('\u001b[5~'); // PageUp
-                await run.sendKeys('\u001b[6~'); // PageDown
-              }
-            },
-          );
+          // Start the interaction but do not wait for test runner overhead
+          for (let i = 0; i < 5; i++) {
+            await run.sendKeys('\u001b[5~'); // PageUp
+            await run.sendKeys('\u001b[6~'); // PageDown
+          }
 
+          await rig.waitForTelemetryReady();
           await run.kill();
+
+          const eventLoopMetric = rig.readMetric('event_loop.delay');
+          const cpuMetric = rig.readMetric('cpu.usage');
+
+          let p50Ms = 0;
+          let p95Ms = 0;
+          let maxMs = 0;
+          if (eventLoopMetric) {
+             const dataPoints = (eventLoopMetric as any).dataPoints || [];
+             const p50Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p50');
+             const p95Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p95');
+             const maxData = dataPoints.find((dp: any) => dp.attributes?.percentile === 'max');
+
+             if (p50Data) p50Ms = p50Data.value.sum;
+             if (p95Data) p95Ms = p95Data.value.sum;
+             if (maxData) maxMs = maxData.value.sum;
+          }
+
+          let cpuTotalUs = 0;
+          if (cpuMetric) {
+            const dataPoints = (cpuMetric as any).dataPoints || [];
+            for(const dp of dataPoints) {
+              if (dp.value?.sum > 0) {
+                cpuTotalUs += dp.value.sum;
+              }
+            }
+          }
+          const cpuUserUs = cpuTotalUs;
+          const cpuSystemUs = 0;
+
+          const snapshot: PerfSnapshot = {
+            timestamp: Date.now(),
+            label: 'scrolling',
+            wallClockMs: p50Ms,
+            cpuTotalUs,
+            cpuUserUs,
+            cpuSystemUs,
+            eventLoopDelayP50Ms: p50Ms,
+            eventLoopDelayP95Ms: p95Ms,
+            eventLoopDelayMaxMs: maxMs,
+          };
+
           return snapshot;
         },
       );

From d7d72a0e6723a96b3c0098062fdbb528feac21ec Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 13 Apr 2026 22:51:30 +0000
Subject: [PATCH 5/7] improve lint

---
 packages/test-utils/src/test-rig.ts | 34 ++++++++--
 perf-tests/perf-usage.test.ts       | 98 +++++++++++++++++++----------
 2 files changed, 93 insertions(+), 39 deletions(-)

diff --git a/packages/test-utils/src/test-rig.ts b/packages/test-utils/src/test-rig.ts
index 734c1b95462..906a7760bf3 100644
--- a/packages/test-utils/src/test-rig.ts
+++ b/packages/test-utils/src/test-rig.ts
@@ -193,6 +193,28 @@ export function checkModelOutputContent(
   return isValid;
 }
 
+export interface MetricDataPoint {
+  attributes?: Record<string, unknown>;
+  value?: {
+    sum?: number;
+    min?: number;
+    max?: number;
+    count?: number;
+  };
+  startTime?: [number, number];
+  endTime?: string;
+}
+
+export interface TelemetryMetric {
+  descriptor: {
+    name: string;
+    type?: string;
+    description?: string;
+    unit?: string;
+  };
+  dataPoints: MetricDataPoint[];
+}
+
 export interface ParsedLog {
   attributes?: {
     'event.name'?: string;
@@ -213,11 +235,7 @@ export interface ParsedLog {
     prompt_id?: string;
   };
   scopeMetrics?: {
-    metrics: {
-      descriptor: {
-        name: string;
-      };
-    }[];
+    metrics: TelemetryMetric[];
   }[];
 }
 
@@ -1297,6 +1315,10 @@ export class TestRig {
     return logs;
   }
 
+  readTelemetryLogs(): ParsedLog[] {
+    return this._readAndParseTelemetryLog();
+  }
+
   private _readAndParseTelemetryLog(): ParsedLog[] {
     // Telemetry is always written to the test directory
     const logFilePath = join(this.homeDir!, 'telemetry.log');
@@ -1450,7 +1472,7 @@ export class TestRig {
     );
   }
 
-  readMetric(metricName: string): Record<string, unknown> | null {
+  readMetric(metricName: string): TelemetryMetric | null {
     const logs = this._readAndParseTelemetryLog();
     for (const logData of logs) {
       if (logData.scopeMetrics) {
diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts
index e7cd229e33b..42aa8dc516a 100644
--- a/perf-tests/perf-usage.test.ts
+++ b/perf-tests/perf-usage.test.ts
@@ -5,7 +5,11 @@
  */
 
 import { describe, it, beforeAll, afterAll } from 'vitest';
-import { TestRig, PerfTestHarness, type PerfSnapshot } from '@google/gemini-cli-test-utils';
+import {
+  TestRig,
+  PerfTestHarness,
+  type PerfSnapshot,
+} from '@google/gemini-cli-test-utils';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import {
@@ -221,8 +225,7 @@ describe('CPU Performance Tests', () => {
               JSON.stringify(toolLatencyMetric),
             );
           }
-          // eslint-disable-next-line @typescript-eslint/no-explicit-any
-          const logs = (rig as any)._readAndParseTelemetryLog();
+          const logs = rig.readTelemetryLogs();
           console.log(`  Total telemetry log entries: ${logs.length}`);
           for (const logData of logs) {
             if (logData.scopeMetrics) {
@@ -247,10 +250,9 @@ describe('CPU Performance Tests', () => {
 
             const findValue = (percentile: string) => {
               const dp = eventLoopMetric.dataPoints.find(
-                // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                (p: any) => p.attributes.percentile === percentile,
+                (p) => p.attributes?.['percentile'] === percentile,
               );
-              return dp ? dp.value.min : undefined;
+              return dp?.value?.min;
             };
 
             snapshot.childEventLoopDelayP50Ms = findValue('p50');
@@ -327,7 +329,13 @@ describe('CPU Performance Tests', () => {
             async () => {
               const run = await rig.runInteractive({
                 args: ['--resume', 'latest'],
-                env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+                env: {
+                  GEMINI_API_KEY: 'fake-perf-test-key',
+                  GEMINI_TELEMETRY_ENABLED: 'true',
+                  GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+                  GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+                  DEBUG: 'true',
+                },
               });
               await run.kill();
             },
@@ -349,7 +357,13 @@ describe('CPU Performance Tests', () => {
         async () => {
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+            env: {
+              GEMINI_API_KEY: 'fake-perf-test-key',
+              GEMINI_TELEMETRY_ENABLED: 'true',
+              GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+              GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+              DEBUG: 'true',
+            },
           });
 
           const snapshot = await harness.measureWithEventLoop(
@@ -377,7 +391,13 @@ describe('CPU Performance Tests', () => {
         async () => {
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { GEMINI_API_KEY: 'fake-perf-test-key' },
+            env: {
+              GEMINI_API_KEY: 'fake-perf-test-key',
+              GEMINI_TELEMETRY_ENABLED: 'true',
+              GEMINI_MEMORY_MONITOR_INTERVAL: '500',
+              GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
+              DEBUG: 'true',
+            },
           });
 
           await run.expectText('type your message');
@@ -418,7 +438,7 @@ describe('CPU Performance Tests', () => {
 
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { 
+            env: {
               GEMINI_API_KEY: 'fake-perf-test-key',
               GEMINI_TELEMETRY_ENABLED: 'true',
               GEMINI_MEMORY_MONITOR_INTERVAL: '500',
@@ -445,21 +465,27 @@ describe('CPU Performance Tests', () => {
           let p95Ms = 0;
           let maxMs = 0;
           if (eventLoopMetric) {
-             const dataPoints = (eventLoopMetric as any).dataPoints || [];
-             const p50Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p50');
-             const p95Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p95');
-             const maxData = dataPoints.find((dp: any) => dp.attributes?.percentile === 'max');
-
-             if (p50Data) p50Ms = p50Data.value.sum;
-             if (p95Data) p95Ms = p95Data.value.sum;
-             if (maxData) maxMs = maxData.value.sum;
+            const dataPoints = eventLoopMetric.dataPoints;
+            const p50Data = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'p50',
+            );
+            const p95Data = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'p95',
+            );
+            const maxData = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'max',
+            );
+
+            if (p50Data?.value?.sum) p50Ms = p50Data.value.sum;
+            if (p95Data?.value?.sum) p95Ms = p95Data.value.sum;
+            if (maxData?.value?.sum) maxMs = maxData.value.sum;
           }
 
           let cpuTotalUs = 0;
           if (cpuMetric) {
-            const dataPoints = (cpuMetric as any).dataPoints || [];
-            for(const dp of dataPoints) {
-              if (dp.value?.sum > 0) {
+            const dataPoints = cpuMetric.dataPoints;
+            for (const dp of dataPoints) {
+              if (dp.value?.sum && dp.value.sum > 0) {
                 cpuTotalUs += dp.value.sum;
               }
             }
@@ -506,7 +532,7 @@ describe('CPU Performance Tests', () => {
 
           const run = await rig.runInteractive({
             args: ['--resume', 'latest'],
-            env: { 
+            env: {
               GEMINI_API_KEY: 'fake-perf-test-key',
               GEMINI_TELEMETRY_ENABLED: 'true',
               GEMINI_MEMORY_MONITOR_INTERVAL: '500',
@@ -533,21 +559,27 @@ describe('CPU Performance Tests', () => {
           let p95Ms = 0;
           let maxMs = 0;
           if (eventLoopMetric) {
-             const dataPoints = (eventLoopMetric as any).dataPoints || [];
-             const p50Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p50');
-             const p95Data = dataPoints.find((dp: any) => dp.attributes?.percentile === 'p95');
-             const maxData = dataPoints.find((dp: any) => dp.attributes?.percentile === 'max');
-
-             if (p50Data) p50Ms = p50Data.value.sum;
-             if (p95Data) p95Ms = p95Data.value.sum;
-             if (maxData) maxMs = maxData.value.sum;
+            const dataPoints = eventLoopMetric.dataPoints;
+            const p50Data = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'p50',
+            );
+            const p95Data = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'p95',
+            );
+            const maxData = dataPoints.find(
+              (dp) => dp.attributes?.['percentile'] === 'max',
+            );
+
+            if (p50Data?.value?.sum) p50Ms = p50Data.value.sum;
+            if (p95Data?.value?.sum) p95Ms = p95Data.value.sum;
+            if (maxData?.value?.sum) maxMs = maxData.value.sum;
           }
 
           let cpuTotalUs = 0;
           if (cpuMetric) {
-            const dataPoints = (cpuMetric as any).dataPoints || [];
-            for(const dp of dataPoints) {
-              if (dp.value?.sum > 0) {
+            const dataPoints = cpuMetric.dataPoints;
+            for (const dp of dataPoints) {
+              if (dp.value?.sum && dp.value.sum > 0) {
                 cpuTotalUs += dp.value.sum;
               }
             }

From 46b9de73ed1056c7be2290f9500f65859f7d54a2 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Mon, 13 Apr 2026 22:59:06 +0000
Subject: [PATCH 6/7] fix format

---
 perf-tests/baselines.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 2e5723c2bca..002ef21f18d 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -18,7 +18,7 @@
       "timestamp": "2026-04-08T22:28:23.290Z"
     },
     "high-volume-shell-output": {
-       "wallClockMs": 1119.9,
+      "wallClockMs": 1119.9,
       "cpuTotalUs": 2100,
       "timestamp": "2026-04-09T02:30:22.000Z"
     },

From 698b58be0c889879b828410f8cac42a655dba875 Mon Sep 17 00:00:00 2001
From: Cynthia Long <cynthialong@google.com>
Date: Tue, 14 Apr 2026 14:13:52 +0000
Subject: [PATCH 7/7] update test per comment

---
 packages/test-utils/src/perf-test-harness.ts |  4 +-
 perf-tests/baselines.json                    | 43 +++++++++-----------
 perf-tests/perf-usage.test.ts                | 33 +++++++++++----
 3 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/packages/test-utils/src/perf-test-harness.ts b/packages/test-utils/src/perf-test-harness.ts
index 2f376f58b6d..f0520ccecbd 100644
--- a/packages/test-utils/src/perf-test-harness.ts
+++ b/packages/test-utils/src/perf-test-harness.ts
@@ -147,7 +147,9 @@ export class PerfTestHarness {
       throw new Error(`No active timer found for label "${label}"`);
     }
 
-    const wallClockMs = performance.now() - timer.startTime;
+    // Round wall-clock time to nearest 0.1 ms
+    const wallClockMs =
+      Math.round((performance.now() - timer.startTime) * 10) / 10;
     const cpuDelta = process.cpuUsage(timer.startCpuUsage);
     this.activeTimers.delete(label);
 
diff --git a/perf-tests/baselines.json b/perf-tests/baselines.json
index 002ef21f18d..bb0e8fd754b 100644
--- a/perf-tests/baselines.json
+++ b/perf-tests/baselines.json
@@ -1,19 +1,19 @@
 {
   "version": 1,
-  "updatedAt": "2026-04-13T21:48:46.316Z",
+  "updatedAt": "2026-04-14T14:04:02.662Z",
   "scenarios": {
     "cold-startup-time": {
-      "wallClockMs": 927.553249999999,
+      "wallClockMs": 927.6,
       "cpuTotalUs": 1470,
       "timestamp": "2026-04-08T22:27:54.871Z"
     },
     "idle-cpu-usage": {
-      "wallClockMs": 5000.460750000002,
+      "wallClockMs": 5000.5,
       "cpuTotalUs": 12157,
       "timestamp": "2026-04-08T22:28:19.098Z"
     },
     "skill-loading-time": {
-      "wallClockMs": 930.0920409999962,
+      "wallClockMs": 930.1,
       "cpuTotalUs": 1323,
       "timestamp": "2026-04-08T22:28:23.290Z"
     },
@@ -22,35 +22,30 @@
       "cpuTotalUs": 2100,
       "timestamp": "2026-04-09T02:30:22.000Z"
     },
-    "long-conversation": {
-      "wallClockMs": 4199.5024319999975,
-      "cpuTotalUs": 292959,
-      "timestamp": "2026-04-11T01:52:23.183Z"
-    },
     "long-conversation-resume": {
-      "wallClockMs": 3609.1588829999964,
-      "cpuTotalUs": 256378,
-      "timestamp": "2026-04-13T21:47:38.112Z"
+      "wallClockMs": 4212.5,
+      "cpuTotalUs": 351393,
+      "timestamp": "2026-04-14T14:02:53.268Z"
     },
     "long-conversation-typing": {
-      "wallClockMs": 567.3885389999923,
-      "cpuTotalUs": 16572,
-      "timestamp": "2026-04-13T21:47:54.721Z"
+      "wallClockMs": 113.7,
+      "cpuTotalUs": 3304,
+      "timestamp": "2026-04-14T14:03:12.525Z"
     },
     "long-conversation-execution": {
-      "wallClockMs": 447.42054899998766,
-      "cpuTotalUs": 6902,
-      "timestamp": "2026-04-13T21:48:10.571Z"
+      "wallClockMs": 248.7,
+      "cpuTotalUs": 3825,
+      "timestamp": "2026-04-14T14:03:28.575Z"
     },
     "long-conversation-terminal-scrolling": {
-      "wallClockMs": 367.099886,
-      "cpuTotalUs": 12504142,
-      "timestamp": "2026-04-13T21:48:28.810Z"
+      "wallClockMs": 362.4,
+      "cpuTotalUs": 12755860,
+      "timestamp": "2026-04-14T14:03:45.687Z"
     },
     "long-conversation-alternate-scrolling": {
-      "wallClockMs": 367.099886,
-      "cpuTotalUs": 12504142,
-      "timestamp": "2026-04-13T21:48:46.316Z"
+      "wallClockMs": 362.4,
+      "cpuTotalUs": 12755860,
+      "timestamp": "2026-04-14T14:04:02.662Z"
     }
   }
 }
diff --git a/perf-tests/perf-usage.test.ts b/perf-tests/perf-usage.test.ts
index 42aa8dc516a..697a038affb 100644
--- a/perf-tests/perf-usage.test.ts
+++ b/perf-tests/perf-usage.test.ts
@@ -369,7 +369,10 @@ describe('CPU Performance Tests', () => {
           const snapshot = await harness.measureWithEventLoop(
             'typing',
             async () => {
-              await run.type('Hello');
+              // On average, the expected latency per key is under 30ms.
+              for (const char of 'Hello') {
+                await run.type(char);
+              }
             },
           );
 
@@ -400,7 +403,7 @@ describe('CPU Performance Tests', () => {
             },
           });
 
-          await run.expectText('type your message');
+          await run.expectText('Type your message');
 
           const snapshot = await harness.measureWithEventLoop(
             'execution',
@@ -447,11 +450,18 @@ describe('CPU Performance Tests', () => {
             },
           });
 
-          await run.expectText('type your message');
+          await run.expectText('Type your message');
 
-          // Start the interaction but do not wait for test runner overhead
           for (let i = 0; i < 5; i++) {
             await run.sendKeys('\u001b[5~'); // PageUp
+          }
+
+          // Scroll to the very top
+          await run.sendKeys('\u001b[H'); // Home
+          // Verify top line of chat is visible.
+          await run.expectText('Authenticated with');
+
+          for (let i = 0; i < 5; i++) {
             await run.sendKeys('\u001b[6~'); // PageDown
           }
 
@@ -496,7 +506,7 @@ describe('CPU Performance Tests', () => {
           const snapshot: PerfSnapshot = {
             timestamp: Date.now(),
             label: 'scrolling',
-            wallClockMs: p50Ms,
+            wallClockMs: Math.round(p50Ms * 10) / 10,
             cpuTotalUs,
             cpuUserUs,
             cpuSystemUs,
@@ -541,11 +551,18 @@ describe('CPU Performance Tests', () => {
             },
           });
 
-          await run.expectText('type your message');
+          await run.expectText('Type your message');
 
-          // Start the interaction but do not wait for test runner overhead
           for (let i = 0; i < 5; i++) {
             await run.sendKeys('\u001b[5~'); // PageUp
+          }
+
+          // Scroll to the very top
+          await run.sendKeys('\u001b[H'); // Home
+          // Verify top line of chat is visible.
+          await run.expectText('Authenticated with');
+
+          for (let i = 0; i < 5; i++) {
             await run.sendKeys('\u001b[6~'); // PageDown
           }
 
@@ -590,7 +607,7 @@ describe('CPU Performance Tests', () => {
           const snapshot: PerfSnapshot = {
             timestamp: Date.now(),
             label: 'scrolling',
-            wallClockMs: p50Ms,
+            wallClockMs: Math.round(p50Ms * 10) / 10,
             cpuTotalUs,
             cpuUserUs,
             cpuSystemUs,