diff --git a/.env.example b/.env.example
index f1c207c1..77ca0f3a 100644
--- a/.env.example
+++ b/.env.example
@@ -98,6 +98,8 @@
 # AGENTMEMORY_GRAPH_WEIGHT=0.2                   # Graph traversal bonus on smart-search ranking
 # TOKEN_BUDGET=2000                              # Max tokens injected via mem::context per session
 # MAX_OBS_PER_SESSION=500                        # Per-session observation cap before consolidation kicks in
+# SUMMARIZE_CHUNK_SIZE=400                       # When mem::summarize sees a session larger than this, it chunks observations and map-reduces (chunk-summarize → reduce-merge) to stay within the LLM's context window. Default 400 ≈ 44k tokens per chunk at ~110 tok/obs. Native sessions are capped by MAX_OBS_PER_SESSION; chunking primarily matters for bulk-imported jsonl sessions, which bypass that cap.
+# SUMMARIZE_CHUNK_CONCURRENCY=6                  # Parallel chunk LLM calls during chunked summarize. Default 6 fits ~100-chunk sessions under iii's 180s function-invocation timeout at typical ~8s/call. High-throughput providers (Novita, DeepInfra, DeepSeek) commonly allow 100+ concurrent — bump this for very large imported sessions.
 
 # -----------------------------------------------------------------------------
 # 5. Behaviour flags
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..a2f5e0c5
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [rohitg00]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 41c99434..b9671280 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,19 +1,62 @@
 name: CI
 
+# `paths-ignore` keeps doc-only / website / README / CHANGELOG churn from
+# burning runner minutes. Source / config / workflow changes always run.
+# `workflow_dispatch` gives a manual re-run button for flake debugging.
 on:
   push:
     branches: [main]
+    paths-ignore:
+      - "README.md"
+      - "CHANGELOG.md"
+      - "AGENTS.md"
+      - "ROADMAP.md"
+      - "website/**"
+      - "docs/**"
+      - "assets/**"
+      - "deploy/**/README.md"
+      - "**/*.md"
+      - "**/*.mdx"
   pull_request:
     branches: [main]
+    paths-ignore:
+      - "README.md"
+      - "CHANGELOG.md"
+      - "AGENTS.md"
+      - "ROADMAP.md"
+      - "website/**"
+      - "docs/**"
+      - "assets/**"
+      - "deploy/**/README.md"
+      - "**/*.md"
+      - "**/*.mdx"
+  workflow_dispatch:
+
+# Cancel in-flight PR runs when a newer push (including a force-push) lands
+# on the same PR. Push runs are never cancelled, to protect against partial state on main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
+      # Don't bail the whole matrix on one cell's failure — we want to
+      # see whether the same failure reproduces across OSes (e.g.
+      # whether a flake is platform-specific or universal).
+      fail-fast: false
       matrix:
+        # Windows held back: test/obsidian-export.test.ts has hardcoded
+        # POSIX paths (`/tmp/...`) that fail on D:\ drive runners.
+        # src/functions/obsidian-export.ts needs os.tmpdir() + path.join
+        # rework before Windows can be added back. Tracked as follow-up.
+        os: [ubuntu-latest, macos-latest]
         node-version: [20, 22]
     steps:
       - uses: actions/checkout@v6
+        with:
+          persist-credentials: false
       - uses: actions/setup-node@v6
         with:
           node-version: ${{ matrix.node-version }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 62dc8925..00003399 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -10,15 +10,25 @@ on:
         required: false
         default: "agentmemory,mcp,fs-watcher"
 
+# Workflow-level permissions stay minimal — only `contents: read`
+# is required to check out the repo. `id-token: write` is granted on
+# the publish job for npm's --provenance Sigstore OIDC mint.
 permissions:
   contents: read
-  id-token: write
 
 jobs:
   publish:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write
     steps:
       - uses: actions/checkout@v6
+        with:
+          # Don't persist the GITHUB_TOKEN to .git/config — the
+          # publish steps don't push back to the repo, so the token
+          # only needs to live in memory for this checkout.
+          persist-credentials: false
 
       - uses: actions/setup-node@v6
         with:
diff --git a/.gitignore b/.gitignore
index 9a9260b8..ba6af995 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,10 @@ dist/
 plugin/scripts/*.map
 plugin/scripts/*.d.mts
 data/
+!eval/data/
+!eval/data/**
+data-*/
+agentmemory-debug/
 .gstack/
 
 # Lock files — never commit (see feedback_no_lockfiles memory)
@@ -20,3 +24,8 @@ package-lock.json
 pnpm-lock.yaml
 yarn.lock
 integrations/hermes/__pycache__/
+
+# Eval reports (transient; published scorecards live in docs/benchmarks/)
+eval/reports/
+# LongMemEval download is 278MB; fetched on demand
+eval/data/longmemeval/
diff --git a/AGENTS.md b/AGENTS.md
index ebcf3584..24e74245 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -19,6 +19,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng
 5. `test/mcp-standalone.test.ts` — tool count assertion
 6. `README.md` — tool counts (search for "MCP tools")
 7. `plugin/.claude-plugin/plugin.json` — tool count in description
+8. `plugin/plugin.json` and `plugin/.mcp.copilot.json` (when present) — tool count or MCP exposure
 
 **When adding REST endpoints, you MUST update:**
 1. `src/triggers/api.ts` — endpoint registration
@@ -32,6 +33,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng
 4. `src/functions/export-import.ts` — supportedVersions set
 5. `test/export-import.test.ts` — version assertion
 6. `plugin/.claude-plugin/plugin.json` — version field
+7. `plugin/plugin.json` (when present) — version field
 
 **When adding new KV scopes:**
 1. `src/state/schema.ts` — add to the KV object
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c73c185..0188e05a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,48 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 ## [Unreleased]
 
+## [0.9.21] — 2026-05-19
+
+Quality + integration wave. Headline: native OpenCode plugin with full Claude Code hook parity ([#237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer)). Eleven more PRs alongside, covering: `memory_recall` returning the wrong shape, env-file `AGENTMEMORY_DROP_STALE_INDEX` silently ignored, hook scripts crashing on Windows usernames with spaces, viewer search inputs interrupting CJK IME composition, large sessions silently failing at the LLM context limit, lessons invisible to smart-search, Hermes plugin manifest missing hooks, cli onboarding crashing in non-TTY contexts, rebuildIndex blocking boot on large corpora, 25h embed-loop bottleneck during rebuild, and removal of the v0.9.19 iii-console installer workaround now that upstream is fixed.
+
+### Added
+
+- **OpenCode plugin with 22 auto-capture hooks** ([PR #237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer), closes [#236](https://github.com/rohitg00/agentmemory/issues/236) + [#244](https://github.com/rohitg00/agentmemory/issues/244)). Complete OpenCode plugin in `plugin/opencode/` matching Claude Code hook parity. Covers session lifecycle (8 hooks), messages (3), tool lifecycle (2), part tracking, permissions, task tracking, plus a two-layer enrichment pipeline (memory context on first turn, file enrichment on subsequent turns) and two slash commands (`/recall`, `/remember`). Full gap analysis in `plugin/opencode/README.md`.
+
+### Fixed
+
+- **`memory_recall` endpoint + format/token_budget forwarding** ([PR #516](https://github.com/rohitg00/agentmemory/pull/516) by [@serhiizghama](https://github.com/serhiizghama), closes [#507](https://github.com/rohitg00/agentmemory/issues/507) + [#440](https://github.com/rohitg00/agentmemory/issues/440)). MCP `memory_recall` always returned compact mode and dropped `format` + `token_budget` params. Two root causes fixed: standalone shim routed through `/agentmemory/smart-search` instead of `/agentmemory/search`, and the local-fallback path didn't read either param. Now routes correctly, forwards both params end-to-end, defaults `format` to `"full"` matching the MCP schema.
+
+- **env-file `AGENTMEMORY_DROP_STALE_INDEX` flag now honored** ([PR #461](https://github.com/rohitg00/agentmemory/pull/461) by [@honor2030](https://github.com/honor2030), closes [#456](https://github.com/rohitg00/agentmemory/issues/456)). Setting the flag in `~/.agentmemory/.env` was silently ignored because the boot path read `process.env` directly. New `isDropStaleIndexEnabled()` helper reads merged env. Combined with [#455](https://github.com/rohitg00/agentmemory/issues/455) + [#469](https://github.com/rohitg00/agentmemory/issues/469) reports, this is the unblock path for the stale-index server-crash recovery loop.
+
+- **Windows hook scripts quote plugin paths correctly** ([PR #487](https://github.com/rohitg00/agentmemory/pull/487) by [@honor2030](https://github.com/honor2030), closes [#477](https://github.com/rohitg00/agentmemory/issues/477)). Hook command strings referenced `${CLAUDE_PLUGIN_ROOT}/scripts/*.mjs` without quotes — Windows users with spaces in their username had every hook crash. Quotes added + regression test.
+
+- **Viewer search inputs honor IME composition** ([PR #517](https://github.com/rohitg00/agentmemory/pull/517) by [@jonathanzhan1975](https://github.com/jonathanzhan1975)). CJK users typing in the viewer's search inputs hit mid-character interruption — every keystroke fired the `oninput=` re-render handler, breaking IME composition mid-syllable. New `bindImeSafeSearch` helper defers re-render until `compositionend`.
+
+- **Chunk large sessions to fit LLM context window** ([PR #472](https://github.com/rohitg00/agentmemory/pull/472) by [@efenex](https://github.com/efenex)). Sessions with >7000 observations failed at the LLM provider's context limit, and the consolidation pipeline silently skipped the session. New chunking splits oversized sessions across multiple compress calls + restitches the narrative via a `REDUCE_SYSTEM` prompt. Legacy single-call path preserved when obs count is under the chunk size. Backfill script under `scripts/` for users hitting the pre-fix bug.
+
+- **Surface lessons in smart-search + diagnose tally** ([PR #473](https://github.com/rohitg00/agentmemory/pull/473) by [@efenex](https://github.com/efenex)). Closes the lesson round-trip with [#458](https://github.com/rohitg00/agentmemory/pull/458) (lessons auto-injected into `mem::context`): lessons are now also returned alongside hybrid search results in a separate `lessons` field on `smart-search`, and the `diagnose` health surface tallies per-store counts so the trust-shock pattern (save succeeds, recall empty, diagnose says 0) goes away.
+
+- **Declare all Hermes plugin hooks** ([PR #486](https://github.com/rohitg00/agentmemory/pull/486) by [@honor2030](https://github.com/honor2030)). The Hermes `plugin.yaml` manifest only declared 3 of the 6 implemented hooks. All 6 now declared (`prefetch`, `sync_turn`, `on_session_end`, `on_pre_compress`, `on_memory_write`, `system_prompt_block`).
+
+- **`rebuildIndex` non-blocking on boot** ([PR #500](https://github.com/rohitg00/agentmemory/pull/500) by [@efenex](https://github.com/efenex)). Boot path previously `await`-ed `rebuildIndex(kv)`, so the viewer + later boot steps stalled — on large corpora this was 25h+ of blocked startup. Replaced with `void rebuildIndex(kv).then(...).catch(...)` so the rebuild runs in the background.
+
+- **Batched embed calls in `rebuildIndex` (25h → 3h on large corpora)** ([PR #504](https://github.com/rohitg00/agentmemory/pull/504) by [@efenex](https://github.com/efenex)). The rebuild loop made one embed call per observation, paying full HTTP RTT per item. New `vectorIndexAddBatchGuarded` helper batches embeds (default 32, configurable via `REBUILD_EMBED_BATCH_SIZE`) and try/catches per-item failures. Measured 25h → 3h on a 250k-observation corpus.
+
+- **CLI skips onboarding prompts without a tty** ([PR #491](https://github.com/rohitg00/agentmemory/pull/491) by [@honor2030](https://github.com/honor2030)). Onboarding prompts crashed in non-interactive contexts (CI, `docker run -d`, piped input). New guard short-circuits with sensible defaults when stdin/stdout aren't TTYs or `CI=1`.
+
+### Changed
+
+- **Drop iii-console installer `--next` workaround** ([PR #546](https://github.com/rohitg00/agentmemory/pull/546)). v0.9.19 routed first-run iii-console install through `bash -s -- --next` to dodge an upstream tag-prefix bug at [iii-hq/iii#1652](https://github.com/iii-hq/iii/issues/1652). Upstream [iii-hq/iii#1660](https://github.com/iii-hq/iii/pull/1660) shipped 2026-05-19; `install.iii.dev/console/main/install.sh` is a CDN proxy serving upstream main HEAD so the fix is live without an iii release tag. Reverted to canonical bare `curl ... | sh`.
+
+### Infrastructure
+
+- 95 test files (was 92), **1067 tests pass** (was 1038) on `chore(release): v0.9.21`.
+- Bundles 12 PRs: 1 contributor feature + 10 bug fixes across MCP / env config / hooks / viewer / summarize / lessons / Hermes / rebuildIndex / CLI + 1 upstream-installer revert.
+- New contributors landing first PRs this release: [@cl0ckt0wer](https://github.com/cl0ckt0wer), [@serhiizghama](https://github.com/serhiizghama), [@jonathanzhan1975](https://github.com/jonathanzhan1975).
+
+[0.9.21]: https://github.com/rohitg00/agentmemory/compare/v0.9.20...v0.9.21
+
 ## [0.9.20] — 2026-05-18
 
 Hotfix: revert the Codex Stop → session-end chain shipped in v0.9.19.
diff --git a/README.md b/README.md
index ef840011..fc6300fb 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
     Your coding agent remembers everything. No more re-explaining.
     Built on <a href="https://github.com/iii-hq/iii">iii engine</a>
   </strong><br/>
-  Persistent memory for Claude Code, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client.
+  Persistent memory for Claude Code, GitHub Copilot CLI, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client.
 </p>
 
 <p align="center">
@@ -34,6 +34,7 @@
 
 <p align="center">
   <a href="https://www.npmjs.com/package/@agentmemory/agentmemory"><img src="https://img.shields.io/npm/v/@agentmemory/agentmemory?color=CB3837&label=npm&style=for-the-badge&logo=npm" alt="npm version" /></a>
+  <a href="https://www.npmjs.com/package/@agentmemory/agentmemory"><img src="https://img.shields.io/npm/dm/@agentmemory/agentmemory?color=CB3837&label=downloads&style=for-the-badge&logo=npm" alt="npm downloads" /></a>
   <a href="https://github.com/rohitg00/agentmemory/actions"><img src="https://img.shields.io/github/actions/workflow/status/rohitg00/agentmemory/ci.yml?label=tests&style=for-the-badge&logo=github" alt="CI" /></a>
   <a href="https://github.com/rohitg00/agentmemory/blob/main/LICENSE"><img src="https://img.shields.io/github/license/rohitg00/agentmemory?color=blue&style=for-the-badge" alt="License" /></a>
   <a href="https://github.com/rohitg00/agentmemory/stargazers"><img src="https://img.shields.io/github/stars/rohitg00/agentmemory?style=for-the-badge&color=yellow&logo=github" alt="Stars" /></a>
@@ -72,10 +73,12 @@
 ## Install
 
 ```bash
-npm install -g @agentmemory/agentmemory     # once — bare `agentmemory` on PATH
-agentmemory                                  # start the memory server on :3111
-agentmemory demo                             # seed sample sessions + prove recall
-agentmemory connect claude-code              # wire your agent (also: codex, cursor, gemini-cli, ...)
+npm install -g @agentmemory/agentmemory          # once — bare `agentmemory` on PATH
+# If you hit EACCES on macOS/Linux system Node installs, retry with:
+# sudo npm install -g @agentmemory/agentmemory
+agentmemory                                      # start the memory server on :3111
+agentmemory demo                                 # seed sample sessions + prove recall
+agentmemory connect claude-code                  # wire your agent (also: copilot-cli, codex, cursor, gemini-cli, ...)
 ```
 
 Or via `npx` (no install):
@@ -107,6 +110,11 @@ agentmemory works with any agent that supports hooks, MCP, or REST API. All agen
 <sub>native plugin + 6 hooks + MCP</sub>
 </td>
 <td align="center" width="12.5%">
+<a href="https://github.com/features/copilot"><img src="https://github.githubassets.com/images/modules/site/copilot/copilot.png" alt="GitHub Copilot CLI" width="48" height="48" /></a><br/>
+<strong>GitHub Copilot CLI</strong><br/>
+<sub>MCP + plugin hooks/skills</sub>
+</td>
+<td align="center" width="12.5%">
 <a href="integrations/openclaw/"><img src="https://github.com/openclaw.png?size=120" alt="OpenClaw" width="48" height="48" /></a><br/>
 <strong>OpenClaw</strong><br/>
 <sub>native plugin + MCP</sub>
@@ -207,6 +215,15 @@ npx @agentmemory/agentmemory
 
 ### Retrieval Accuracy
 
+**coding-agent-life-v1** (in-house corpus, sandbox-reproducible)
+
+| Adapter | P@5 | R@5 | Top-5 hit rate | p50 latency |
+|---|---|---|---|---|
+| **agentmemory hybrid** | **0.578** | **0.967** | **15 / 15** | 14 ms |
+| grep baseline | 0.267 | 0.967 | 15 / 15 | 0 ms |
+
+100% top-5 hit rate. **2.2×** better precision than the grep baseline on identical input. Full per-type breakdown: [`docs/benchmarks/2026-05-20-coding-agent-life-v1.md`](docs/benchmarks/2026-05-20-coding-agent-life-v1.md).
+
 **LongMemEval-S** (ICLR 2025, 500 questions)
 
 | System | R@5 | R@10 | MRR |
@@ -232,6 +249,8 @@ npx @agentmemory/agentmemory
 
 > Embedding model: `all-MiniLM-L6-v2` (local, free, no API key). Full reports: [`benchmark/LONGMEMEVAL.md`](benchmark/LONGMEMEVAL.md), [`benchmark/QUALITY.md`](benchmark/QUALITY.md), [`benchmark/SCALE.md`](benchmark/SCALE.md). Competitor comparison: [`benchmark/COMPARISON.md`](benchmark/COMPARISON.md) — agentmemory vs mem0, Letta, Khoj, claude-mem, Hippo.
 
+**Reproduce locally:** [`eval/README.md`](eval/README.md) — adapter-pluggable harness for LongMemEval `_s` (public 500-Q) + `coding-agent-life-v1` (in-house 15-session corpus). Grep / vector / agentmemory adapters score side-by-side, NDJSON output, published scorecards land in [`docs/benchmarks/`](docs/benchmarks/).
+
 ---
 
 <h2 id="vs-competitors"><picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/section-competitors.svg"><img src="assets/tags/section-competitors.svg" alt="vs Competitors" height="32" /></picture></h2>
@@ -349,6 +368,8 @@ Open `http://localhost:3113` to watch the memory build live.
 
 ```bash
 npm install -g @agentmemory/agentmemory
+# If you hit EACCES on macOS/Linux system Node installs, retry with:
+# sudo npm install -g @agentmemory/agentmemory
 agentmemory                    # start the server (same as the npx form)
 agentmemory stop               # tear it down
 agentmemory remove             # uninstall everything we created
@@ -418,6 +439,30 @@ The Codex plugin ships from the same `plugin/` directory as the Claude Code plug
 
 Codex's hook engine injects `CLAUDE_PLUGIN_ROOT` into hook subprocesses (per [`codex-rs/hooks/src/engine/discovery.rs`](https://github.com/openai/codex/blob/main/codex-rs/hooks/src/engine/discovery.rs)), so the same hook scripts work across both hosts without duplication. Subagent / SessionEnd / Notification / TaskCompleted / PostToolUseFailure events are Claude-Code-only and are not registered for Codex.
 
+#### Codex Desktop: plugin hooks currently silent (workaround available)
+
+`CodexHooks` and `PluginHooks` are both stable + default-enabled in [`codex-rs/features/src/lib.rs`](https://github.com/openai/codex/blob/main/codex-rs/features/src/lib.rs), but Codex Desktop builds currently do not dispatch plugin-local `hooks.json` ([openai/codex#16430](https://github.com/openai/codex/issues/16430)). MCP tools still work; only the lifecycle observations are missing.
+
+Until upstream lands the fix, mirror the same hook commands into the global `~/.codex/hooks.json`:
+
+```bash
+agentmemory connect codex --with-hooks
+```
+
+This adds an idempotent block to `~/.codex/hooks.json` referencing absolute paths to the bundled scripts (no `${CLAUDE_PLUGIN_ROOT}` expansion needed at user-scope). Re-run the same command after upgrading agentmemory to refresh paths. User entries in the same file are preserved; only previous agentmemory entries are replaced.
+
+### GitHub Copilot CLI
+
+```bash
+# MCP-only wiring
+agentmemory connect copilot-cli
+
+# Full hooks/skills plugin from the GitHub subdir
+copilot plugin install rohitg00/agentmemory:plugin
+```
+
+`agentmemory connect copilot-cli` merges `mcpServers.agentmemory` into `~/.copilot/mcp-config.json` (or `$COPILOT_HOME/mcp-config.json` when `COPILOT_HOME` is set) and preserves existing servers. This adapter is Windows-safe even though other `connect` adapters still require manual Windows setup. Copilot picks up the MCP server on next launch or after `/mcp`. Install the plugin as well when you want the full hook/skill experience.
+
 <details>
 <summary><b>OpenClaw (paste this prompt)</b></summary>
 
@@ -490,9 +535,11 @@ The agentmemory entry is the **same MCP server block** across every host that us
 | **Cline / Roo Code / Kilo Code** | Cline MCP settings (Settings UI → MCP Servers → Edit) | Same `mcpServers` block. |
 | **Windsurf** | `~/.codeium/windsurf/mcp_config.json` | Same `mcpServers` block. |
 | **Gemini CLI** | `~/.gemini/settings.json` | `gemini mcp add agentmemory npx -y @agentmemory/mcp --scope user` (auto-merges). |
+| **GitHub Copilot CLI (MCP only)** | `~/.copilot/mcp-config.json` | `agentmemory connect copilot-cli` merges `mcpServers.agentmemory`; Copilot picks it up on next launch or `/mcp`. |
+| **GitHub Copilot CLI (full plugin)** | Copilot plugin install | `copilot plugin install rohitg00/agentmemory:plugin` for the plugin from the GitHub subdir. |
 | **OpenClaw** | OpenClaw MCP config | Same `mcpServers` block, or use the deeper [memory plugin](integrations/openclaw/). |
 | **Codex CLI (MCP only)** | `.codex/config.toml` | TOML shape: `codex mcp add agentmemory -- npx -y @agentmemory/mcp`, or add `[mcp_servers.agentmemory]` manually. |
-| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. |
+| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. On Codex Desktop, also run `agentmemory connect codex --with-hooks` until [openai/codex#16430](https://github.com/openai/codex/issues/16430) lands — plugin hooks are currently silent there. |
 | **OpenCode (MCP only)** | `opencode.json` | Different shape — top-level `mcp` key, command as array: `{"mcp": {"agentmemory": {"type": "local", "command": ["npx", "-y", "@agentmemory/mcp"], "enabled": true}}}`. |
 | **OpenCode (full plugin)** | `plugin/opencode/` | 22 auto-capture hooks covering session lifecycle, messages, tools, errors. Two slash commands (`/recall`, `/remember`). Copy `plugin/opencode/` into your OpenCode workspace and add the plugin entry to `opencode.json`. See [`plugin/opencode/README.md`](plugin/opencode/README.md) for the full hook table + gap analysis. |
 | **pi** | `~/.pi/agent/extensions/agentmemory` | Copy [`integrations/pi`](integrations/pi/) and restart pi. |
@@ -1035,7 +1082,7 @@ Full registry: [workers.iii.dev](https://workers.iii.dev). Every worker there co
 
 ### LLM Providers
 
-agentmemory auto-detects from your environment. No API key needed if you have a Claude subscription.
+agentmemory auto-detects the LLM provider from your environment. By default, no LLM calls are made unless you configure a provider or explicitly opt in to the Claude subscription fallback.
 
 | Provider | Config | Notes |
 |----------|--------|-------|
@@ -1046,6 +1093,33 @@ agentmemory auto-detects from your environment. No API key needed if you have a
 | OpenRouter | `OPENROUTER_API_KEY` | Any model |
 | Claude subscription fallback | `AGENTMEMORY_ALLOW_AGENT_SDK=true` | Opt-in only. Spawns `@anthropic-ai/claude-agent-sdk` sessions — used to cause unbounded Stop-hook recursion (#149 follow-up) so it is no longer the default. |
 
+### Config File
+
+Put agentmemory runtime configuration in `~/.agentmemory/.env` instead of exporting variables in every shell. If the viewer shows a setup hint like `export ANTHROPIC_API_KEY=...`, copy it into this file as `ANTHROPIC_API_KEY=...` without the `export` prefix, then restart agentmemory.
+
+Process environment variables still work and take precedence over values in the file.
+
+On Windows, the same file lives at `%USERPROFILE%\.agentmemory\.env`:
+
+```powershell
+New-Item -ItemType Directory -Force $HOME\.agentmemory
+notepad $HOME\.agentmemory\.env
+```
+
+To test with a Claude Code Pro/Max subscription instead of an API key, opt in explicitly:
+
+```env
+AGENTMEMORY_ALLOW_AGENT_SDK=true
+AGENTMEMORY_AUTO_COMPRESS=true
+```
+
+Turn on graph or consolidation features in the same file if you want them:
+
+```env
+GRAPH_EXTRACTION_ENABLED=true
+CONSOLIDATION_ENABLED=true
+```
+
 ### Environment Variables
 
 Create `~/.agentmemory/.env`:
diff --git a/docs/benchmarks/2026-05-20-coding-agent-life-v1.md b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md
new file mode 100644
index 00000000..f280b27d
--- /dev/null
+++ b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md
@@ -0,0 +1,76 @@
+# 2026-05-20 — coding-agent-life-v1 (v0.9.21)
+
+**Commit:** `e9dc710`
+**Bench:** coding-agent-life-v1 (15 sessions, 15 queries)
+**N:** 15
+**K:** 5
+**Hardware:** macOS 15 (Apple Silicon)
+**agentmemory:** v0.9.21
+**iii-engine:** v0.11.2
+**Embedding provider:** local default
+**Sandbox:** isolated data dir at `/tmp/agentmemory-eval-sandbox/`, ports 3411/3412
+
+## Headline
+
+`agentmemory-hybrid` hits **100% top-5 hit rate**, R@5 = **0.967**, P@5 = **0.578**.
+
+Same corpus, grep baseline: R@5 = 0.967, P@5 = 0.267 — same recall, but **2.2× worse precision**. Hybrid's top-5 is mostly gold; grep's top-5 is half noise.
+
+## Per-adapter
+
+| Adapter | P@5 | R@5 | Hit rate | p50 latency |
+|---|---|---|---|---|
+| grep (tokenized substring) | 0.267 | 0.967 | 15 / 15 | 0 ms |
+| `agentmemory-hybrid` | **0.578** | **0.967** | **15 / 15** | 14 ms |
+
+`agentmemory-hybrid` runs through the production smart-search endpoint (`POST /agentmemory/smart-search`) so it exercises the full BM25 + embedding + reranker stack.
+
+## Per-question-type
+
+P@5, grep vs `agentmemory-hybrid`:
+
+| Type | grep | hybrid | hybrid lift |
+|---|---|---|---|
+| single-session-bug | 0.20 | 0.33 | 1.7× |
+| single-session-infra (n=2) | 0.20 | 0.50 | 2.5× |
+| single-session-refactor | 0.20 | 0.50 | 2.5× |
+| single-session-feature | 0.50 | 0.50 | tie |
+| single-session-test | 0.20 | 0.33 | 1.7× |
+| single-session-perf | 0.20 | 0.50 | 2.5× |
+| single-session-api | 0.20 | 0.50 | 2.5× |
+| single-session-db | 0.20 | 0.50 | 2.5× |
+| single-session-release | 0.20 | 0.33 | 1.7× |
+| multi-session-causal | 0.40 | 0.40 | tie |
+| preference (n=2) | 0.20 | 0.42 | 2.1× |
+| multi-session-review | 0.40 | 0.67 | 1.7× |
+| temporal (R@5 = 0.50 grep / 1.00 hybrid) | 0.50 | 0.67 | 1.3× |
+
+Temporal queries (`What was shipped on April 8th 2026?`) need both gold sessions to score full recall. grep finds 1/2; hybrid finds 2/2.
+
+## Methodology
+
+- 15 fictional Claude Code sessions across a 10-day stretch of a Rust CLI project (`shipctl`) — bug fixes, refactors, infra, perf, schema migrations, preferences, post-mortem
+- 15 hand-graded queries with `goldSessionIds[]` covering single-session, multi-session causal, multi-session review, preference, temporal
+- Each session ingested via `POST /agentmemory/remember` with `type=eval-session` and `concepts=[session_id]`
+- Each query hits `POST /agentmemory/smart-search` with `limit=50`; dedupe by session ID; truncate to K=5
+- No LLM in the retrieval loop
+- Sandbox: clean `~/.agentmemory` via `HOME` override + alt ports (3411/3412) so no cross-contamination from a user's real store
+
+## Reproduce
+
+```sh
+git checkout e9dc710
+npm install --legacy-peer-deps
+npm run build
+
+source eval/scripts/sandbox.sh
+npm run eval:coding-life -- --adapters grep,agentmemory
+```
+
+Outputs land in `eval/reports/coding-life/`: `scores.ndjson` (per-query rows) and `summary.json` (per-adapter and per-type aggregates).
+
+## Notes
+
+- The single-session-feature tie (`Which PR introduced helm chart support?`) is interesting: query says `PR introduced helm chart` and gold session has `helm chart` literally — grep keeps pace through lexical exactness, so hybrid matches it but doesn't outperform.
+- The corpus is intentionally small for fast iteration. Hardening targets: paraphrased queries, synonym substitution, in-corpus distractors with shared keywords, longer multi-session chains.
+- Vector adapter not measured here — requires `OPENAI_API_KEY`; will be added in a follow-up scorecard alongside LongMemEval `_s`.
diff --git a/docs/benchmarks/TEMPLATE.md b/docs/benchmarks/TEMPLATE.md
new file mode 100644
index 00000000..b830e24e
--- /dev/null
+++ b/docs/benchmarks/TEMPLATE.md
@@ -0,0 +1,54 @@
+# <YYYY-MM-DD> — <benchmark-name>
+
+**Commit:** `<sha>`
+**Bench:** LongMemEval `_s` / coding-agent-life-v1 / ...
+**N:** 500 / 15 / ...
+**K:** 5
+**Hardware:** macos-15 / ubuntu-22.04 / ...
+**OpenAI model:** text-embedding-3-small
+**Anthropic model:** N/A (no LLM in retrieval loop)
+
+## Headline
+
+agentmemory-hybrid: **R@5 = XX.XX%**, P@5 = XX.XX%, p50 latency = XXms
+
+Beats grep baseline by +X.Xpt R@5, vector by +X.Xpt R@5.
+
+## Per-adapter
+
+| Adapter | P@5 | R@5 | Hit rate | p50 latency |
+|---|---|---|---|---|
+| grep | | | | |
+| vector | | | | |
+| agentmemory-hybrid | | | | |
+
+## Per-question-type
+
+| Type | grep R@5 | vector R@5 | agentmemory R@5 |
+|---|---|---|---|
+| single-session-bug | | | |
+| single-session-refactor | | | |
+| preference | | | |
+| multi-session-causal | | | |
+| temporal | | | |
+
+## Methodology
+
+- Sessions ingested via `POST /agentmemory/remember` with `type=eval-session`
+- Queries hit `POST /agentmemory/smart-search` with `limit=k*4`
+- No LLM in retrieval loop. Direct rank from hybrid scoring.
+- Ranks dedup by sessionId before truncating to K
+- Latency measured as init+query for LongMemEval (per-question fresh state), query-only for coding-life (shared state)
+
+## Reproduce
+
+```sh
+git checkout <sha>
+npm install --legacy-peer-deps
+OPENAI_API_KEY=sk-... AGENTMEMORY_BASE_URL=http://localhost:3111 \
+  npm run eval:longmemeval -- --stratify 10
+```
+
+## Notes
+
+<what surprised, what regressed, what's load-bearing>
diff --git a/eval/README.md b/eval/README.md
new file mode 100644
index 00000000..7f295367
--- /dev/null
+++ b/eval/README.md
@@ -0,0 +1,111 @@
+# agentmemory-evals
+
+Public benchmarks for agentmemory's hybrid memory stack (BM25 + embeddings + consolidation + graph).
+
+Two families, both reproducible:
+
+- **LongMemEval** — public 500-question retrieval benchmark over multi-session chat
+- **coding-agent-life-v1** — in-house corpus of 15 fictional Claude Code sessions for a Rust CLI project (`shipctl`), with 15 hand-graded queries covering bug fixes, refactors, preferences, and multi-session causal reasoning
+
+## Adapters
+
+| Adapter | Backend | API key needed |
+|---|---|---|
+| `grep` | Tokenized substring match | none |
+| `vector` | OpenAI `text-embedding-3-small` + cosine | `OPENAI_API_KEY` |
+| `agentmemory` | Running agentmemory server, smart-search endpoint | none (auth optional via `AGENTMEMORY_SECRET`) |
+
+## Sandbox first
+
+Running the `agentmemory` adapter against your real `~/.agentmemory` directory pollutes the eval with pre-existing memories AND pollutes your real store with eval test data. Always sandbox.
+
+`eval/scripts/sandbox.sh` spins up a clean agentmemory + iii-engine on ports 3411/3412 with state in `/tmp/agentmemory-eval-sandbox/`, exports `AGENTMEMORY_BASE_URL`, and tears down on exit.
+
+```sh
+source eval/scripts/sandbox.sh
+npm run eval:coding-life -- --adapters grep,agentmemory
+```
+
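+Requires iii v0.11.2 on PATH (agentmemory pin). If you already have a different version installed, install the pinned build into `~/.local/bin` and make sure that directory comes first on `PATH` (the example below downloads the Apple Silicon macOS asset; pick the release asset that matches your platform):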
+
+```sh
+mkdir -p ~/.local/bin
+curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin
+export PATH="$HOME/.local/bin:$PATH"  # add to ~/.zshrc or ~/.bashrc for persistence
+```
+
+## Quickstart
+
+### coding-agent-life-v1 (in-house, no download)
+
+```sh
+# grep baseline, no sandbox needed
+npm run eval:coding-life -- --adapters grep
+
+# add agentmemory + vector (sandbox + OpenAI key)
+source eval/scripts/sandbox.sh
+OPENAI_API_KEY=sk-... npm run eval:coding-life -- --adapters grep,vector,agentmemory
+```
+
+### LongMemEval `_s` (public, 278MB download)
+
+```sh
+mkdir -p ~/datasets/longmemeval
+curl -Lo ~/datasets/longmemeval/longmemeval_s.json \
+  https://huggingface.co/datasets/xiaowu0162/longmemeval/resolve/main/longmemeval_s
+
+source eval/scripts/sandbox.sh
+
+# Stratified sample of 10 per type (fast iteration, ~$0.20 OpenAI cost)
+OPENAI_API_KEY=sk-... LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \
+  npm run eval:longmemeval -- --stratify 10
+
+# Full 500 questions × 3 adapters (~$2 OpenAI cost)
+OPENAI_API_KEY=sk-... LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \
+  npm run eval:longmemeval
+```
+
+## Repo layout
+
+```text
+eval/
+├── README.md
+├── runner/
+│   ├── types.ts                   Adapter, Question, RankedDoc, ScoreRow
+│   ├── score.ts                   P@K, R@K, aggregation
+│   ├── load.ts                    LongMemEval JSON → Question[]
+│   ├── adapters/
+│   │   ├── grep.ts                tokenized substring baseline
+│   │   ├── vector.ts              OpenAI embeddings + cosine
+│   │   └── agentmemory.ts         POST /agentmemory/{remember,smart-search}
+│   ├── longmemeval.ts             public benchmark runner
+│   └── coding-life.ts             in-house benchmark runner
+└── data/
+    └── coding-agent-life-v1/
+        ├── sessions.json          15 fictional sessions (~6KB)
+        └── queries.json           15 queries with gold session IDs
+```
+
+Reports land in `eval/reports/<bench>/` (gitignored): `scores.ndjson` + `summary.json`.
+
+Published scorecards land in `docs/benchmarks/YYYY-MM-DD-<bench>.md`.
+
+## Writing a new adapter
+
+1. Implement `Adapter<State>` from `eval/runner/types.ts`:
+   ```ts
+   import type { Adapter } from "../types.js";
+   export const myAdapter: Adapter<MyState> = {
+     name: "my-adapter",
+     async init(sessions, config) { /* index */ return state; },
+     async query(q, state, k) { /* search */ return ranked; },
+   };
+   ```
+2. Register in `eval/runner/{longmemeval,coding-life}.ts` `ADAPTERS` map.
+3. Run against `coding-agent-life-v1` to sanity-check before committing OpenAI spend on LongMemEval.
+
+## Why a benchmark for agentmemory
+
+agentmemory ships BM25 + embeddings + consolidation + graph retrieval. Numbers from those layers should be measured against grep/vector baselines so the value of each layer is provable.
+
+The in-house corpus is small on purpose (15 sessions) — covers single-session, multi-session, preference, and temporal question types without taking 15 minutes to run. LongMemEval gives the public-comparison axis.
diff --git a/eval/data/coding-agent-life-v1/queries.json b/eval/data/coding-agent-life-v1/queries.json
new file mode 100644
index 00000000..5603e8a0
--- /dev/null
+++ b/eval/data/coding-agent-life-v1/queries.json
@@ -0,0 +1,107 @@
+[
+  {
+    "id": "q-001",
+    "type": "single-session-bug",
+    "question": "Where did we land the auth env var precedence fix?",
+    "answer": "PR #11 with SHIPCTL_TOKEN > SHIP_TOKEN > SC_TOKEN precedence",
+    "goldSessionIds": ["sess-001"]
+  },
+  {
+    "id": "q-002",
+    "type": "single-session-infra",
+    "question": "What was the multi-arch Docker fix?",
+    "answer": "Added --platform=$BUILDPLATFORM and BUILDX_PLATFORMS for amd64+arm64",
+    "goldSessionIds": ["sess-002"]
+  },
+  {
+    "id": "q-003",
+    "type": "single-session-refactor",
+    "question": "Where did we consolidate the retry logic?",
+    "answer": "src/retry.rs with exponential backoff base=200ms cap=30s full jitter",
+    "goldSessionIds": ["sess-003"]
+  },
+  {
+    "id": "q-004",
+    "type": "single-session-feature",
+    "question": "Which PR introduced helm chart support?",
+    "answer": "PR #14",
+    "goldSessionIds": ["sess-004"]
+  },
+  {
+    "id": "q-005",
+    "type": "single-session-test",
+    "question": "Which test was flaky on macos and how was it fixed?",
+    "answer": "fs-watcher emits_changekind_file_delete; bumped wait to 1500ms + retry: 2",
+    "goldSessionIds": ["sess-005"]
+  },
+  {
+    "id": "q-006",
+    "type": "single-session-perf",
+    "question": "How did we fix the memory leak?",
+    "answer": "Replaced unbounded HashMap with LruCache cap=10k in src/cache.rs (PR #16)",
+    "goldSessionIds": ["sess-006"]
+  },
+  {
+    "id": "q-007",
+    "type": "single-session-api",
+    "question": "How did we handle the github API rate limit?",
+    "answer": "Conditional requests with If-None-Match etag and 304 caching via http-cache",
+    "goldSessionIds": ["sess-007"]
+  },
+  {
+    "id": "q-008",
+    "type": "single-session-db",
+    "question": "What was the schema migration approach for run_history?",
+    "answer": "Three-phase: nullable column + dual-write, backfill + flip reads, drop old column",
+    "goldSessionIds": ["sess-008"]
+  },
+  {
+    "id": "q-009",
+    "type": "single-session-infra",
+    "question": "How is the docs site deployed?",
+    "answer": "GitHub Actions docs.yml workflow + mdbook build + Cloudflare Pages on shipctl.dev",
+    "goldSessionIds": ["sess-009"]
+  },
+  {
+    "id": "q-010",
+    "type": "single-session-release",
+    "question": "Which PR set up the cross-platform release pipeline?",
+    "answer": "PR #19 with cross-rs for linux and native macos/windows builds",
+    "goldSessionIds": ["sess-010"]
+  },
+  {
+    "id": "q-011",
+    "type": "multi-session-causal",
+    "question": "What was the root cause of the staging incident, and where was it fixed?",
+    "answer": "SHIPCTL_TOKEN unset caused fallback to bad SC_TOKEN; fixed in PR #11 (sess-001) with precedence test; documented in post-mortem (sess-014)",
+    "goldSessionIds": ["sess-001", "sess-014"]
+  },
+  {
+    "id": "q-012",
+    "type": "preference",
+    "question": "Which async runtime does the team prefer for new code?",
+    "answer": "async-std (decided in arch review, not tokio)",
+    "goldSessionIds": ["sess-015"]
+  },
+  {
+    "id": "q-013",
+    "type": "preference",
+    "question": "What are the user's formatting preferences?",
+    "answer": "2-space indent in yaml, tab in rust, rust-analyzer over rls, cargo clippy before commit",
+    "goldSessionIds": ["sess-012"]
+  },
+  {
+    "id": "q-014",
+    "type": "multi-session-review",
+    "question": "What was the race condition Aria fixed in the schema migration?",
+    "answer": "Dual-write step had race when two writers hit same row; Aria added per-row advisory lock",
+    "goldSessionIds": ["sess-008", "sess-013"]
+  },
+  {
+    "id": "q-015",
+    "type": "temporal",
+    "question": "What was shipped on April 8th 2026?",
+    "answer": "Auth env-var precedence fix in PR #11, then started docker arm64 work",
+    "goldSessionIds": ["sess-001", "sess-011"]
+  }
+]
diff --git a/eval/data/coding-agent-life-v1/sessions.json b/eval/data/coding-agent-life-v1/sessions.json
new file mode 100644
index 00000000..71134134
--- /dev/null
+++ b/eval/data/coding-agent-life-v1/sessions.json
@@ -0,0 +1,77 @@
+[
+  {
+    "id": "sess-001",
+    "timestamp": "2026-04-08T09:14:00Z",
+    "content": "[user] auth check broken on staging when SHIPCTL_TOKEN unset, falling back to wrong env var\n[assistant] looked at src/auth.rs:42. precedence is SHIPCTL_TOKEN > SHIP_TOKEN > legacy SC_TOKEN. legacy path didn't validate. fixing precedence + adding length check.\n[user] yeah ship it\n[assistant] PR #11 opened with fix + regression test. covers all three env names."
+  },
+  {
+    "id": "sess-002",
+    "timestamp": "2026-04-08T14:30:00Z",
+    "content": "[user] docker build failing for arm64 on M2 runner\n[assistant] missing target in Dockerfile FROM line. need --platform=$BUILDPLATFORM on builder stage. updated Dockerfile + Makefile to set BUILDX_PLATFORMS=linux/amd64,linux/arm64. tested locally on apple silicon, builds clean."
+  },
+  {
+    "id": "sess-003",
+    "timestamp": "2026-04-09T10:02:00Z",
+    "content": "[user] retry logic is duplicated across handlers. consolidate.\n[assistant] extracted retry_with_backoff into src/retry.rs. exponential backoff base=200ms cap=30s jitter=full. all 4 callsites now use it. cuts ~80 lines."
+  },
+  {
+    "id": "sess-004",
+    "timestamp": "2026-04-10T11:45:00Z",
+    "content": "[user] add helm chart support\n[assistant] new subcommand `shipctl helm install`. wraps helm CLI, threads through --namespace and --values. PR #14 has the changes + 12 integration tests. Aria reviewed and approved."
+  },
+  {
+    "id": "sess-005",
+    "timestamp": "2026-04-11T16:20:00Z",
+    "content": "[user] test flake on macos-22 runner. emits_changekind_file_delete keeps failing\n[assistant] fsevents propagation slower on apple silicon, the 800ms wait isn't enough. bumped to 1500ms + describe-level retry: 2. all 15 fs-watcher tests pass now."
+  },
+  {
+    "id": "sess-006",
+    "timestamp": "2026-04-12T09:50:00Z",
+    "content": "[user] memory leak in long-running daemon. RSS grows 5MB/min.\n[assistant] traced to unbounded HashMap in src/cache.rs. swapped for LruCache with 10k cap. RSS stable at 180MB after 6h soak test. PR #16."
+  },
+  {
+    "id": "sess-007",
+    "timestamp": "2026-04-13T13:15:00Z",
+    "content": "[user] github API rate-limiting us when listing PRs. 5000/hr isn't enough during ci\n[assistant] added conditional requests with If-None-Match etag. cache 304 responses. drops actual API calls by ~70%. used http-cache crate."
+  },
+  {
+    "id": "sess-008",
+    "timestamp": "2026-04-15T10:30:00Z",
+    "content": "[user] schema migration for run_history table needs zero-downtime\n[assistant] three-phase migration. phase 1: add nullable new column + dual-write. phase 2: backfill + flip reads. phase 3: drop old column. avoids ALTER TABLE locks. ran on staging, took 4s for 2M rows."
+  },
+  {
+    "id": "sess-009",
+    "timestamp": "2026-04-16T15:40:00Z",
+    "content": "[user] docs site is stale. set up auto-deploy from main\n[assistant] added .github/workflows/docs.yml. mdbook build + cloudflare pages deploy. domain shipctl.dev wired up. preview deploys on PRs."
+  },
+  {
+    "id": "sess-010",
+    "timestamp": "2026-04-17T11:00:00Z",
+    "content": "[user] release pipeline. cut v0.3.0 with binaries for linux/macos/windows\n[assistant] new .github/workflows/release.yml. cross-rs for static linux, native for macos + windows. uploads to gh release + crates.io publish. tag-triggered. PR #19 has the workflow."
+  },
+  {
+    "id": "sess-011",
+    "timestamp": "2026-04-08T17:00:00Z",
+    "content": "[user] standup notes: shipped #11 auth fix this morning. starting on docker arm64 next."
+  },
+  {
+    "id": "sess-012",
+    "timestamp": "2026-04-10T18:30:00Z",
+    "content": "[user] preferences: I like rust analyzer over rls. always run cargo clippy before commit. prefer 2-space indent in yaml but tab in rust."
+  },
+  {
+    "id": "sess-013",
+    "timestamp": "2026-04-15T19:00:00Z",
+    "content": "[assistant] reviewed Aria's PR #18 (schema migration). flagged race condition in dual-write step when two writers hit same row. Aria added per-row advisory lock. lgtm now."
+  },
+  {
+    "id": "sess-014",
+    "timestamp": "2026-04-16T20:10:00Z",
+    "content": "[user] post-mortem from prod incident last week: SHIPCTL_TOKEN was unset in staging, fell back to bad SC_TOKEN which had wrong perms. delivery delayed 40min. action items: (1) precedence test (done in #11), (2) startup validation, (3) alert on auth fallback."
+  },
+  {
+    "id": "sess-015",
+    "timestamp": "2026-04-17T16:45:00Z",
+    "content": "[user] preferences: stick to async-std not tokio for new code. team agreed in arch review."
+  }
+]
diff --git a/eval/runner/adapters/agentmemory.ts b/eval/runner/adapters/agentmemory.ts
new file mode 100644
index 00000000..38028a7d
--- /dev/null
+++ b/eval/runner/adapters/agentmemory.ts
@@ -0,0 +1,93 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+interface AgentMemoryState {
+  baseUrl: string;
+  secret?: string;
+  sessions: Session[];
+  observationToSession: Map<string, string>;
+}
+
+interface RememberResponse {
+  memory?: { id?: string };
+  observationId?: string;
+  id?: string;
+  observation?: { id?: string };
+}
+
+/**
+ * One hit returned by smart-search. Every field is optional because the
+ * adapter tolerates several response shapes.
+ */
+interface SmartSearchRow {
+  obsId?: string;
+  id?: string;
+  observationId?: string;
+  sessionId?: string;
+  score?: number;
+  content?: string;
+}
+
+interface SmartSearchResponse {
+  results?: SmartSearchRow[];
+  observations?: SmartSearchRow[];
+}
+
+/** JSON request headers; a bearer Authorization header is added only when `secret` is non-empty. */
+function authHeaders(secret?: string): Record<string, string> {
+  const base = { "Content-Type": "application/json" };
+  return secret ? { ...base, Authorization: `Bearer ${secret}` } : base;
+}
+
+/**
+ * Adapter that ingests each session through POST /agentmemory/remember and
+ * ranks sessions from POST /agentmemory/smart-search results.
+ */
+export const agentmemoryAdapter: Adapter<AgentMemoryState> = {
+  name: "agentmemory-hybrid",
+  async init(sessions, config) {
+    // Explicit config wins, then the environment, then the local default.
+    const baseUrl = (config?.baseUrl as string) ?? process.env.AGENTMEMORY_BASE_URL ?? "http://localhost:3111";
+    const secret = (config?.secret as string) ?? process.env.AGENTMEMORY_SECRET;
+    const headers = authHeaders(secret);
+    const observationToSession = new Map<string, string>();
+    for (const session of sessions) {
+      const payload = { content: session.content, type: "eval-session", concepts: [session.id] };
+      const res = await fetch(`${baseUrl}/agentmemory/remember`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify(payload),
+      });
+      if (!res.ok) {
+        throw new Error(`remember failed for ${session.id}: ${res.status} ${await res.text()}`);
+      }
+      const reply = (await res.json()) as RememberResponse;
+      // The id may sit under any of these keys; it lets hits without a sessionId be mapped back.
+      const obsId = reply.memory?.id ?? reply.observationId ?? reply.id ?? reply.observation?.id;
+      if (obsId) observationToSession.set(obsId, session.id);
+    }
+    return { baseUrl, secret, sessions, observationToSession };
+  },
+  async query(q, state, k) {
+    const res = await fetch(`${state.baseUrl}/agentmemory/smart-search`, {
+      method: "POST",
+      headers: authHeaders(state.secret),
+      // Requests more rows than k; rows are deduplicated by session below.
+      body: JSON.stringify({ query: q, limit: Math.max(k * 10, 50) }),
+    });
+    if (!res.ok) {
+      throw new Error(`smart-search failed: ${res.status} ${await res.text()}`);
+    }
+    const body = (await res.json()) as SmartSearchResponse;
+    const ranked: RankedDoc[] = [];
+    const seen = new Set<string>();
+    for (const hit of body.results ?? body.observations ?? []) {
+      const memId = hit.obsId ?? hit.id ?? hit.observationId;
+      const sessionId = hit.sessionId || (memId ? state.observationToSession.get(memId) : undefined);
+      if (!sessionId || seen.has(sessionId)) continue;
+      seen.add(sessionId);
+      ranked.push({ sessionId, score: hit.score ?? 0 });
+      if (ranked.length >= k) break;
+    }
+    return ranked;
+  },
+};
diff --git a/eval/runner/adapters/grep.ts b/eval/runner/adapters/grep.ts
new file mode 100644
index 00000000..28b18ea6
--- /dev/null
+++ b/eval/runner/adapters/grep.ts
@@ -0,0 +1,36 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+interface GrepState {
+  sessions: Session[];
+}
+
+/**
+ * Lowercases `s`, splits on runs of characters outside [a-z0-9_], and keeps tokens longer than two characters.
+ */
+function tokenize(s: string): string[] {
+  const words = s.toLowerCase().match(/[a-z0-9_]+/g) ?? [];
+  return words.filter((word) => word.length >= 3);
+}
+
+/**
+ * Baseline adapter: scores a session by how many query tokens occur as
+ * substrings of its lowercased content; sessions with no match are dropped.
+ */
+export const grepAdapter: Adapter<GrepState> = {
+  name: "grep",
+  async init(sessions) {
+    return { sessions };
+  },
+  async query(q, state, k) {
+    const terms = tokenize(q);
+    const matches = state.sessions
+      .map((session): RankedDoc => {
+        const haystack = session.content.toLowerCase();
+        const score = terms.filter((term) => haystack.includes(term)).length;
+        return { sessionId: session.id, score };
+      })
+      .filter((doc) => doc.score > 0)
+      .sort((x, y) => y.score - x.score);
+    return matches.slice(0, k);
+  },
+};
diff --git a/eval/runner/adapters/vector.ts b/eval/runner/adapters/vector.ts
new file mode 100644
index 00000000..c40e414d
--- /dev/null
+++ b/eval/runner/adapters/vector.ts
@@ -0,0 +1,108 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+interface VectorState {
+  sessions: Session[];
+  embeddings: Float32Array[];
+}
+
+const OPENAI_URL = "https://api.openai.com/v1/embeddings";
+const MODEL = "text-embedding-3-small";
+const DIM = 1536;
+
+/**
+ * Embeds one text via the OpenAI embeddings endpoint; throws on a non-2xx reply or a missing embedding.
+ */
+async function embed(text: string, apiKey: string): Promise<Float32Array> {
+  const res = await fetch(OPENAI_URL, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
+    body: JSON.stringify({ input: text, model: MODEL }),
+  });
+  if (!res.ok) throw new Error(`OpenAI embed failed: ${res.status} ${await res.text()}`);
+  const data = (await res.json()) as { data?: Array<{ embedding?: number[] }> };
+  const vector = data.data?.[0]?.embedding;
+  if (!Array.isArray(vector) || vector.length === 0) throw new Error("OpenAI embed: response contained no embedding");
+  return Float32Array.from(vector);
+}
+
+/**
+ * Embeds several texts in one request and returns the vectors in input order.
+ * Rows are placed by their reported `index`, not by their position in the reply.
+ * @throws Error on a non-2xx response, a row-count mismatch, an out-of-range
+ *   or repeated index, or an empty embedding.
+ */
+async function embedBatch(texts: string[], apiKey: string): Promise<Float32Array[]> {
+  const res = await fetch(OPENAI_URL, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
+    body: JSON.stringify({ input: texts, model: MODEL }),
+  });
+  if (!res.ok) {
+    throw new Error(`OpenAI batch embed failed: ${res.status} ${await res.text()}`);
+  }
+  const payload = (await res.json()) as { data: Array<{ embedding: number[]; index: number }> };
+  const expected = texts.length;
+  if (!Array.isArray(payload.data) || payload.data.length !== expected) {
+    throw new Error(
+      `OpenAI batch embed: expected ${expected} embeddings, got ${payload.data?.length ?? 0}`,
+    );
+  }
+  const vectors = new Array<Float32Array>(expected);
+  for (const { index, embedding } of payload.data) {
+    const inRange = Number.isInteger(index) && index >= 0 && index < expected;
+    if (!inRange || vectors[index] !== undefined) {
+      throw new Error(`OpenAI batch embed: invalid or duplicate index ${index}`);
+    }
+    if (!Array.isArray(embedding) || embedding.length === 0) {
+      throw new Error(`OpenAI batch embed: empty embedding at index ${index}`);
+    }
+    vectors[index] = Float32Array.from(embedding);
+  }
+  return vectors;
+}
+
+/** Cosine similarity over the length of `a`; returns 0 when the product of the two norms is 0. */
+function cosine(a: Float32Array, b: Float32Array): number {
+  const sums = { dot: 0, aa: 0, bb: 0 };
+  a.forEach((x, i) => {
+    const y = b[i];
+    sums.dot += x * y;
+    sums.aa += x * x;
+    sums.bb += y * y;
+  });
+  const norm = Math.sqrt(sums.aa) * Math.sqrt(sums.bb);
+  return norm === 0 ? 0 : sums.dot / norm;
+}
+
+/**
+ * Baseline adapter: embeds every session with OpenAI and ranks sessions by
+ * cosine similarity to the embedded query. Requires OPENAI_API_KEY.
+ */
+export const vectorAdapter: Adapter<VectorState> = {
+  name: "vector",
+  async init(sessions) {
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter");
+    const embeddings: Float32Array[] = [];
+    const BATCH = 50;
+    for (let i = 0; i < sessions.length; i += BATCH) {
+      // Content is cut to 8000 characters before embedding.
+      const inputs = sessions.slice(i, i + BATCH).map((s) => s.content.slice(0, 8000));
+      embeddings.push(...(await embedBatch(inputs, apiKey)));
+    }
+    // Validate every vector, not only the first; a wrong-sized one would corrupt that session's cosine score.
+    const bad = embeddings.find((vec) => vec.length !== DIM);
+    if (bad) throw new Error(`unexpected embedding dim: ${bad.length}`);
+    return { sessions, embeddings };
+  },
+  async query(q, state, k) {
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter");
+    const qvec = await embed(q, apiKey);
+    const scored: RankedDoc[] = state.sessions.map((s, i) => ({
+      sessionId: s.id,
+      score: cosine(qvec, state.embeddings[i]),
+    }));
+    return scored.sort((a, b) => b.score - a.score).slice(0, k);
+  },
+};
diff --git a/eval/runner/coding-life.ts b/eval/runner/coding-life.ts
new file mode 100644
index 00000000..753ca87f
--- /dev/null
+++ b/eval/runner/coding-life.ts
@@ -0,0 +1,101 @@
+import { readFileSync, existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { parseArgs } from "node:util";
+import { agentmemoryAdapter } from "./adapters/agentmemory.js";
+import { grepAdapter } from "./adapters/grep.js";
+import { vectorAdapter } from "./adapters/vector.js";
+import { aggregate, scoreQuestion } from "./score.js";
+import type { Adapter, Question, ScoreRow, Session } from "./types.js";
+
+const ADAPTERS: Record<string, Adapter> = {
+  grep: grepAdapter as unknown as Adapter,
+  vector: vectorAdapter as unknown as Adapter,
+  agentmemory: agentmemoryAdapter as unknown as Adapter,
+};
+
+interface CliOptions {
+  data: string;
+  adapters: string;
+  k: string;
+  out: string;
+}
+
+/** Reads --data, --adapters, --k and --out (string options with defaults) from the command line. */
+function parse(): CliOptions {
+  // Values stay strings here; main() converts and validates --k.
+  const options = {
+    data: { type: "string", default: "eval/data/coding-agent-life-v1" },
+    adapters: { type: "string", default: "grep,vector,agentmemory" },
+    k: { type: "string", default: "5" },
+    out: { type: "string", default: "eval/reports/coding-life" },
+  } as const;
+  return parseArgs({ options }).values as unknown as CliOptions;
+}
+
+/**
+ * Runs each selected adapter over the coding-life corpus (one init per adapter, one query per
+ * question), appending score rows to scores.ndjson and writing the aggregate to summary.json.
+ */
+async function main(): Promise<void> {
+  const opts = parse();
+  const k = Number(opts.k);
+  if (!Number.isInteger(k) || k <= 0) {
+    console.error(`--k must be a positive integer, got: ${opts.k}`);
+    process.exit(2);
+  }
+  const readJson = (file: string): unknown => JSON.parse(readFileSync(resolve(opts.data, file), "utf8"));
+  const sessions = readJson("sessions.json") as Session[];
+  const queriesRaw = readJson("queries.json") as Array<Omit<Question, "haystack">>;
+  // Every question searches the same shared haystack of sessions.
+  const questions: Question[] = queriesRaw.map((q) => ({ ...q, haystack: sessions }));
+  const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean);
+  const unknownName = adapterNames.find((name) => !ADAPTERS[name]);
+  if (unknownName !== undefined) {
+    console.error(`unknown adapter: ${unknownName}. options: ${Object.keys(ADAPTERS).join(",")}`);
+    process.exit(2);
+  }
+  console.log(
+    `loaded ${sessions.length} sessions, ${questions.length} queries, adapters: ${adapterNames.join(",")}, k=${k}`,
+  );
+
+  const outDir = resolve(opts.out);
+  mkdirSync(outDir, { recursive: true });
+  const ndjsonPath = `${outDir}/scores.ndjson`;
+  // Empty the ndjson file when one is left over from an earlier run.
+  if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, "");
+
+  const rows: ScoreRow[] = [];
+  for (const adapterName of adapterNames) {
+    const adapter = ADAPTERS[adapterName];
+    console.log(`\n== ${adapter.name} ==`);
+    const state = await adapter.init(sessions);
+    try {
+      for (const q of questions) {
+        const startedAt = performance.now();
+        const ranked = await adapter.query(q.question, state, k);
+        const latencyMs = performance.now() - startedAt;
+        const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs);
+        rows.push(row);
+        appendFileSync(ndjsonPath, JSON.stringify(row) + "\n");
+        const recall = row.recallAtK.toFixed(2);
+        console.log(`  ${row.hit ? "+" : "-"} ${q.id} [${q.type}] R@${k}=${recall} (${Math.round(latencyMs)}ms)`);
+      }
+    } finally {
+      if (adapter.teardown) await adapter.teardown(state);
+    }
+  }
+
+  const agg = aggregate(rows);
+  writeFileSync(`${outDir}/summary.json`, JSON.stringify(agg, null, 2));
+  console.log("\n=== Summary ===");
+  for (const [adapter, stats] of Object.entries(agg.byAdapter)) {
+    console.log(
+      `  ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`,
+    );
+  }
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/eval/runner/load.ts b/eval/runner/load.ts
new file mode 100644
index 00000000..aece2452
--- /dev/null
+++ b/eval/runner/load.ts
@@ -0,0 +1,54 @@
+import { readFileSync } from "node:fs";
+import type { Question, Session } from "./types.js";
+
+interface LongMemEvalRaw {
+  question_id: string;
+  question_type: string;
+  question: string;
+  answer?: string;
+  answer_session_ids: string[];
+  haystack_session_ids: string[];
+  haystack_sessions: Array<Array<{ role: string; content: string }>>;
+}
+
+/** Renders each turn as "[role] content" and joins the turns with a blank line. */
+const flattenSession = (turns: Array<{ role: string; content: string }>): string =>
+  turns.map(({ role, content }) => `[${role}] ${content}`).join("\n\n");
+
+/**
+ * Parses a LongMemEval JSON file into questions (only the first `limit` rows when given),
+ * pairing each haystack session id with its flattened transcript; throws on a length mismatch.
+ */
+export function loadLongMemEval(path: string, limit?: number): Question[] {
+  const raw = JSON.parse(readFileSync(path, "utf8")) as LongMemEvalRaw[];
+  const selected = typeof limit === "number" ? raw.slice(0, limit) : raw;
+  return selected.map((r): Question => {
+    const ids = r.haystack_session_ids;
+    const transcripts = r.haystack_sessions;
+    if (ids.length !== transcripts.length) {
+      throw new Error(
+        `LongMemEval row ${r.question_id}: haystack_session_ids (${ids.length}) and haystack_sessions (${transcripts.length}) length mismatch`,
+      );
+    }
+    return {
+      id: r.question_id,
+      type: r.question_type,
+      question: r.question,
+      answer: r.answer,
+      goldSessionIds: r.answer_session_ids,
+      haystack: ids.map((id, i): Session => ({ id, content: flattenSession(transcripts[i]) })),
+    };
+  });
+}
+
+/** Keeps at most `perType` questions of each type (input order within a type), with types emitted in sorted order. */
+export function stratifySample(questions: Question[], perType: number): Question[] {
+  const byType: Record<string, Question[]> = {};
+  questions.forEach((question) => {
+    const bucket = (byType[question.type] ??= []);
+    bucket.push(question);
+  });
+  return Object.keys(byType)
+    .sort()
+    .flatMap((type) => byType[type].slice(0, perType));
+}
diff --git a/eval/runner/longmemeval.ts b/eval/runner/longmemeval.ts
new file mode 100644
index 00000000..a906fa21
--- /dev/null
+++ b/eval/runner/longmemeval.ts
@@ -0,0 +1,126 @@
+import { existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { parseArgs } from "node:util";
+import { agentmemoryAdapter } from "./adapters/agentmemory.js";
+import { grepAdapter } from "./adapters/grep.js";
+import { vectorAdapter } from "./adapters/vector.js";
+import { loadLongMemEval, stratifySample } from "./load.js";
+import { aggregate, scoreQuestion } from "./score.js";
+import type { Adapter, ScoreRow } from "./types.js";
+
+const ADAPTERS: Record<string, Adapter> = {
+  grep: grepAdapter as unknown as Adapter,
+  vector: vectorAdapter as unknown as Adapter,
+  agentmemory: agentmemoryAdapter as unknown as Adapter,
+};
+
+interface CliOptions {
+  data: string;
+  adapters: string;
+  k: string;
+  limit?: string;
+  stratify?: string;
+  out: string;
+}
+
+/** Reads the CLI flags as strings; --data defaults to LONGMEMEVAL_PATH, or "" when that is unset. */
+function parse(): CliOptions {
+  // --limit and --stratify declare no default, so they are absent unless passed.
+  const options = {
+    data: { type: "string", default: process.env.LONGMEMEVAL_PATH ?? "" },
+    adapters: { type: "string", default: "grep,vector,agentmemory" },
+    k: { type: "string", default: "5" },
+    limit: { type: "string" },
+    stratify: { type: "string" },
+    out: { type: "string", default: "eval/reports/longmemeval" },
+  } as const;
+  return parseArgs({ options }).values as unknown as CliOptions;
+}
+
+/**
+ * Runs every selected adapter over LongMemEval. Each question gets a fresh
+ * adapter state built from its own haystack, and the recorded latency spans
+ * init plus query. Score rows are appended to scores.ndjson and the aggregate
+ * is written to summary.json, both under the --out directory.
+ */
+async function main(): Promise<void> {
+  const opts = parse();
+  if (!opts.data) {
+    console.error("--data <path/to/longmemeval_s.json> required (or LONGMEMEVAL_PATH env)");
+    process.exit(2);
+  }
+  // Converts an optional numeric flag; exits with status 2 unless it is a positive integer.
+  const positiveInt = (flag: string, raw: string | undefined): number | undefined => {
+    if (raw === undefined) return undefined;
+    const value = Number(raw);
+    if (!Number.isInteger(value) || value <= 0) {
+      console.error(`--${flag} must be a positive integer, got: ${raw}`);
+      process.exit(2);
+    }
+    return value;
+  };
+  const k = positiveInt("k", opts.k) as number;
+  const limit = positiveInt("limit", opts.limit);
+  const perType = positiveInt("stratify", opts.stratify);
+  const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean);
+  const unknownName = adapterNames.find((name) => !ADAPTERS[name]);
+  if (unknownName !== undefined) {
+    console.error(`unknown adapter: ${unknownName}. options: ${Object.keys(ADAPTERS).join(",")}`);
+    process.exit(2);
+  }
+  const loaded = loadLongMemEval(resolve(opts.data), limit);
+  // --limit is applied while loading, i.e. before stratified sampling.
+  const questions = perType ? stratifySample(loaded, perType) : loaded;
+  console.log(
+    `loaded ${questions.length} questions, adapters: ${adapterNames.join(",")}, k=${k}`,
+  );
+
+  const outDir = resolve(opts.out);
+  mkdirSync(outDir, { recursive: true });
+  const ndjsonPath = `${outDir}/scores.ndjson`;
+  // Empty the ndjson file when one is left over from an earlier run.
+  if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, "");
+  mkdirSync(dirname(ndjsonPath), { recursive: true });
+
+  const rows: ScoreRow[] = [];
+  for (const adapterName of adapterNames) {
+    const adapter = ADAPTERS[adapterName];
+    console.log(`\n== ${adapter.name} ==`);
+    for (const q of questions) {
+      // The timer starts before init, so latency includes indexing the haystack.
+      const startedAt = performance.now();
+      const state = await adapter.init(q.haystack);
+      try {
+        const ranked = await adapter.query(q.question, state, k);
+        const latencyMs = performance.now() - startedAt;
+        const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs);
+        rows.push(row);
+        appendFileSync(ndjsonPath, JSON.stringify(row) + "\n");
+        const recall = row.recallAtK.toFixed(2);
+        console.log(
+          `  ${row.hit ? "+" : "-"} ${q.id} [${q.type}] R@${k}=${recall} (${Math.round(latencyMs)}ms)`,
+        );
+      } finally {
+        if (adapter.teardown) await adapter.teardown(state);
+      }
+    }
+  }
+
+  const agg = aggregate(rows);
+  const summaryPath = `${outDir}/summary.json`;
+  writeFileSync(summaryPath, JSON.stringify(agg, null, 2));
+
+  console.log("\n=== Summary ===");
+  for (const [adapter, stats] of Object.entries(agg.byAdapter)) {
+    console.log(
+      `  ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`,
+    );
+  }
+  console.log(`\nwrote ${ndjsonPath}`);
+  console.log(`wrote ${summaryPath}`);
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/eval/runner/score.ts b/eval/runner/score.ts
new file mode 100644
index 00000000..b21d30ca
--- /dev/null
+++ b/eval/runner/score.ts
@@ -0,0 +1,78 @@
+import type { Question, RankedDoc, ScoreRow } from "./types.js";
+
+/**
+ * Score one adapter's ranking for a single question at cutoff `k`.
+ * Each gold session is counted at most once, so duplicate session ids in
+ * `ranked` cannot push recall above 1; `topGoldRank` is 1-based over all of `ranked`.
+ */
+export function scoreQuestion(
+  q: Question,
+  ranked: RankedDoc[],
+  k: number,
+  adapter: string,
+  latencyMs: number,
+): ScoreRow {
+  const gold = new Set(q.goldSessionIds);
+  const topK = ranked.slice(0, Math.max(0, k)).map((r) => r.sessionId); // clamp: a negative k must not slice from the end
+  const hits = new Set(topK.filter((id) => gold.has(id))).size; // distinct gold ids only
+  const precisionAtK = k > 0 ? hits / k : 0;
+  const recallAtK = gold.size === 0 ? 0 : hits / gold.size;
+  const hit = hits > 0;
+  const firstGold = ranked.findIndex((r) => gold.has(r.sessionId));
+  const topGoldRank = firstGold === -1 ? null : firstGold + 1;
+  return {
+    questionId: q.id,
+    questionType: q.type,
+    adapter,
+    k,
+    precisionAtK,
+    recallAtK,
+    hit,
+    topGoldRank,
+    latencyMs,
+  };
+}
+
+export function aggregate(rows: ScoreRow[]): { // Roll per-question rows up into mean P@k / R@k, hit counts and median latency, per adapter and per (question type, adapter).
+  byAdapter: Record<string, { p: number; r: number; hit: number; n: number; latencyP50: number }>;
+  byType: Record<string, Record<string, { p: number; r: number; hit: number; n: number }>>;
+} {
+  const byAdapter: Record<
+    string,
+    { p: number; r: number; hit: number; n: number; latencyP50: number }
+  > = {};
+  const latencies: Record<string, number[]> = {};
+  for (const r of rows) { // pass 1: accumulate per-adapter sums; p and r become means further down
+    const a = (byAdapter[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0, latencyP50: 0 });
+    a.p += r.precisionAtK;
+    a.r += r.recallAtK;
+    a.hit += r.hit ? 1 : 0; // hit stays a raw count (it is never divided by n)
+    a.n += 1;
+    (latencies[r.adapter] ??= []).push(r.latencyMs);
+  }
+  for (const adapter of Object.keys(byAdapter)) {
+    const a = byAdapter[adapter];
+    a.p = a.p / a.n; // n >= 1 here: an entry only exists once a row was seen for it
+    a.r = a.r / a.n;
+    const sorted = latencies[adapter].slice().sort((x, y) => x - y); // copy before sorting, numeric ascending
+    a.latencyP50 = sorted[Math.floor(sorted.length / 2)] ?? 0; // middle element; the upper of the two middle values when the count is even
+  }
+  const byType: Record<string, Record<string, { p: number; r: number; hit: number; n: number }>> =
+    {};
+  for (const r of rows) { // pass 2: same accumulation, keyed by question type then adapter (no latency here)
+    const t = (byType[r.questionType] ??= {});
+    const a = (t[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0 });
+    a.p += r.precisionAtK;
+    a.r += r.recallAtK;
+    a.hit += r.hit ? 1 : 0;
+    a.n += 1;
+  }
+  for (const t of Object.keys(byType)) {
+    for (const adapter of Object.keys(byType[t])) {
+      const a = byType[t][adapter];
+      a.p = a.p / a.n; // sums -> means, as in the per-adapter pass
+      a.r = a.r / a.n;
+    }
+  }
+  return { byAdapter, byType };
+}
diff --git a/eval/runner/types.ts b/eval/runner/types.ts
new file mode 100644
index 00000000..e72a6408
--- /dev/null
+++ b/eval/runner/types.ts
@@ -0,0 +1,38 @@
+export interface Session { // One haystack document; arrays of these are what Question.haystack holds and Adapter.init receives.
+  id: string; // presumably the id that RankedDoc.sessionId and Question.goldSessionIds refer to — confirm against the adapters
+  timestamp?: string;
+  content: string;
+}
+
+export interface Question { // One benchmark item: a query, the sessions to search, and which of them count as correct.
+  id: string;
+  type: string; // grouping key: copied to ScoreRow.questionType and used for the per-type aggregate
+  question: string; // the query text handed to Adapter.query
+  answer?: string;
+  goldSessionIds: string[]; // session ids that scoreQuestion counts as correct retrievals
+  haystack: Session[]; // the sessions handed to Adapter.init for this question
+}
+
+export interface RankedDoc { // One retrieved result; array position is the rank (scoreQuestion treats index 0 as rank 1).
+  sessionId: string; // matched against Question.goldSessionIds when scoring
+  score: number; // not read by scoreQuestion, which relies on array order only
+}
+
+export interface Adapter<State = unknown> { // A retrieval backend under evaluation; State is whatever init builds and query/teardown consume.
+  name: string; // used as the ScoreRow.adapter label
+  init(sessions: Session[], config?: Record<string, unknown>): Promise<State>; // build searchable state from a haystack
+  query(q: string, state: State, k: number): Promise<RankedDoc[]>; // return a ranking, best first; the scorer only looks at the first k for P@k / R@k
+  teardown?(state: State): Promise<void>; // optional cleanup; the runner loop awaits it in a finally block after each question
+}
+
+export interface ScoreRow { // Per-question, per-adapter result produced by scoreQuestion.
+  questionId: string;
+  questionType: string;
+  adapter: string;
+  k: number; // cutoff the precision/recall below were computed at
+  precisionAtK: number; // gold hits in the top k divided by k (0 when k <= 0)
+  recallAtK: number; // gold hits in the top k divided by the number of gold sessions (0 when there are none)
+  hit: boolean; // true when at least one gold session is in the top k
+  topGoldRank: number | null; // 1-based rank of the first gold session anywhere in the ranking, null if absent
+  latencyMs: number; // supplied by the caller; scoreQuestion passes it through unchanged
+}
diff --git a/eval/scripts/sandbox.sh b/eval/scripts/sandbox.sh
new file mode 100755
index 00000000..5d402330
--- /dev/null
+++ b/eval/scripts/sandbox.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# Boot a sandboxed agentmemory + iii-engine on alt ports with a clean data dir,
+# so eval runs aren't polluted by (and don't pollute) your real ~/.agentmemory.
+# Source it: `source eval/scripts/sandbox.sh` then run eval scripts;
+# the sandbox is torn down on EXIT.
+
+set -euo pipefail
+
+SANDBOX_ROOT="${SANDBOX_ROOT:-/tmp/agentmemory-eval-sandbox}"
+SANDBOX_PORT="${SANDBOX_PORT:-3411}"
+SANDBOX_STREAM_PORT="${SANDBOX_STREAM_PORT:-3412}"
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+if ! command -v iii >/dev/null 2>&1; then  # preflight: the iii engine binary must be on PATH; diagnostics go to stderr like the other checks
+  echo "iii binary not on PATH. Install pinned version:" >&2
+  echo "  curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin" >&2
+  exit 1
+fi
+
+iii_ver=$(iii --version 2>&1 | head -1)
+if [[ "$iii_ver" != "0.11.2" ]]; then  # a version mismatch only warns; it does not abort
+  echo "warning: iii version on PATH is $iii_ver; agentmemory pins 0.11.2" >&2
+fi
+
+if [[ ! -f "$REPO_ROOT/dist/index.mjs" ]]; then
+  echo "dist/ missing. Run: npm run build" >&2
+  exit 1
+fi
+
+if [[ "$SANDBOX_ROOT" != /tmp/[!/]* || "$SANDBOX_ROOT" == */..* ]]; then  # guard for the rm -rf below: only a real child of /tmp/ passes; rejects "", "/", "/tmp/" itself, "/tmp//" and ".." escapes such as /tmp/../etc
+  echo "refusing to wipe SANDBOX_ROOT='$SANDBOX_ROOT' — must be a subdirectory of /tmp/ with no '..' segments" >&2
+  exit 1
+fi
+rm -rf "$SANDBOX_ROOT"
+mkdir -p "$SANDBOX_ROOT/data" "$SANDBOX_ROOT/.agentmemory"
+
+cat > "$SANDBOX_ROOT/iii-config.yaml" <<EOF
+workers:
+  - name: iii-http
+    config:
+      port: $SANDBOX_PORT
+      host: 127.0.0.1
+      default_timeout: 180000
+      cors:
+        allowed_origins: ["http://localhost:$SANDBOX_PORT", "http://127.0.0.1:$SANDBOX_PORT"]
+        allowed_methods: [GET, POST, PUT, DELETE, OPTIONS]
+  - name: iii-state
+    config:
+      adapter:
+        name: kv
+        config:
+          store_method: file_based
+          file_path: $SANDBOX_ROOT/data/state_store.db
+  - name: iii-queue
+    config:
+      adapter:
+        name: builtin
+  - name: iii-pubsub
+    config:
+      adapter:
+        name: local
+  - name: iii-cron
+    config:
+      adapter:
+        name: kv
+  - name: iii-stream
+    config:
+      port: $SANDBOX_STREAM_PORT
+      host: 127.0.0.1
+      adapter:
+        name: kv
+        config:
+          store_method: file_based
+          file_path: $SANDBOX_ROOT/data/stream_store
+  - name: iii-observability
+    config:
+      enabled: true
+      service_name: agentmemory-eval
+      exporter: memory
+      sampling_ratio: 1.0
+      metrics_enabled: true
+      logs_enabled: false
+      logs_console_output: false
+  - name: iii-exec
+    config:
+      exec:
+        - node $REPO_ROOT/dist/index.mjs
+EOF
+
+cd "$SANDBOX_ROOT"  # note: when this file is sourced, this also changes the caller's working directory
+HOME="$SANDBOX_ROOT" iii --config "$SANDBOX_ROOT/iii-config.yaml" > "$SANDBOX_ROOT/iii.log" 2>&1 &  # HOME is overridden for this process only, so ~ resolves inside the sandbox (.agentmemory was created there above); output goes to iii.log
+SANDBOX_PID=$!  # pid of the background iii process, used by cleanup
+
+cleanup() {  # stop the background iii process: default signal first, then SIGKILL one second later
+  echo "tearing down sandbox (pid $SANDBOX_PID)"
+  kill "$SANDBOX_PID" 2>/dev/null || true  # "|| true" keeps set -e from aborting when the process is already gone
+  sleep 1
+  kill -9 "$SANDBOX_PID" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# wait for livez: poll once per second, up to 30 attempts, until the endpoint reports "status":"ok"
+for i in $(seq 1 30); do
+  if curl -sS --max-time 1 "http://localhost:$SANDBOX_PORT/agentmemory/livez" 2>/dev/null | grep -q '"status":"ok"'; then
+    export AGENTMEMORY_BASE_URL="http://localhost:$SANDBOX_PORT"  # exported so later commands in the same shell can read it
+    echo "sandbox ready: $AGENTMEMORY_BASE_URL"
+    echo "  state: $SANDBOX_ROOT/data/"
+    echo "  logs:  $SANDBOX_ROOT/iii.log"
+    return 0 2>/dev/null || exit 0  # "return" succeeds when this file is sourced; when executed directly it fails and "exit 0" runs instead
+  fi
+  sleep 1
+done
+
+echo "sandbox failed to come up within 30s. last log lines:" >&2
+tail -10 "$SANDBOX_ROOT/iii.log" >&2
+exit 1  # unlike the success path this always exits, so a sourcing shell is terminated too (which fires the EXIT trap)
diff --git a/integrations/hermes/plugin.yaml b/integrations/hermes/plugin.yaml
index b4f32151..9ea5cb98 100644
--- a/integrations/hermes/plugin.yaml
+++ b/integrations/hermes/plugin.yaml
@@ -4,6 +4,9 @@ description: "Persistent cross-session memory for Hermes Agent via agentmemory.
 author: "Rohit Ghumare"
 homepage: "https://github.com/rohitg00/agentmemory"
 hooks:
+  - prefetch
+  - sync_turn
   - on_session_end
   - on_pre_compress
   - on_memory_write
+  - system_prompt_block
diff --git a/package.json b/package.json
index 820fc8f7..bc245a2f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@agentmemory/agentmemory",
-  "version": "0.9.20",
+  "version": "0.9.21",
   "description": "Persistent memory for AI coding agents, powered by iii-engine's three primitives",
   "type": "module",
   "main": "dist/index.mjs",
@@ -25,7 +25,9 @@
     "test:watch": "vitest --exclude test/integration.test.ts",
     "test:integration": "vitest run test/integration.test.ts",
     "test:all": "vitest run",
-    "bench:load": "node --import tsx benchmark/load-100k.ts"
+    "bench:load": "node --import tsx benchmark/load-100k.ts",
+    "eval:longmemeval": "tsx eval/runner/longmemeval.ts",
+    "eval:coding-life": "tsx eval/runner/coding-life.ts"
   },
   "keywords": [
     "ai",
@@ -60,7 +62,7 @@
     "@anthropic-ai/sdk": "^0.39.0",
     "@clack/prompts": "^1.2.0",
     "dotenv": "^17.4.2",
-    "iii-sdk": "^0.11.2",
+    "iii-sdk": "0.11.2",
     "zod": "^4.0.0"
   },
   "optionalDependencies": {
diff --git a/packages/mcp/package.json b/packages/mcp/package.json
index 403295dd..96da3ae4 100644
--- a/packages/mcp/package.json
+++ b/packages/mcp/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@agentmemory/mcp",
-  "version": "0.9.20",
+  "version": "0.9.21",
   "description": "Standalone MCP server for agentmemory — thin shim that re-exposes @agentmemory/agentmemory's MCP entrypoint",
   "type": "module",
   "bin": {
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index a18860e4..e53f8088 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "agentmemory",
-  "version": "0.9.20",
+  "version": "0.9.21",
   "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 12 hooks, 51 MCP tools, 4 skills, real-time viewer.",
   "author": {
     "name": "Rohit Ghumare",
diff --git a/plugin/.codex-plugin/plugin.json b/plugin/.codex-plugin/plugin.json
index f8d676f6..0a7cc173 100644
--- a/plugin/.codex-plugin/plugin.json
+++ b/plugin/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "agentmemory",
-  "version": "0.9.20",
+  "version": "0.9.21",
   "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 6 hooks, 51 MCP tools, 4 skills, real-time viewer.",
   "author": {
     "name": "Rohit Ghumare",
diff --git a/plugin/.mcp.copilot.json b/plugin/.mcp.copilot.json
new file mode 100644
index 00000000..01d03f7d
--- /dev/null
+++ b/plugin/.mcp.copilot.json
@@ -0,0 +1,14 @@
+{
+  "mcpServers": {
+    "agentmemory": {
+      "type": "local",
+      "command": "npx",
+      "args": ["-y", "@agentmemory/mcp"],
+      "env": {
+        "AGENTMEMORY_URL": "${AGENTMEMORY_URL}",
+        "AGENTMEMORY_SECRET": "${AGENTMEMORY_SECRET}"
+      },
+      "tools": ["*"]
+    }
+  }
+}
diff --git a/plugin/hooks/hooks.codex.json b/plugin/hooks/hooks.codex.json
index 73e43c66..d2c3a3b6 100644
--- a/plugin/hooks/hooks.codex.json
+++ b/plugin/hooks/hooks.codex.json
@@ -5,7 +5,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\"",
             "statusMessage": "agentmemory: loading session context"
           }
         ]
@@ -16,7 +16,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\"",
             "statusMessage": "agentmemory: recalling relevant memories"
           }
         ]
@@ -28,7 +28,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
           }
         ]
       }
@@ -38,7 +38,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
           }
         ]
       }
@@ -48,7 +48,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
           }
         ]
       }
@@ -58,7 +58,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\""
           }
         ]
       }
diff --git a/plugin/hooks/hooks.copilot.json b/plugin/hooks/hooks.copilot.json
new file mode 100644
index 00000000..b7d09f8b
--- /dev/null
+++ b/plugin/hooks/hooks.copilot.json
@@ -0,0 +1,72 @@
+{
+  "version": 1,
+  "hooks": {
+    "sessionStart": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/session-start.mjs\""
+      }
+    ],
+    "userPromptSubmitted": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/prompt-submit.mjs\""
+      }
+    ],
+    "preToolUse": [
+      {
+        "type": "command",
+        "matcher": "edit|write|create|read|view|glob|grep",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
+      }
+    ],
+    "postToolUse": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
+      }
+    ],
+    "postToolUseFailure": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/post-tool-failure.mjs\""
+      }
+    ],
+    "preCompact": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
+      }
+    ],
+    "agentStop": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/stop.mjs\""
+      }
+    ],
+    "sessionEnd": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/session-end.mjs\""
+      }
+    ],
+    "subagentStart": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/subagent-start.mjs\""
+      }
+    ],
+    "subagentStop": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/subagent-stop.mjs\""
+      }
+    ],
+    "notification": [
+      {
+        "type": "command",
+        "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/notification.mjs\""
+      }
+    ]
+  }
+}
diff --git a/plugin/hooks/hooks.json b/plugin/hooks/hooks.json
index d60d664a..a13c9973 100644
--- a/plugin/hooks/hooks.json
+++ b/plugin/hooks/hooks.json
@@ -5,7 +5,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\""
           }
         ]
       }
@@ -15,7 +15,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\""
           }
         ]
       }
@@ -26,7 +26,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
           }
         ]
       }
@@ -36,7 +36,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
           }
         ]
       }
@@ -46,7 +46,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs\""
           }
         ]
       }
@@ -56,7 +56,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
           }
         ]
       }
@@ -66,7 +66,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs\""
           }
         ]
       }
@@ -76,7 +76,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs\""
           }
         ]
       }
@@ -86,7 +86,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs\""
           }
         ]
       }
@@ -96,7 +96,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs\""
           }
         ]
       }
@@ -106,7 +106,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\""
           }
         ]
       }
@@ -116,7 +116,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs"
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs\""
           }
         ]
       }
diff --git a/plugin/plugin.json b/plugin/plugin.json
new file mode 100644
index 00000000..4dd30bb7
--- /dev/null
+++ b/plugin/plugin.json
@@ -0,0 +1,15 @@
+{
+  "name": "agentmemory",
+  "version": "0.9.21",
+  "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 11 hooks, 53 MCP tools, 4 skills, real-time viewer.",
+  "author": {
+    "name": "Rohit Ghumare",
+    "url": "https://github.com/rohitg00"
+  },
+  "license": "Apache-2.0",
+  "homepage": "https://github.com/rohitg00/agentmemory",
+  "repository": "https://github.com/rohitg00/agentmemory",
+  "skills": "skills/",
+  "mcpServers": ".mcp.copilot.json",
+  "hooks": "hooks/hooks.copilot.json"
+}
diff --git a/plugin/scripts/notification.mjs b/plugin/scripts/notification.mjs
index a318848d..8ba2c9b0 100755
--- a/plugin/scripts/notification.mjs
+++ b/plugin/scripts/notification.mjs
@@ -22,8 +22,10 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	if (data.notification_type !== "permission_prompt") return;
-	const sessionId = data.session_id || "unknown";
+	const notificationType = data.notification_type ?? data.notificationType;
+	if (notificationType !== "permission_prompt") return;
+	const rawSessionId = data.session_id ?? data.sessionId;
+	const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown";
 	try {
 		await fetch(`${REST_URL}/agentmemory/observe`, {
 			method: "POST",
@@ -35,7 +37,7 @@ async function main() {
 				cwd: data.cwd || process.cwd(),
 				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 				data: {
-					notification_type: data.notification_type,
+					notification_type: notificationType,
 					title: data.title,
 					message: data.message
 				}
diff --git a/plugin/scripts/post-tool-failure.mjs b/plugin/scripts/post-tool-failure.mjs
index 3a593f3a..902a0930 100755
--- a/plugin/scripts/post-tool-failure.mjs
+++ b/plugin/scripts/post-tool-failure.mjs
@@ -22,8 +22,11 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	if (data.is_interrupt) return;
-	const sessionId = data.session_id || "unknown";
+	if (data.is_interrupt || data.isInterrupt) return;
+	const sessionId = data.session_id || data.sessionId || "unknown";
+	const toolName = data.tool_name ?? data.toolName;
+	const toolInput = data.tool_input ?? data.toolArgs;
+	const error = data.error ?? data.errorMessage;
 	try {
 		await fetch(`${REST_URL}/agentmemory/observe`, {
 			method: "POST",
@@ -35,9 +38,9 @@ async function main() {
 				cwd: data.cwd || process.cwd(),
 				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 				data: {
-					tool_name: data.tool_name,
-					tool_input: typeof data.tool_input === "string" ? data.tool_input.slice(0, 4e3) : JSON.stringify(data.tool_input ?? "").slice(0, 4e3),
-					error: typeof data.error === "string" ? data.error.slice(0, 4e3) : JSON.stringify(data.error ?? "").slice(0, 4e3)
+					tool_name: toolName,
+					tool_input: typeof toolInput === "string" ? toolInput.slice(0, 4e3) : JSON.stringify(toolInput ?? "").slice(0, 4e3),
+					error: typeof error === "string" ? error.slice(0, 4e3) : JSON.stringify(error ?? "").slice(0, 4e3)
 				}
 			}),
 			signal: AbortSignal.timeout(3e3)
diff --git a/plugin/scripts/post-tool-use.mjs b/plugin/scripts/post-tool-use.mjs
index 5ebec645..68a78ef7 100755
--- a/plugin/scripts/post-tool-use.mjs
+++ b/plugin/scripts/post-tool-use.mjs
@@ -22,8 +22,10 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
-	const { imageData, cleanOutput } = extractImageData(data.tool_output);
+	const sessionId = data.session_id || data.sessionId || "unknown";
+	const toolName = data.tool_name ?? data.toolName;
+	const toolInput = data.tool_input ?? data.toolArgs;
+	const { imageData, cleanOutput } = extractImageData(toolOutput(data));
 	try {
 		await fetch(`${REST_URL}/agentmemory/observe`, {
 			method: "POST",
@@ -35,8 +37,8 @@ async function main() {
 				cwd: data.cwd || process.cwd(),
 				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 				data: {
-					tool_name: data.tool_name,
-					tool_input: data.tool_input,
+					tool_name: toolName,
+					tool_input: toolInput,
 					tool_output: truncate(cleanOutput, 8e3),
 					...imageData ? { image_data: imageData } : {}
 				}
@@ -45,6 +47,16 @@ async function main() {
 		});
 	} catch {}
 }
+function toolOutput(data) { // Pick the tool result from whichever field the hook payload carries.
+	if (data.tool_response !== void 0) return data.tool_response; // tool_response wins over tool_output when both are set
+	if (data.tool_output !== void 0) return data.tool_output;
+	const result = data.tool_result ?? data.toolResult;
+	if (typeof result === "object" && result !== null) {
+		const obj = result;
+		return obj.text_result_for_llm ?? obj.textResultForLlm ?? result; // prefer the text field; otherwise return the object itself
+	}
+	return result; // a primitive result, or undefined when none of the known fields is set
+}
 function isBase64Image(val) {
 	return typeof val === "string" && (val.startsWith("data:image/") || val.startsWith("iVBORw0KGgo") || val.startsWith("/9j/"));
 }
diff --git a/plugin/scripts/pre-compact.mjs b/plugin/scripts/pre-compact.mjs
index bff9e7fa..b68bf025 100755
--- a/plugin/scripts/pre-compact.mjs
+++ b/plugin/scripts/pre-compact.mjs
@@ -22,7 +22,7 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
 	const project = data.cwd || process.cwd();
 	if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") try {
 		await fetch(`${REST_URL}/agentmemory/claude-bridge/sync`, {
diff --git a/plugin/scripts/pre-tool-use.mjs b/plugin/scripts/pre-tool-use.mjs
index 561b6b0d..16892fcd 100755
--- a/plugin/scripts/pre-tool-use.mjs
+++ b/plugin/scripts/pre-tool-use.mjs
@@ -24,18 +24,22 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const toolName = data.tool_name;
+	const toolName = typeof data.tool_name === "string" ? data.tool_name : typeof data.toolName === "string" ? data.toolName : void 0;
 	if (!toolName) return;
+	const normalizedToolName = toolName.toLowerCase();
 	if (![
-		"Edit",
-		"Write",
-		"Read",
-		"Glob",
-		"Grep"
-	].includes(toolName)) return;
-	const toolInput = data.tool_input || {};
+		"edit",
+		"write",
+		"create",
+		"read",
+		"view",
+		"glob",
+		"grep"
+	].includes(normalizedToolName)) return;
+	const rawToolInput = data.tool_input ?? data.toolArgs;
+	const toolInput = typeof rawToolInput === "object" && rawToolInput !== null && !Array.isArray(rawToolInput) ? rawToolInput : {};
 	const files = [];
-	const fileKeys = toolName === "Grep" ? ["path", "file"] : [
+	const fileKeys = normalizedToolName === "grep" ? ["path", "file"] : [
 		"file_path",
 		"path",
 		"file",
@@ -47,11 +51,12 @@ async function main() {
 	}
 	if (files.length === 0) return;
 	const terms = [];
-	if (toolName === "Grep" || toolName === "Glob") {
+	if (normalizedToolName === "grep" || normalizedToolName === "glob") {
 		const pattern = toolInput["pattern"];
 		if (typeof pattern === "string" && pattern.length > 0) terms.push(pattern);
 	}
-	const sessionId = data.session_id || "unknown";
+	const rawSessionId = data.session_id || data.sessionId;
+	const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown";
 	try {
 		const res = await fetch(`${REST_URL}/agentmemory/enrich`, {
 			method: "POST",
diff --git a/plugin/scripts/prompt-submit.mjs b/plugin/scripts/prompt-submit.mjs
index 18aa040a..a8a61192 100755
--- a/plugin/scripts/prompt-submit.mjs
+++ b/plugin/scripts/prompt-submit.mjs
@@ -22,7 +22,7 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
 	try {
 		await fetch(`${REST_URL}/agentmemory/observe`, {
 			method: "POST",
@@ -33,7 +33,7 @@ async function main() {
 				project: data.cwd || process.cwd(),
 				cwd: data.cwd || process.cwd(),
 				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-				data: { prompt: data.prompt }
+				data: { prompt: data.prompt ?? data.userPrompt }
 			}),
 			signal: AbortSignal.timeout(3e3)
 		});
diff --git a/plugin/scripts/session-end.mjs b/plugin/scripts/session-end.mjs
index 8e1de092..7707e357 100755
--- a/plugin/scripts/session-end.mjs
+++ b/plugin/scripts/session-end.mjs
@@ -22,7 +22,7 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
 	try {
 		await fetch(`${REST_URL}/agentmemory/session/end`, {
 			method: "POST",
diff --git a/plugin/scripts/session-start.mjs b/plugin/scripts/session-start.mjs
index 9e573e24..f1ec1be6 100755
--- a/plugin/scripts/session-start.mjs
+++ b/plugin/scripts/session-start.mjs
@@ -25,7 +25,7 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || `ses_${Date.now().toString(36)}`;
+	const sessionId = data.session_id || data.sessionId || `ses_${Date.now().toString(36)}`;
 	const project = data.cwd || process.cwd();
 	const url = `${REST_URL}/agentmemory/session/start`;
 	const init = {
diff --git a/plugin/scripts/stop.mjs b/plugin/scripts/stop.mjs
index e0ffa350..3fe5cb36 100755
--- a/plugin/scripts/stop.mjs
+++ b/plugin/scripts/stop.mjs
@@ -22,7 +22,7 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
 	try {
 		await fetch(`${REST_URL}/agentmemory/summarize`, {
 			method: "POST",
diff --git a/plugin/scripts/subagent-start.mjs b/plugin/scripts/subagent-start.mjs
index db143459..c0d0b5eb 100755
--- a/plugin/scripts/subagent-start.mjs
+++ b/plugin/scripts/subagent-start.mjs
@@ -23,7 +23,9 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
+	const agentId = data.agent_id || data.agentName;
+	const agentType = data.agent_type || data.agentDisplayName || data.agentName;
 	fetch(`${REST_URL}/agentmemory/observe`, {
 		method: "POST",
 		headers: authHeaders(),
@@ -34,8 +36,8 @@ async function main() {
 			cwd: data.cwd || process.cwd(),
 			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 			data: {
-				agent_id: data.agent_id,
-				agent_type: data.agent_type
+				agent_id: agentId,
+				agent_type: agentType
 			}
 		}),
 		signal: AbortSignal.timeout(TIMEOUT_MS)
diff --git a/plugin/scripts/subagent-stop.mjs b/plugin/scripts/subagent-stop.mjs
index 7ec66a7d..8765756d 100755
--- a/plugin/scripts/subagent-stop.mjs
+++ b/plugin/scripts/subagent-stop.mjs
@@ -22,7 +22,9 @@ async function main() {
 		return;
 	}
 	if (isSdkChildContext(data)) return;
-	const sessionId = data.session_id || "unknown";
+	const sessionId = data.session_id || data.sessionId || "unknown";
+	const agentId = data.agent_id || data.agentName;
+	const agentType = data.agent_type || data.agentDisplayName || data.agentName;
 	const lastMsg = typeof data.last_assistant_message === "string" ? data.last_assistant_message.slice(0, 4e3) : "";
 	try {
 		await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -35,8 +37,8 @@ async function main() {
 				cwd: data.cwd || process.cwd(),
 				timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 				data: {
-					agent_id: data.agent_id,
-					agent_type: data.agent_type,
+					agent_id: agentId,
+					agent_type: agentType,
 					last_message: lastMsg
 				}
 			}),
diff --git a/scripts/backfill-imported-sessions.sh b/scripts/backfill-imported-sessions.sh
new file mode 100755
index 00000000..a247a57e
--- /dev/null
+++ b/scripts/backfill-imported-sessions.sh
@@ -0,0 +1,259 @@
+#!/usr/bin/env bash
+# Backfill memory artifacts for sessions imported via `agentmemory import-jsonl`.
+#
+# The import path only persists Session + Observation rows (via synthetic,
+# zero-LLM compression) and the deterministic crystal/lesson derivation.
+# It does NOT call mem::summarize, so the semantic/procedural/reflect tiers
+# of the consolidation pipeline have nothing to roll up.
+#
+# This script walks every session tagged `jsonl-import` and:
+#   1. POSTs /agentmemory/summarize per session  (LLM call)
+#   2. POSTs /agentmemory/consolidate-pipeline once at the end
+#
+# Graph extraction (/agentmemory/graph/extract) is intentionally skipped —
+# its API takes a per-observation payload, which is cost-prohibitive for
+# bulk imports. `reflect` falls back to a no-graph clustering mode.
+#
+# Usage:
+#   scripts/backfill-imported-sessions.sh --dry-run
+#   scripts/backfill-imported-sessions.sh --limit 5
+#   scripts/backfill-imported-sessions.sh                 # process all
+
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+URL="${AGENTMEMORY_URL:-http://localhost:3111}"  # server base URL; override via AGENTMEMORY_URL
+DRY_RUN=0         # --dry-run: list matching sessions + cost estimate, make no POSTs
+LIMIT=0           # 0 = no limit
+ONLY_TAG="jsonl-import"  # --tag: only sessions carrying this tag; "" disables the tag filter
+SKIP_CONSOLIDATE=0  # --skip-consolidate: 1 = do not POST consolidate-pipeline at the end
+SKIP_AGENTS=0     # 1 = drop sessions whose project starts with "agent-"
+MAX_OBS=0         # 0 = no cap; skip sessions with more observations than this
+DEBUG_ON_ERROR=0  # on failure, dump session metadata + obs to DEBUG_DIR
+DEBUG_DIR="${AGENTMEMORY_DEBUG_DIR:-./agentmemory-debug}"
+PROJECT_PATTERN=""  # jq test() regex against .project; "" means no filter
+
+# Cost-estimate knobs (defaults tuned for DeepSeek V4 Flash on DeepInfra:
+# $0.14 / 1M input, $0.28 / 1M output). Override via env if needed.
+COST_IN_PER_1M="${AGENTMEMORY_COST_IN_PER_1M:-0.14}"
+COST_OUT_PER_1M="${AGENTMEMORY_COST_OUT_PER_1M:-0.28}"
+# Rough token weight per compressed observation, derived from inspecting
+# real synthetic-compression payloads in the kv store (mostly 100-300 tok,
+# heavy-tailed). Override if your sessions are unusually verbose.
+TOKENS_PER_OBS="${AGENTMEMORY_TOKENS_PER_OBS:-200}"
+# Reserved per-call output budget (XML summary is small).
+TOKENS_OUT_PER_SESSION="${AGENTMEMORY_TOKENS_OUT_PER_SESSION:-500}"
+
+while [[ $# -gt 0 ]]; do  # flag parsing; numeric flags are validated here so later [[ -gt ]] / jq uses cannot choke on them
+  case "$1" in
+    --dry-run)         DRY_RUN=1; shift ;;
+    --limit)           LIMIT="${2:?--limit needs a number}"; [[ "$LIMIT" =~ ^[0-9]+$ ]] || { echo "--limit needs a non-negative integer, got: $LIMIT" >&2; exit 2; }; shift 2 ;;
+    --tag)             ONLY_TAG="${2?--tag needs a value (use empty string for all)}"; shift 2 ;;  # `?` without `:` — a missing value is an error, "" is accepted
+    --skip-consolidate) SKIP_CONSOLIDATE=1; shift ;;
+    --skip-agents)     SKIP_AGENTS=1; shift ;;
+    --max-obs)         MAX_OBS="${2:?--max-obs needs a number}"; [[ "$MAX_OBS" =~ ^[0-9]+$ ]] || { echo "--max-obs needs a non-negative integer, got: $MAX_OBS" >&2; exit 2; }; shift 2 ;;
+    --debug-on-error)  DEBUG_ON_ERROR=1; shift ;;
+    --project-pattern) PROJECT_PATTERN="${2:?--project-pattern needs a regex}"; shift 2 ;;
+    -h|--help)  # print the header comment: script line 2 up to the first blank line (a fixed range would leak code)
+      sed -n '2,/^$/p' "$0"
+      exit 0 ;;
+    *) echo "unknown flag: $1" >&2; exit 2 ;;
+  esac
+done
+
+for bin in curl jq; do  # hard requirements: fail early with a clear message instead of mid-run
+  command -v "$bin" >/dev/null || { echo "missing dependency: $bin" >&2; exit 1; }
+done
+
+# Curl timeout profiles. Metadata reads (livez, sessions list, observations
+# pull for debug dumps) should fail fast and retry transient blips. The LLM
+# work calls (summarize, consolidate) intentionally have no --retry and a
+# wide --max-time: each call can legitimately take minutes for chunked
+# summarize on large sessions, and retrying a half-finished LLM job is
+# expensive both in dollars and in duplicated server-side work.
+META_CURL_OPTS=(--connect-timeout 10 --max-time 30 --retry 2 --retry-delay 1)
+WORK_CURL_OPTS=(--connect-timeout 10 --max-time 1800)  # 1800 s = 30 min ceiling per work call
+
+echo "agentmemory backfill — server: $URL"
+[[ "$DRY_RUN" == 1 ]] && echo "DRY RUN: no POSTs will be made."  # a false test in an && list does not trip `set -e`
+
+# --- liveness: stop before any work if the server does not answer /livez ---
+if ! curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/livez" >/dev/null; then
+  echo "server not reachable at $URL (try: npx @agentmemory/agentmemory)" >&2
+  exit 1
+fi
+
+# --- collect session ids (filter program is static; user values are bound as jq variables) ---
+sessions_json="$(curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/sessions")"
+filter='.sessions[] | select(.status=="completed")'
+if [[ -n "$ONLY_TAG" ]]; then
+  filter+=' | select((.tags // []) | index($tag))'
+fi
+if [[ "$SKIP_AGENTS" == 1 ]]; then
+  filter+=' | select((.project // "") | startswith("agent-") | not)'
+fi
+if [[ -n "$PROJECT_PATTERN" ]]; then
+  # test() gets the regex through --arg below, so backslashes and quotes in it reach jq intact.
+  filter+=' | select((.project // "") | test($pat))'
+fi
+if [[ "$MAX_OBS" -gt 0 ]]; then
+  filter+=' | select((.observationCount // 0) <= $maxObs)'
+fi
+filter+=' | "\(.id)\t\(.observationCount // 0)\t\(.project // "")"'
+
+rows=()
+while IFS= read -r line; do
+  rows+=("$line")
+done < <(printf '%s\n' "$sessions_json" | jq -r --arg tag "$ONLY_TAG" --arg pat "$PROJECT_PATTERN" --argjson maxObs "$MAX_OBS" "$filter")
+total="${#rows[@]}"
+
+if [[ "$total" -eq 0 ]]; then
+  echo "no sessions matched (tag='$ONLY_TAG'); nothing to do."
+  exit 0
+fi
+
+if [[ "$LIMIT" -gt 0 && "$LIMIT" -lt "$total" ]]; then
+  rows=("${rows[@]:0:$LIMIT}")
+fi
+
+echo "matched $total session(s); will process ${#rows[@]}."
+total_obs=0  # sum of observationCount over the sessions that will be processed
+for row in "${rows[@]}"; do
+  obs="$(cut -f2 <<<"$row")"  # row layout: id <TAB> observationCount <TAB> project
+  total_obs=$(( total_obs + obs ))
+done
+est_in=$(( total_obs * TOKENS_PER_OBS + ${#rows[@]} * 500 ))  # NOTE(review): flat 500 tok per session is presumably prompt overhead — confirm
+est_out=$(( ${#rows[@]} * TOKENS_OUT_PER_SESSION ))
+est_cost="$(awk -v i="$est_in" -v o="$est_out" -v ci="$COST_IN_PER_1M" -v co="$COST_OUT_PER_1M" \
+  'BEGIN { printf "%.2f", (i*ci + o*co) / 1000000 }')"  # awk because bash arithmetic is integer-only
+
+echo "≈ ${#rows[@]} summarize LLM calls (one per session, covering $total_obs observations)"
+printf '≈ %d input tok + %d output tok → $%s  (rates: in=$%s/1M out=$%s/1M, %s tok/obs)\n' \
+  "$est_in" "$est_out" "$est_cost" "$COST_IN_PER_1M" "$COST_OUT_PER_1M" "$TOKENS_PER_OBS"
+echo
+
+if [[ "$DRY_RUN" == 1 ]]; then  # dry run: print the selection and the planned requests, then stop
+  printf '%-40s %10s  %s\n' "session" "obs" "project"
+  for row in "${rows[@]}"; do
+    id="$(cut -f1 <<<"$row")"
+    obs="$(cut -f2 <<<"$row")"
+    proj="$(cut -f3 <<<"$row")"
+    printf '%-40s %10s  %s\n' "$id" "$obs" "$proj"
+  done
+  echo
+  echo "(dry run) next steps if you re-run without --dry-run:"
+  echo "  for each session above: POST $URL/agentmemory/summarize {sessionId}"
+  if [[ "$SKIP_CONSOLIDATE" == 0 ]]; then
+    echo "  then: POST $URL/agentmemory/consolidate-pipeline {}"
+  fi
+  exit 0
+fi
+
+# --- debug-dump setup: create DEBUG_DIR up front so failed summarize calls can write into it ---
+if [[ "$DEBUG_ON_ERROR" == 1 ]]; then
+  mkdir -p "$DEBUG_DIR"
+  echo "debug mode: failed calls will dump to $DEBUG_DIR/"
+  echo
+fi
+
+dump_failure() {
+  local id="$1" obs="$2" resp="$3"
+  # Sanitize the id before using it as a file name: anything outside
+  # [A-Za-z0-9._-] becomes `_`, so an id containing `/` or `..` cannot
+  # escape DEBUG_DIR (the server does not enforce UUID-shaped ids).
+  local safe_id file
+  safe_id="$(printf '%s' "$id" | tr -c 'A-Za-z0-9._-' '_')"
+  file="$DEBUG_DIR/${safe_id}.json"
+  # Fetch the observations (what would have gone into the prompt) and stream
+  # them to jq on stdin — argv would overflow on multi-thousand-obs sessions
+  # (macOS argv ceiling is ~256k). `--get --data-urlencode` percent-encodes
+  # the session id. The response goes in via --arg and is parsed inside jq
+  # because failed calls often carry non-JSON bodies. The dump is
+  # best-effort: it runs inside `if` so a curl/jq failure is reported here
+  # instead of tripping `set -e`/pipefail and aborting the whole backfill.
+  if curl -fsS "${META_CURL_OPTS[@]}" --get \
+       --data-urlencode "sessionId=$id" \
+       "$URL/agentmemory/observations" \
+    | jq \
+        --arg id "$id" \
+        --argjson obsCount "$obs" \
+        --arg url "$URL/agentmemory/summarize" \
+        --arg response "$resp" \
+        '.observations as $obs
+         | {
+             sessionId: $id,
+             observationCount: $obsCount,
+             request: { url: $url, method: "POST", body: { sessionId: $id } },
+             response: ((try ($response | fromjson) catch null) // $response),
+             observations: $obs,
+             stats: {
+               totalNarrativeBytes: ($obs | map(.narrative // "" | utf8bytelength) | add // 0),
+               maxNarrativeBytes:   ($obs | map(.narrative // "" | utf8bytelength) | max // 0),
+               titleHistogram:      ($obs | group_by(.title) | map({title: .[0].title, count: length}) | sort_by(-.count))
+             }
+           }' >"$file"
+  then echo "      → $file"
+  else echo "      ! debug dump failed for $id (observations fetch or jq encoding failed)" >&2
+  fi
+}
+
+ok=0; skipped=0; failed=0
+i=0
+for row in "${rows[@]}"; do
+  i=$(( i + 1 ))
+  id="$(cut -f1 <<<"$row")"
+  obs="$(cut -f2 <<<"$row")"
+
+  body="$(jq -nc --arg id "$id" '{sessionId:$id}')"
+  resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/summarize" \
+    -H 'content-type: application/json' --data "$body" || echo '{"success":false,"error":"curl_failed"}')"
+  # iii's HTTP layer occasionally returns non-JSON (HTML 5xx, empty body on
+  # timeout, etc.) or JSON that is not an object. Require an object before
+  # indexing it, so `set -e` doesn't abort the whole loop on one bad response.
+  if jq -e 'type == "object"' >/dev/null 2>&1 <<<"$resp"; then
+    status="$(jq -r '.success // false' <<<"$resp")"
+    err="$(jq -r '.error // ""' <<<"$resp")"
+    title="$(jq -r '.summary.title // ""' <<<"$resp")"
+  else
+    status="false"
+    err="invalid_json_response"
+    title=""
+  fi
+
+  if [[ "$status" == "true" ]]; then
+    ok=$(( ok + 1 ))
+    printf '[%3d/%3d] OK    %s  obs=%-5s  %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$title"
+  elif [[ "$err" == "no_observations" || "$err" == "no_provider" ]]; then
+    skipped=$(( skipped + 1 ))
+    printf '[%3d/%3d] SKIP  %s  obs=%-5s  %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err"
+  else
+    failed=$(( failed + 1 ))
+    printf '[%3d/%3d] FAIL  %s  obs=%-5s  %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err"
+    [[ "$DEBUG_ON_ERROR" == 1 ]] && { dump_failure "$id" "$obs" "$resp" || true; }  # the debug dump must never stop the loop
+  fi
+done
+
+echo
+echo "summarize: ok=$ok skipped=$skipped failed=$failed"
+
+# --- consolidate: one pipeline run after the loop; skipped on request or when no summary succeeded ---
+if [[ "$SKIP_CONSOLIDATE" == 1 ]]; then
+  echo "skipping consolidate-pipeline (--skip-consolidate)"
+  exit 0
+fi
+
+if [[ "$ok" -eq 0 ]]; then  # nothing new was summarized in this run
+  echo "no summaries produced; skipping consolidate-pipeline."
+  exit 0
+fi
+
+echo
+echo "running consolidate-pipeline …"
+resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/consolidate-pipeline" \
+  -H 'content-type: application/json' --data '{}' || echo '{"success":false,"error":"curl_failed"}')"  # fallback JSON stands in when curl itself fails
+if jq -e . >/dev/null 2>&1 <<<"$resp"; then  # pretty-print only when the body parses as JSON
+  echo "$resp" | jq .
+else
+  echo "consolidate-pipeline returned non-JSON (likely a timeout or upstream error):"
+  printf '%s\n' "$resp" | head -c 500
+  echo
+fi
diff --git a/src/cli.ts b/src/cli.ts
index 5eca18ce..d3d33855 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -117,8 +117,9 @@ Usage: agentmemory [command] [options]
 Commands:
   (default)          Start agentmemory worker
   init               Copy bundled .env.example to ~/.agentmemory/.env if absent
-  connect [agent]    Wire agentmemory into an installed agent (claude-code, codex,
-                     cursor, gemini-cli, openclaw, hermes, pi, openhuman).
+  connect [agent]    Wire agentmemory into an installed agent (claude-code,
+                     copilot-cli, codex, cursor, gemini-cli, openclaw,
+                     hermes, pi, openhuman).
                      No arg = interactive picker. --all wires every detected agent.
                      --dry-run shows what would change. --force re-installs.
   status             Show connection status, memory count, flags, and health
@@ -195,9 +196,36 @@ function getBaseUrl(): string {
   return `http://localhost:${getRestPort()}`;
 }
 
+let discoveredViewerPort: number | null = null;
+
+/** Best-effort probe: caches the `viewerPort` reported by `/agentmemory/livez`; leaves the cache untouched on any failure. */
+export async function discoverViewerPort(): Promise<void> {
+  if (discoveredViewerPort !== null) return;
+  try {
+    const livez = await fetch(`${getBaseUrl()}/agentmemory/livez`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    if (!livez.ok) return;
+    const body = (await livez.json()) as { viewerPort?: number | null };
+    if (typeof body.viewerPort === "number") discoveredViewerPort = body.viewerPort;
+  } catch {
+    // Network error, timeout, or non-JSON body: the port simply stays undiscovered.
+  }
+}
+
 function getViewerUrl(): string {
   const envUrl = process.env["AGENTMEMORY_VIEWER_URL"];
   if (envUrl) return envUrl.replace(/\/+$/, "");
+  
+  if (discoveredViewerPort !== null) {
+    try {
+      const u = new URL(getBaseUrl());
+      return `${u.protocol}//${u.hostname}:${discoveredViewerPort}`;
+    } catch {
+      return `http://localhost:${discoveredViewerPort}`;
+    }
+  }
+  
   try {
     const u = new URL(getBaseUrl());
     const vPort =
@@ -257,7 +285,18 @@ async function isAgentmemoryReady(): Promise<boolean> {
     const res = await fetch(`${getBaseUrl()}/agentmemory/livez`, {
       signal: AbortSignal.timeout(2000),
     });
-    return res.ok;
+    if (!res.ok) return false;
+    try {
+      const data = await res.json() as { viewerPort?: number | null; viewerSkipped?: boolean };
+      if (typeof data.viewerPort === "number") {
+        discoveredViewerPort = data.viewerPort;
+        return true;
+      }
+      if (data.viewerSkipped) return true;
+      return false;
+    } catch {
+      return false;
+    }
   } catch {
     return false;
   }
@@ -497,17 +536,8 @@ function detectIiiConsole(): IiiConsoleState {
   return { kind: "missing" };
 }
 
-// install.iii.dev/console/main/install.sh has a bug in its release-tag
-// filter that rejects every stable release for iii-hq/iii: the jq
-// predicate uses `startswith("v")` while the actual tags are
-// `iii/v0.12.0` (slash-prefixed). The `--next` path uses a regex
-// without the startswith constraint and therefore works today,
-// installing the most recent prerelease (e.g. iii/v0.14.0-next.1).
-//
-// Pass `--next` until the upstream fix lands (iii-hq/iii#1652).
-// Switch back to the bare invocation once the script is patched.
 const III_CONSOLE_INSTALL_CMD =
-  "curl -fsSL https://install.iii.dev/console/main/install.sh | bash -s -- --next";
+  "curl -fsSL https://install.iii.dev/console/main/install.sh | sh";
 
 async function ensureIiiConsole(): Promise<IiiConsoleState> {
   const state = detectIiiConsole();
@@ -1101,6 +1131,9 @@ async function runStatus() {
       apiFetch<any>(base, "config/flags"),
     ]);
 
+    if (typeof healthRes?.viewerPort === "number") {
+      discoveredViewerPort = healthRes.viewerPort;
+    }
     const h = healthRes?.health;
     const status = healthRes?.status || "unknown";
     const version = healthRes?.version || "?";
@@ -1260,6 +1293,7 @@ function buildDoctorEffects(): DoctorEffects {
     iiiBinaryVersion: (binPath: string) => iiiBinVersion(binPath),
     viewerReachable: async (timeoutMs = 2000) => {
       try {
+        await discoverViewerPort();
         const res = await fetch(getViewerUrl(), {
           signal: AbortSignal.timeout(timeoutMs),
         });
@@ -1975,8 +2009,8 @@ async function runUpgrade() {
         label: "Refreshing dependencies (pnpm install)",
       });
       requireSuccess(installOk, "pnpm install");
-      runCommand(pnpmBin, ["up", "iii-sdk@latest"], {
-        label: "Upgrading iii-sdk to latest",
+      runCommand(pnpmBin, ["up", "iii-sdk@0.11.2"], {
+        label: "Pinning iii-sdk@0.11.2",
         optional: true,
       });
     } else if (npmBin) {
@@ -1984,8 +2018,8 @@ async function runUpgrade() {
         label: "Refreshing dependencies (npm install)",
       });
       requireSuccess(installOk, "npm install");
-      runCommand(npmBin, ["install", "iii-sdk@latest"], {
-        label: "Upgrading iii-sdk to latest",
+      runCommand(npmBin, ["install", "iii-sdk@0.11.2"], {
+        label: "Pinning iii-sdk@0.11.2",
         optional: true,
       });
     } else {
diff --git a/src/cli/connect/codex-hooks.ts b/src/cli/connect/codex-hooks.ts
new file mode 100644
index 00000000..14b8284a
--- /dev/null
+++ b/src/cli/connect/codex-hooks.ts
@@ -0,0 +1,107 @@
+import { existsSync, readFileSync } from "node:fs";
+import { dirname, join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+/**
+ * Workaround for openai/codex#16430 — Codex Desktop does not dispatch
+ * plugin-local `hooks.json` even though both `CodexHooks` and `PluginHooks`
+ * feature flags are stable + default-enabled in
+ * `codex-rs/features/src/lib.rs`. Until upstream fixes plugin-scope
+ * dispatch, the same hook commands can be mirrored into the global
+ * `~/.codex/hooks.json`, which is loaded reliably.
+ *
+ * This module builds that mirror, with `${CLAUDE_PLUGIN_ROOT}` resolved to
+ * the bundled `plugin/` directory so the user-scope file does not depend
+ * on env-var expansion (Codex only injects `CLAUDE_PLUGIN_ROOT` for
+ * plugin-scope hooks).
+ *
+ * Identification on re-install: every command we write contains the
+ * resolved `<pluginRoot>/scripts/` prefix, so subsequent installs can
+ * strip our entries and re-add cleanly without touching the user's other
+ * hook entries.
+ */
+
+type HookHandler = { type: string; command: string };
+type HookEntry = { matcher?: string; hooks: HookHandler[] };
+export type HookManifest = { hooks: Record<string, HookEntry[]> };
+
+/**
+ * Resolve the absolute path of the bundled `plugin/` directory.
+ *
+ * Starting at the directory containing `startUrl` (this module by default),
+ * climbs at most 12 levels and returns the first `<dir>/plugin` that holds
+ * both `scripts/` and `hooks/`. Intended to work for the bundled
+ * `dist/cli.mjs` layout as well as the dev `src/cli/connect/` layout.
+ * Throws an Error when the search ends without a match.
+ */
+export function findPluginRoot(startUrl: string = import.meta.url): string {
+  const origin = dirname(fileURLToPath(startUrl));
+  let current = origin;
+  for (let depth = 0; depth < 12; depth += 1) {
+    const candidate = join(current, "plugin");
+    const hasLayout = ["scripts", "hooks"].every((sub) => existsSync(join(candidate, sub)));
+    if (hasLayout) return resolve(candidate);
+    const up = dirname(current);
+    if (up === current) break; // dirname() no longer changes the path: top of the tree
+    current = up;
+  }
+  throw new Error(
+    `agentmemory: could not locate bundled plugin/ directory (searched up from ${origin})`,
+  );
+}
+
+/**
+ * Build the merged hooks.json content.
+ *
+ *   1. Drop every entry from `existing` in which ANY handler command
+ *      references `<pluginRoot>/scripts/`, so re-installs are idempotent
+ *      and leave no stale copies of our own entries.
+ *   2. Append fresh entries from the bundled Codex manifest with
+ *      `${CLAUDE_PLUGIN_ROOT}` rewritten to the absolute plugin path.
+ *      Matcher values from the bundled manifest are preserved so PreToolUse
+ *      event routing keeps working.
+ *
+ * @throws if `<pluginRoot>/hooks/hooks.codex.json` cannot be read or parsed.
+ */
+export function buildMergedHooks(
+  existing: HookManifest | null,
+  pluginRoot: string,
+): HookManifest {
+  const codexManifestPath = join(pluginRoot, "hooks", "hooks.codex.json");
+  const ours = JSON.parse(readFileSync(codexManifestPath, "utf-8")) as HookManifest;
+  const scriptsDir = join(pluginRoot, "scripts");
+  const out: HookManifest = { hooks: {} };
+
+  for (const [event, entries] of Object.entries(existing?.hooks ?? {})) {
+    const kept = entries.filter((entry) => !isAgentmemoryEntry(entry, scriptsDir));
+    if (kept.length > 0) out.hooks[event] = kept;
+  }
+
+  for (const [event, entries] of Object.entries(ours.hooks)) {
+    const resolvedEntries: HookEntry[] = entries.map((entry) => {
+      const next: HookEntry = {
+        hooks: entry.hooks.map((handler) => ({
+          type: handler.type,
+          // Function replacer: a plain string would expand `$&`, `$1`, `$$` occurring in the path.
+          command: handler.command.replace(/\$\{CLAUDE_PLUGIN_ROOT\}/g, () => pluginRoot),
+        })),
+      };
+      if (entry.matcher !== undefined) next.matcher = entry.matcher;
+      return next;
+    });
+    out.hooks[event] = [...(out.hooks[event] ?? []), ...resolvedEntries];
+  }
+
+  return out;
+}
+
+/** True when any handler command in `entry` references `scriptsDir`; a malformed entry (no handler array, non-string command) is never treated as ours. */
+function isAgentmemoryEntry(entry: HookEntry, scriptsDir: string): boolean {
+  const needle = normalizePathForCommandMatch(scriptsDir);
+  if (!Array.isArray(entry?.hooks)) return false; // hand-edited hooks.json: keep the entry rather than throw
+  return entry.hooks.some((h) => typeof h?.command === "string" && normalizePathForCommandMatch(h.command).includes(needle));
+}
+
+function normalizePathForCommandMatch(value: string): string {
+  return value.split("\\").join("/"); // every backslash becomes a forward slash, so both separator styles compare equal
+}
diff --git a/src/cli/connect/codex.ts b/src/cli/connect/codex.ts
index 003dc99a..a87b2858 100644
--- a/src/cli/connect/codex.ts
+++ b/src/cli/connect/codex.ts
@@ -8,10 +8,18 @@ import {
   logAlreadyWired,
   logBackup,
   logInstalled,
+  readJsonSafe,
+  writeJsonAtomic,
 } from "./util.js";
+import {
+  buildMergedHooks,
+  findPluginRoot,
+  type HookManifest,
+} from "./codex-hooks.js";
 
 const CODEX_DIR = join(homedir(), ".codex");
 const CODEX_TOML = join(CODEX_DIR, "config.toml");
+const CODEX_HOOKS = join(CODEX_DIR, "hooks.json");
 
 const TOML_BLOCK = `[mcp_servers.agentmemory]
 command = "npx"
@@ -57,7 +65,7 @@ export const adapter: ConnectAdapter = {
   displayName: "Codex CLI",
   docs: "https://github.com/rohitg00/agentmemory#codex-cli-codex-plugin-platform",
   protocolNote:
-    "→ Using MCP. Hooks are also available — see docs/codex.md.",
+    "→ Using MCP. Hooks ship via the Codex plugin; on Codex Desktop, also pass --with-hooks to install the global hooks.json workaround for openai/codex#16430.",
 
   detect(): boolean {
     return existsSync(CODEX_DIR);
@@ -77,6 +85,7 @@ export const adapter: ConnectAdapter = {
       p.log.info(
         `[dry-run] Would ${wired ? "rewrite" : "append"} [mcp_servers.agentmemory] in ${CODEX_TOML}`,
       );
+      if (opts.withHooks) installCodexHooks(opts);
       return { kind: "installed", mutatedPath: CODEX_TOML };
     }
 
@@ -105,6 +114,16 @@ export const adapter: ConnectAdapter = {
     p.log.info(
       "Codex picks up MCP servers on next launch. For the deeper plugin install, run: codex plugin marketplace add rohitg00/agentmemory && codex plugin install agentmemory",
     );
+
+    if (opts.withHooks) {
+      const hookResult = installCodexHooks(opts);
+      if (hookResult.kind === "skipped") {
+        p.log.warn(
+          `Codex hooks fallback skipped: ${hookResult.reason}. MCP wiring still applied.`,
+        );
+      }
+    }
+
     return {
       kind: "installed",
       mutatedPath: CODEX_TOML,
@@ -112,3 +131,50 @@ export const adapter: ConnectAdapter = {
     };
   },
 };
+
+/**
+ * Install the global `~/.codex/hooks.json` fallback (see `codex-hooks.ts`,
+ * openai/codex#16430). Returns "skipped" with a reason when the bundled
+ * plugin dir or its Codex manifest cannot be located, read, or parsed, so a
+ * failure here never aborts or rolls back the MCP wiring already applied.
+ */
+function installCodexHooks(opts: ConnectOptions): ConnectResult {
+  let existing: HookManifest | null;
+  let merged: HookManifest;
+  try { // findPluginRoot and buildMergedHooks can both throw; either becomes a "skipped" result
+    const pluginRoot = findPluginRoot();
+    existing = readJsonSafe<HookManifest>(CODEX_HOOKS);
+    merged = buildMergedHooks(existing, pluginRoot);
+  } catch (err) {
+    return {
+      kind: "skipped",
+      reason: err instanceof Error ? err.message : String(err),
+    };
+  }
+
+  if (opts.dryRun) {
+    p.log.info(
+      `[dry-run] Would ${existing ? "merge" : "create"} ${CODEX_HOOKS} with ${Object.keys(merged.hooks).length} event(s)`,
+    );
+    return { kind: "installed", mutatedPath: CODEX_HOOKS };
+  }
+
+  let backupPath: string | undefined;
+  if (existsSync(CODEX_HOOKS)) {
+    backupPath = backupFile(CODEX_HOOKS, "codex-hooks", "json");
+    logBackup(backupPath);
+  }
+
+  writeJsonAtomic(CODEX_HOOKS, merged);
+
+  logInstalled("Codex hooks (workaround for openai/codex#16430)", CODEX_HOOKS);
+  p.log.info(
+    "User-scope hooks reference absolute paths under the bundled plugin/ dir. Re-run `agentmemory connect codex --with-hooks` after upgrading agentmemory to refresh them.",
+  );
+
+  return {
+    kind: "installed",
+    mutatedPath: CODEX_HOOKS,
+    ...(backupPath !== undefined && { backupPath }),
+  };
+}
diff --git a/src/cli/connect/copilot-cli.ts b/src/cli/connect/copilot-cli.ts
new file mode 100644
index 00000000..8cce5a54
--- /dev/null
+++ b/src/cli/connect/copilot-cli.ts
@@ -0,0 +1,91 @@
+import { existsSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { dirname, join } from "node:path";
+import * as p from "@clack/prompts";
+import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js";
+import {
+  AGENTMEMORY_COPILOT_MCP_BLOCK,
+  backupFile,
+  logAlreadyWired,
+  logBackup,
+  logInstalled,
+  readJsonSafe,
+  writeJsonAtomic,
+} from "./util.js";
+
+const COPILOT_DIR = process.env["COPILOT_HOME"] || join(homedir(), ".copilot"); // a non-empty COPILOT_HOME wins over ~/.copilot
+const COPILOT_MCP_JSON = join(COPILOT_DIR, "mcp-config.json"); // the file this adapter reads, backs up, and rewrites
+
+type CopilotMcpEntry = typeof AGENTMEMORY_COPILOT_MCP_BLOCK;
+type CopilotConfig = {
+  mcpServers?: Record<string, CopilotMcpEntry>;
+  [key: string]: unknown;
+};
+
+function entryMatches(entry: unknown): boolean {
+  const expected = JSON.stringify(AGENTMEMORY_COPILOT_MCP_BLOCK); // exact match on the serialized form (key order matters)
+  return typeof entry === "object" && entry !== null && JSON.stringify(entry) === expected;
+}
+
+export const adapter: ConnectAdapter = { // `connect` adapter that wires agentmemory into GitHub Copilot CLI's MCP config
+  name: "copilot-cli",
+  displayName: "GitHub Copilot CLI",
+  docs: "https://github.com/rohitg00/agentmemory#github-copilot-cli",
+  protocolNote:
+    "→ Using MCP. Install the plugin too for full hooks/skills coverage.",
+
+  detect(): boolean { // detected when the Copilot config directory exists
+    return existsSync(COPILOT_DIR);
+  },
+
+  async install(opts: ConnectOptions): Promise<ConnectResult> { // adds or replaces mcpServers.agentmemory, keeping the other keys read from the file
+    const existing = readJsonSafe<CopilotConfig>(COPILOT_MCP_JSON);
+    const next: CopilotConfig = existing ? { ...existing } : {}; // shallow copy so unrelated top-level keys are written back
+    const servers: Record<string, CopilotMcpEntry> = {
+      ...((next.mcpServers as Record<string, CopilotMcpEntry>) ?? {}),
+    };
+
+    const alreadyHas = entryMatches(servers["agentmemory"]);
+    if (alreadyHas && !opts.force) { // identical entry already present: nothing to write unless --force
+      logAlreadyWired("GitHub Copilot CLI", COPILOT_MCP_JSON);
+      return { kind: "already-wired", mutatedPath: COPILOT_MCP_JSON };
+    }
+
+    if (opts.dryRun) { // report the intended change and return before any filesystem write
+      p.log.info(
+        `[dry-run] Would ${alreadyHas ? "overwrite" : "add"} mcpServers.agentmemory in ${COPILOT_MCP_JSON}`,
+      );
+      return { kind: "installed", mutatedPath: COPILOT_MCP_JSON };
+    }
+
+    let backupPath: string | undefined;
+    if (existsSync(COPILOT_MCP_JSON)) { // back up an existing file before it is replaced
+      backupPath = backupFile(COPILOT_MCP_JSON, "copilot-cli");
+      logBackup(backupPath);
+    } else { // no file yet: make sure its parent directory exists
+      mkdirSync(dirname(COPILOT_MCP_JSON), { recursive: true });
+    }
+
+    servers["agentmemory"] = AGENTMEMORY_COPILOT_MCP_BLOCK;
+    next.mcpServers = servers;
+    writeJsonAtomic(COPILOT_MCP_JSON, next);
+
+    const verify = readJsonSafe<CopilotConfig>(COPILOT_MCP_JSON); // read back what was written and confirm the entry
+    if (!entryMatches(verify?.mcpServers?.["agentmemory"])) {
+      p.log.error(
+        `Verification failed: ${COPILOT_MCP_JSON} did not contain mcpServers.agentmemory after write.`,
+      );
+      return { kind: "skipped", reason: "verification-failed" };
+    }
+
+    logInstalled("GitHub Copilot CLI", COPILOT_MCP_JSON);
+    p.log.info(
+      "Copilot picks up MCP servers on next launch or after `/mcp`. Install the plugin too for full hooks/skills.",
+    );
+    return {
+      kind: "installed",
+      mutatedPath: COPILOT_MCP_JSON,
+      ...(backupPath !== undefined && { backupPath }), // include backupPath only when a backup was made
+    };
+  },
+};
diff --git a/src/cli/connect/index.ts b/src/cli/connect/index.ts
index 17aedf8f..48f86817 100644
--- a/src/cli/connect/index.ts
+++ b/src/cli/connect/index.ts
@@ -2,6 +2,7 @@ import { platform } from "node:os";
 import * as p from "@clack/prompts";
 import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js";
 import { adapter as claudeCode } from "./claude-code.js";
+import { adapter as copilotCli } from "./copilot-cli.js";
 import { adapter as codex } from "./codex.js";
 import { adapter as cursor } from "./cursor.js";
 import { adapter as geminiCli } from "./gemini-cli.js";
@@ -12,6 +13,7 @@ import { adapter as pi } from "./pi.js";
 
 export const ADAPTERS: readonly ConnectAdapter[] = [
   claudeCode,
+  copilotCli,
   codex,
   cursor,
   geminiCli,
@@ -34,19 +36,22 @@ function parseFlags(args: string[]): {
   dryRun: boolean;
   force: boolean;
   all: boolean;
+  withHooks: boolean;
   positional: string[];
 } {
   const positional: string[] = [];
   let dryRun = false;
   let force = false;
   let all = false;
+  let withHooks = false;
   for (const a of args) {
     if (a === "--dry-run") dryRun = true;
     else if (a === "--force") force = true;
     else if (a === "--all") all = true;
+    else if (a === "--with-hooks") withHooks = true;
     else if (!a.startsWith("-")) positional.push(a);
   }
-  return { dryRun, force, all, positional };
+  return { dryRun, force, all, withHooks, positional };
 }
 
 export async function runAdapter(
@@ -74,7 +79,10 @@ export async function runAdapter(
 }
 
 export async function runConnect(args: string[]): Promise<void> {
-  if (platform() === "win32") {
+  const { dryRun, force, all, withHooks, positional } = parseFlags(args);
+  const allowWindowsAdapter =
+    positional.length === 1 && positional[0]?.toLowerCase() === "copilot-cli";
+  if (platform() === "win32" && !allowWindowsAdapter) {
     p.intro("agentmemory connect");
     p.log.warn(
       "Windows: automated `connect` is not supported yet. See https://github.com/rohitg00/agentmemory#other-agents for manual install steps.",
@@ -83,8 +91,7 @@ export async function runConnect(args: string[]): Promise<void> {
     return;
   }
 
-  const { dryRun, force, all, positional } = parseFlags(args);
-  const opts: ConnectOptions = { dryRun, force };
+  const opts: ConnectOptions = { dryRun, force, withHooks };
 
   p.intro("agentmemory connect");
 
diff --git a/src/cli/connect/types.ts b/src/cli/connect/types.ts
index 4f64c867..8abd2745 100644
--- a/src/cli/connect/types.ts
+++ b/src/cli/connect/types.ts
@@ -1,6 +1,13 @@
 export type ConnectOptions = {
   dryRun: boolean;
   force: boolean;
+  /**
+   * When true, the Codex adapter additionally writes a global
+   * `~/.codex/hooks.json` block referencing absolute paths to bundled hook
+   * scripts. Workaround for openai/codex#16430, which prevents plugin-local
+   * hooks from dispatching on Codex Desktop. No-op for other adapters.
+   */
+  withHooks?: boolean;
 };
 
 export type ConnectAdapter = {
diff --git a/src/cli/connect/util.ts b/src/cli/connect/util.ts
index 6d5f61ac..8902e3ef 100644
--- a/src/cli/connect/util.ts
+++ b/src/cli/connect/util.ts
@@ -26,6 +26,27 @@ export const AGENTMEMORY_MCP_BLOCK = {
   },
 };
 
+const COPILOT_MCP_COMMAND = // how the MCP server is launched; chosen once at module load from the current platform
+  process.platform === "win32"
+    ? { // Windows: run npx through the command interpreter — NOTE(review): presumably because npx is a .cmd shim there; confirm
+        command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe",
+        args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"],
+      }
+    : { // other platforms: invoke npx directly
+        command: "npx",
+        args: ["-y", "@agentmemory/mcp"],
+      };
+
+export const AGENTMEMORY_COPILOT_MCP_BLOCK = { // the mcpServers.agentmemory entry written into Copilot's mcp-config.json
+  type: "local" as const,
+  ...COPILOT_MCP_COMMAND,
+  env: { // literal "${…}" strings (not JS templates) — NOTE(review): presumably expanded by Copilot CLI at launch; confirm
+    AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+    AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+  },
+  tools: ["*"],
+};
+
 export function backupsDir(): string {
   return join(homedir(), ".agentmemory", "backups");
 }
diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts
index 92b23d62..2e148a1b 100644
--- a/src/cli/onboarding.ts
+++ b/src/cli/onboarding.ts
@@ -36,6 +36,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
 // where they overlap; the rest fall back to the generic `◇`.
 const NATIVE_AGENTS: { value: string; label: string; glyph: string }[] = [
   { value: "claude-code", label: "Claude Code", glyph: "⟁" },
+  { value: "copilot-cli", label: "GitHub Copilot CLI", glyph: "◈" },
   { value: "codex", label: "Codex", glyph: "◎" },
   { value: "openhuman", label: "OpenHuman", glyph: "◇" },
   { value: "openclaw", label: "OpenClaw", glyph: "◇" },
@@ -67,7 +68,7 @@ const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [
   { value: "skip", label: "Skip — BM25-only mode (no LLM key)", envKey: null },
 ];
 
-function buildAgentOptions(): { value: string; label: string; hint?: string }[] {
+export function buildAgentOptions(): { value: string; label: string; hint?: string }[] {
   return [
     ...NATIVE_AGENTS.map((a) => ({
       value: a.value,
@@ -82,6 +83,15 @@ function buildAgentOptions(): { value: string; label: string; hint?: string }[]
   ];
 }
 
+export function getInitialAgentValues(
+  env: Record<string, string | undefined> = process.env,
+): string[] {
+  // Preselect Copilot CLI when its environment markers are present,
+  // otherwise fall back to Claude Code.
+  const copilotDetected = env["COPILOT_CLI"] === "1" || Boolean(env["COPILOT_AGENT_SESSION_ID"]);
+  return copilotDetected ? ["copilot-cli"] : ["claude-code"];
+}
+
 // Mirror src/cli.ts findEnvExample so onboarding ships the same .env
 // skeleton whether called directly or via `agentmemory init`. We
 // duplicate (rather than import) so the onboarding module doesn't
@@ -137,7 +147,31 @@ export interface OnboardingResult {
   provider: string | null;
 }
 
+function shouldSkipInteractiveOnboarding(): boolean {
+  // Prompts need a terminal on both stdin and stdout.
+  if (!(process.stdin.isTTY === true && process.stdout.isTTY === true)) return true;
+  // CI counts as set unless it is absent, "", "0" or "false" (any letter case).
+  const ciFlag = process.env["CI"];
+  if (ciFlag === undefined || ciFlag === "" || ciFlag === "0") return false;
+  return ciFlag.toLowerCase() !== "false";
+}
+
+function writeDefaultOnboardingPrefs(): OnboardingResult { // used by runOnboarding when prompts are skipped
+  writePrefs({
+    lastAgent: null, // nothing was chosen, so every "last…" field is empty
+    lastAgents: [],
+    lastProvider: null,
+    skipSplash: true,
+    firstRunAt: new Date().toISOString(), // current time as an ISO-8601 string
+  });
+  return { agents: [], provider: null }; // empty result mirrors the prefs written above
+}
+
 export async function runOnboarding(): Promise<OnboardingResult> {
+  if (shouldSkipInteractiveOnboarding()) {
+    return writeDefaultOnboardingPrefs();
+  }
+
   p.note(
     [
       "Welcome to agentmemory.",
@@ -153,7 +187,7 @@ export async function runOnboarding(): Promise<OnboardingResult> {
     message: "Which agents will use agentmemory? (space to toggle, enter to confirm)",
     options: buildAgentOptions(),
     required: false,
-    initialValues: ["claude-code"],
+    initialValues: getInitialAgentValues(),
   });
   if (p.isCancel(agentsPicked)) {
     p.cancel("Setup cancelled. Re-run any time with: agentmemory --reset");
@@ -166,7 +200,7 @@ export async function runOnboarding(): Promise<OnboardingResult> {
       [
         "━ how this works ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
         "All selected agents share the same memory at :3111.",
-        "A memory saved by Claude Code is visible to Codex + Cursor instantly.",
+        "A memory saved by Claude Code is visible to Copilot + Codex + Cursor instantly.",
         "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
       ].join("\n"),
     );
diff --git a/src/config.ts b/src/config.ts
index 4a416ed1..eed5725e 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -159,6 +159,10 @@ export function getEnvVar(key: string): string | undefined {
   return getMergedEnv()[key];
 }
 
+export function isDropStaleIndexEnabled(): boolean { // reads AGENTMEMORY_DROP_STALE_INDEX from the merged env
+  return getMergedEnv()["AGENTMEMORY_DROP_STALE_INDEX"] === "true"; // exact, case-sensitive "true" only
+}
+
 export function detectLlmProviderKind(): "llm" | "noop" {
   const env = getMergedEnv();
   if (
diff --git a/src/functions/diagnostics.ts b/src/functions/diagnostics.ts
index 42f822cb..a63d7959 100644
--- a/src/functions/diagnostics.ts
+++ b/src/functions/diagnostics.ts
@@ -7,8 +7,14 @@ import type {
   Action,
   ActionEdge,
   DiagnosticCheck,
+  Insight,
   Lease,
+  Lesson,
   Checkpoint,
+  Crystal,
+  ProceduralMemory,
+  SemanticMemory,
+  SessionSummary,
   Signal,
   Sentinel,
   Sketch,
@@ -25,6 +31,12 @@ const ALL_CATEGORIES = [
   "signals",
   "sessions",
   "memories",
+  "lessons",
+  "summaries",
+  "semantic",
+  "procedural",
+  "crystals",
+  "insights",
   "mesh",
 ];
 
@@ -354,6 +366,186 @@ export function registerDiagnosticsFunction(sdk: ISdk, kv: StateKV): void {
         }
       }
 
+      if (categories.includes("lessons")) {
+        // Counts only live lessons (deleted=true rows are tombstoned).
+        // Catches bad confidence values that would silently break recall
+        // scoring (memory_lesson_recall multiplies by confidence).
+        const lessons = await kv.list<Lesson>(KV.lessons);
+        const live = lessons.filter((l) => !l.deleted);
+        let lessonIssues = 0;
+        for (const l of live) {
+          // Number.isFinite rejects NaN / Infinity / non-numbers; a
+          // corrupted row passing those would silently survive the < / >
+          // range check (e.g. NaN < 0 is false, NaN > 1 is false, so the
+          // bad row would be "healthy") and skew memory_lesson_recall's
+          // scoring downstream. Surface as warning.
+          if (
+            !Number.isFinite(l.confidence) ||
+            l.confidence < 0 ||
+            l.confidence > 1
+          ) {
+            checks.push({
+              name: `lesson-bad-confidence:${l.id}`,
+              category: "lessons",
+              status: "warn",
+              message: `Lesson ${l.id} has confidence ${l.confidence} (expected finite number in 0..1)`,
+              fixable: false,
+            });
+            lessonIssues++;
+          }
+        }
+        if (lessonIssues === 0) {
+          checks.push({
+            name: "lessons-ok",
+            category: "lessons",
+            status: "pass",
+            message: `All ${live.length} lessons are healthy (${lessons.length - live.length} tombstoned)`,
+            fixable: false,
+          });
+        }
+      }
+
+      if (categories.includes("summaries")) {
+        const summaries = await kv.list<SessionSummary>(KV.summaries);
+        let summaryIssues = 0;
+        for (const s of summaries) {
+          // typeof guard before .trim() — a corrupted row with title=null
+          // or title=42 would otherwise throw and abort the whole diagnose
+          // run before later categories get checked.
+          if (typeof s.title !== "string" || s.title.trim().length === 0) {
+            checks.push({
+              name: `summary-missing-title:${s.sessionId}`,
+              category: "summaries",
+              status: "warn",
+              message: `Summary for session ${s.sessionId} has no title`,
+              fixable: false,
+            });
+            summaryIssues++;
+          }
+        }
+        if (summaryIssues === 0) {
+          checks.push({
+            name: "summaries-ok",
+            category: "summaries",
+            status: "pass",
+            message: `All ${summaries.length} session summaries are consistent`,
+            fixable: false,
+          });
+        }
+      }
+
+      if (categories.includes("semantic")) {
+        const semantic = await kv.list<SemanticMemory>(KV.semantic);
+        let semanticIssues = 0;
+        for (const s of semantic) {
+          if (
+            !Number.isFinite(s.confidence) ||
+            s.confidence < 0 ||
+            s.confidence > 1
+          ) {
+            checks.push({
+              name: `semantic-bad-confidence:${s.id}`,
+              category: "semantic",
+              status: "warn",
+              message: `Semantic fact ${s.id} has confidence ${s.confidence} (expected finite number in 0..1)`,
+              fixable: false,
+            });
+            semanticIssues++;
+          }
+        }
+        if (semanticIssues === 0) {
+          checks.push({
+            name: "semantic-ok",
+            category: "semantic",
+            status: "pass",
+            message: `All ${semantic.length} semantic memories are consistent`,
+            fixable: false,
+          });
+        }
+      }
+
+      if (categories.includes("procedural")) {
+        const procedural = await kv.list<ProceduralMemory>(KV.procedural);
+        let proceduralIssues = 0;
+        for (const p of procedural) {
+          if (!Array.isArray(p.steps) || p.steps.length === 0) {
+            checks.push({
+              name: `procedural-empty-steps:${p.id}`,
+              category: "procedural",
+              status: "warn",
+              message: `Procedural memory "${p.name}" (${p.id}) has no steps`,
+              fixable: false,
+            });
+            proceduralIssues++;
+          }
+        }
+        if (proceduralIssues === 0) {
+          checks.push({
+            name: "procedural-ok",
+            category: "procedural",
+            status: "pass",
+            message: `All ${procedural.length} procedural memories are consistent`,
+            fixable: false,
+          });
+        }
+      }
+
+      if (categories.includes("crystals")) {
+        const crystals = await kv.list<Crystal>(KV.crystals);
+        let crystalIssues = 0;
+        for (const c of crystals) {
+          if (typeof c.narrative !== "string" || c.narrative.trim().length === 0) {
+            checks.push({
+              name: `crystal-empty-narrative:${c.id}`,
+              category: "crystals",
+              status: "warn",
+              message: `Crystal ${c.id} has empty narrative`,
+              fixable: false,
+            });
+            crystalIssues++;
+          }
+        }
+        if (crystalIssues === 0) {
+          checks.push({
+            name: "crystals-ok",
+            category: "crystals",
+            status: "pass",
+            message: `All ${crystals.length} crystals are consistent`,
+            fixable: false,
+          });
+        }
+      }
+
+      if (categories.includes("insights")) {
+        const insights = await kv.list<Insight>(KV.insights);
+        let insightIssues = 0;
+        for (const i of insights) {
+          if (
+            !Number.isFinite(i.confidence) ||
+            i.confidence < 0 ||
+            i.confidence > 1
+          ) {
+            checks.push({
+              name: `insight-bad-confidence:${i.id}`,
+              category: "insights",
+              status: "warn",
+              message: `Insight ${i.id} has confidence ${i.confidence} (expected finite number in 0..1)`,
+              fixable: false,
+            });
+            insightIssues++;
+          }
+        }
+        if (insightIssues === 0) {
+          checks.push({
+            name: "insights-ok",
+            category: "insights",
+            status: "pass",
+            message: `All ${insights.length} insights are consistent`,
+            fixable: false,
+          });
+        }
+      }
+
       if (categories.includes("mesh")) {
         const peers = await kv.list<MeshPeer>(KV.mesh);
         let meshIssues = 0;
diff --git a/src/functions/export-import.ts b/src/functions/export-import.ts
index 674b14da..4c997630 100644
--- a/src/functions/export-import.ts
+++ b/src/functions/export-import.ts
@@ -176,7 +176,7 @@ export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void {
       const strategy = data.strategy || "merge";
       const importData = data.exportData;
 
-      const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", "0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20"]);
+      const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", "0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20", "0.9.21"]);
       if (!supportedVersions.has(importData.version)) {
         return {
           success: false,
diff --git a/src/functions/search.ts b/src/functions/search.ts
index 74af9ff1..b4444b48 100644
--- a/src/functions/search.ts
+++ b/src/functions/search.ts
@@ -86,6 +86,99 @@ export async function vectorIndexAddGuarded(
   }
 }
 
+// Batched variant: calls EmbeddingProvider.embedBatch ONCE for the whole
+// batch, then writes each resulting vector. Use this for bulk paths
+// (rebuildIndex, future bulk-add APIs) where per-item serial awaits
+// dominate wallclock. A batch of N has roughly the latency of a single
+// embed (network + GPU setup amortized), so backfilling a 500k-obs
+// corpus drops from days to hours on a per-batch endpoint like vLLM.
+// Returns { ok, fail } item counts (both 0 when there is nothing to do).
+// Per-item failure shape:
+//   - whole-batch provider error / malformed or wrong-length result → all skipped, one warn
+//   - missing vector, dimension mismatch or index write error → that item skipped, others continue
+export async function vectorIndexAddBatchGuarded(
+  items: Array<{
+    id: string
+    sessionId: string
+    text: string
+    context: { kind: "memory" | "observation" | "synthetic"; logId: string }
+  }>,
+): Promise<{ ok: number; fail: number }> {
+  const vi = vectorIndex
+  const ep = currentEmbeddingProvider
+  if (!vi || !ep || items.length === 0) return { ok: 0, fail: 0 }
+
+  let embeddings: Float32Array[]
+  try {
+    embeddings = await ep.embedBatch(items.map((i) => clipEmbedInput(i.text)))
+  } catch (err) {
+    logger.warn("vector-index add batch: embed failed — skipping batch", {
+      batchSize: items.length,
+      provider: ep.name,
+      error: err instanceof Error ? err.message : String(err),
+    })
+    return { ok: 0, fail: items.length }
+  }
+
+  if (!Array.isArray(embeddings) || embeddings.length !== items.length) { // non-array results are rejected too
+    logger.warn(
+      "vector-index add batch: provider returned wrong length — skipping batch",
+      {
+        batchSize: items.length,
+        returned: embeddings?.length,
+        provider: ep.name,
+      },
+    )
+    return { ok: 0, fail: items.length }
+  }
+
+  let ok = 0
+  let fail = 0
+  for (let i = 0; i < items.length; i++) {
+    const item = items[i]
+    const embedding = embeddings[i]
+    if (embedding?.length !== ep.dimensions) { // also true for a missing (undefined/null) vector
+      logger.warn("vector-index add batch: dimension mismatch — skipping item", {
+        kind: item.context.kind,
+        id: item.context.logId,
+        provider: ep.name,
+        expected: ep.dimensions,
+        received: embedding?.length,
+      })
+      fail++
+      continue
+    }
+    try {
+      vi.add(item.id, item.sessionId, embedding)
+      ok++
+    } catch (err) {
+      logger.warn("vector-index add batch: index write failed — skipping item", {
+        kind: item.context.kind,
+        id: item.context.logId,
+        error: err instanceof Error ? err.message : String(err),
+      })
+      fail++
+    }
+  }
+  return { ok, fail }
+}
+
+// Embed-batch size for rebuild. Each item is one /v1/embeddings call's
+// `input` array element; the provider sees the whole batch as one HTTP
+// round-trip. 32 fits comfortably under typical per-request token budgets
+// (32 × ~110 tok/item ≈ 3.5k tokens) and gets close to per-call
+// throughput for GPU-backed endpoints (vLLM, Triton, etc.). Override via
+// REBUILD_EMBED_BATCH_SIZE for endpoints that prefer smaller/larger
+// batches. Set to 1 to embed one item per request (still via embedBatch).
+const DEFAULT_REBUILD_EMBED_BATCH = 32
+
+function getRebuildEmbedBatchSize(): number {
+  // Unset, empty, non-numeric or non-positive overrides use the default.
+  const override = process.env.REBUILD_EMBED_BATCH_SIZE
+  const parsed = override ? Number.parseInt(override, 10) : Number.NaN
+  return parsed > 0 && Number.isFinite(parsed) ? parsed : DEFAULT_REBUILD_EMBED_BATCH
+}
+
 export async function rebuildIndex(kv: StateKV): Promise<number> {
   const idx = getSearchIndex()
   idx.clear()
@@ -96,8 +189,28 @@ export async function rebuildIndex(kv: StateKV): Promise<number> {
   // repopulation loops run, so BM25 and vector stay in sync.
   vectorIndex?.clear()
 
+  const batchSize = getRebuildEmbedBatchSize()
+  // Accumulator for the batched embed flush. BM25 add is synchronous and
+  // doesn't need batching — only the vector path benefits.
+  type EmbedJob = {
+    id: string
+    sessionId: string
+    text: string
+    context: { kind: "memory" | "observation" | "synthetic"; logId: string }
+  }
+  const pending: EmbedJob[] = []
   let count = 0
 
+  const flush = async (): Promise<void> => {
+    if (pending.length === 0) return
+    await vectorIndexAddBatchGuarded(pending)
+    pending.length = 0
+  }
+  const enqueue = async (job: EmbedJob): Promise<void> => {
+    pending.push(job)
+    if (pending.length >= batchSize) await flush()
+  }
+
   // Memories live in their own KV scope outside per-session observation
   // scopes, so they need a separate walk. Without this, mem::remember
   // entries vanish from BM25 on every restart even after the live-write
@@ -108,12 +221,12 @@ export async function rebuildIndex(kv: StateKV): Promise<number> {
       if (memory.isLatest === false) continue
       if (!memory.title || !memory.content) continue
       idx.add(memoryToObservation(memory))
-      await vectorIndexAddGuarded(
-        memory.id,
-        memory.sessionIds[0] ?? 'memory',
-        memory.title + ' ' + memory.content,
-        { kind: "memory", logId: memory.id },
-      )
+      await enqueue({
+        id: memory.id,
+        sessionId: memory.sessionIds[0] ?? 'memory',
+        text: memory.title + ' ' + memory.content,
+        context: { kind: "memory", logId: memory.id },
+      })
       count++
     }
   } catch (err) {
@@ -123,7 +236,10 @@ export async function rebuildIndex(kv: StateKV): Promise<number> {
   }
 
   const sessions = await kv.list<Session>(KV.sessions)
-  if (!sessions.length) return count
+  if (!sessions.length) {
+    await flush()
+    return count
+  }
 
   const obsPerSession: CompressedObservation[][] = []
   const failedSessions: string[] = []
@@ -148,16 +264,19 @@ export async function rebuildIndex(kv: StateKV): Promise<number> {
     for (const obs of observations) {
       if (obs.title && obs.narrative) {
         idx.add(obs)
-        await vectorIndexAddGuarded(
-          obs.id,
-          obs.sessionId,
-          obs.title + ' ' + obs.narrative,
-          { kind: "observation", logId: obs.id },
-        )
+        await enqueue({
+          id: obs.id,
+          sessionId: obs.sessionId,
+          text: obs.title + ' ' + obs.narrative,
+          context: { kind: "observation", logId: obs.id },
+        })
         count++
       }
     }
   }
+
+  // Drain the last partial batch.
+  await flush()
   return count
 }
 
diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts
index fdeed273..c80b1f87 100644
--- a/src/functions/smart-search.ts
+++ b/src/functions/smart-search.ts
@@ -1,24 +1,32 @@
 import type { ISdk } from "iii-sdk";
 import type {
+  CompactLessonResult,
   CompactSearchResult,
   CompressedObservation,
   HybridSearchResult,
+  Lesson,
 } from "../types.js";
 import { KV } from "../state/schema.js";
 import { StateKV } from "../state/kv.js";
 import { recordAccessBatch } from "./access-tracker.js";
 import { logger } from "../logger.js";
 
+// Compact mode trims each lesson's content for at-a-glance display. The
+// full content is fetched via memory_lesson_recall when the caller needs it.
+const LESSON_CONTENT_PREVIEW_CHARS = 240;
+
 export function registerSmartSearchFunction(
   sdk: ISdk,
   kv: StateKV,
   searchFn: (query: string, limit: number) => Promise<HybridSearchResult[]>,
 ): void {
-  sdk.registerFunction("mem::smart-search", 
+  sdk.registerFunction("mem::smart-search",
     async (data: {
       query?: string;
       expandIds?: Array<string | { obsId: string; sessionId: string }>;
       limit?: number;
+      project?: string;
+      includeLessons?: boolean;
     }) => {
 
       if (data.expandIds && data.expandIds.length > 0) {
@@ -68,7 +76,21 @@ export function registerSmartSearchFunction(
       }
 
       const limit = Math.max(1, Math.min(data.limit ?? 20, 100));
-      const hybridResults = await searchFn(data.query, limit);
+      // Cap lesson results at a smaller number than observations: lessons
+      // are denser (curated insights) so 10 is usually plenty for a recall.
+      const lessonLimit = Math.min(limit, 10);
+      const includeLessons = data.includeLessons !== false;
+
+      // Run observation hybrid-search and lesson recall in parallel so the
+      // extra lesson lookup adds no wallclock when the underlying calls
+      // can overlap. Lesson recall is best-effort: if mem::lesson-recall
+      // fails or returns unexpected shape, log + fall back to empty.
+      const [hybridResults, lessons] = await Promise.all([
+        searchFn(data.query, limit),
+        includeLessons
+          ? recallLessons(sdk, data.query, lessonLimit, data.project)
+          : Promise.resolve([]),
+      ]);
 
       const compact: CompactSearchResult[] = hybridResults.map((r) => ({
         obsId: r.observation.id,
@@ -87,12 +109,51 @@ export function registerSmartSearchFunction(
       logger.info("Smart search compact", {
         query: data.query,
         results: compact.length,
+        lessons: lessons.length,
       });
-      return { mode: "compact", results: compact };
+      const response: {
+        mode: "compact";
+        results: CompactSearchResult[];
+        lessons?: CompactLessonResult[];
+      } = { mode: "compact", results: compact };
+      if (includeLessons) response.lessons = lessons;
+      return response;
     },
   );
 }
 
+async function recallLessons(
+  sdk: ISdk,
+  query: string,
+  limit: number,
+  project?: string,
+): Promise<CompactLessonResult[]> {
+  type RecallReply = { success?: boolean; lessons?: Array<Lesson & { score?: number }> };
+  // Best-effort: a thrown error, an unsuccessful reply or a non-array
+  // `lessons` field all yield an empty list.
+  try {
+    const reply = (await sdk.trigger({
+      function_id: "mem::lesson-recall",
+      payload: { query, limit, project },
+    })) as RecallReply;
+    if (!reply?.success || !Array.isArray(reply.lessons)) return [];
+    // Trim content to a preview; score falls back to confidence.
+    return reply.lessons.map((lesson) => {
+      const { content } = lesson;
+      const tooLong = content.length > LESSON_CONTENT_PREVIEW_CHARS;
+      const preview = tooLong ? content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" : content;
+      const { id: lessonId, confidence, createdAt, project: lessonProject } = lesson;
+      const score = lesson.score ?? confidence;
+      const tags = lesson.tags ?? [];
+      return { lessonId, content: preview, confidence, score, createdAt, project: lessonProject, tags };
+    });
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", { error });
+    return [];
+  }
+}
+
 async function findObservation(
   kv: StateKV,
   obsId: string,
diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts
index 140e0e12..80b29a09 100644
--- a/src/functions/summarize.ts
+++ b/src/functions/summarize.ts
@@ -7,7 +7,12 @@ import type {
 } from "../types.js";
 import { KV } from "../state/schema.js";
 import { StateKV } from "../state/kv.js";
-import { SUMMARY_SYSTEM, buildSummaryPrompt } from "../prompts/summary.js";
+import {
+  SUMMARY_SYSTEM,
+  buildSummaryPrompt,
+  REDUCE_SYSTEM,
+  buildReducePrompt,
+} from "../prompts/summary.js";
 import { getXmlTag, getXmlChildren } from "../prompts/xml.js";
 import { SummaryOutputSchema } from "../eval/schemas.js";
 import { validateOutput } from "../eval/validator.js";
@@ -16,6 +21,169 @@ import type { MetricsStore } from "../eval/metrics-store.js";
 import { safeAudit } from "./audit.js";
 import { logger } from "../logger.js";
 
+// Per-chunk observation budget when a session is too large to fit in one
+// LLM call. Default ≈ 50k input tokens per chunk at ~110 tok/obs — fits
+// comfortably in 128k-window models. Override via SUMMARIZE_CHUNK_SIZE.
+const CHUNK_SIZE_DEFAULT = 400;
+// Concurrent in-flight chunk calls. 6 keeps a 100-chunk session under
+// iii's 180s function-invocation timeout at ~8s/call while staying
+// inside generous-but-not-unlimited provider rate limits (well below
+// OpenAI free tier's 500 RPM). High-throughput providers
+// (Novita / DeepInfra / DeepSeek) typically allow 100+ concurrent — set
+// SUMMARIZE_CHUNK_CONCURRENCY higher to cover ~1000+ chunk sessions.
+const CHUNK_CONCURRENCY_DEFAULT = 6;
+// Bail on the merged summary if more than this fraction of chunks fail
+// (bad parse or provider error, after one retry) — a half-blind narrative is worse than a clean error.
+const MAX_SKIP_RATIO = 0.5;
+
+function getChunkSize(): number {
+  // Unset, empty, non-numeric or non-positive overrides use the default.
+  const override = process.env.SUMMARIZE_CHUNK_SIZE;
+  const parsed = override ? Number.parseInt(override, 10) : Number.NaN;
+  return parsed > 0 && Number.isFinite(parsed) ? parsed : CHUNK_SIZE_DEFAULT;
+}
+
+function getChunkConcurrency(): number {
+  // Unset, empty, non-numeric or non-positive overrides use the default.
+  const override = process.env.SUMMARIZE_CHUNK_CONCURRENCY;
+  const parsed = override ? Number.parseInt(override, 10) : Number.NaN;
+  return parsed > 0 && Number.isFinite(parsed) ? parsed : CHUNK_CONCURRENCY_DEFAULT;
+}
+
+// One chunk call with retry-once. Returns null when both attempts fail —
+// whether by parse failure, provider 4xx (content rejected by upstream
+// filters), or transient network/5xx errors that didn't recover on retry.
+// All failure modes are equivalent at this layer: the chunk is unusable,
+// skip it and let the caller decide via the skip-ratio bailout whether
+// the overall summary is still trustworthy. Errors that affect every
+// chunk (auth, model down) will trip the bailout naturally.
+async function summarizeChunkWithRetry(
+  provider: MemoryProvider,
+  chunk: CompressedObservation[],
+  sessionId: string,
+  project: string,
+  idx: number, // zero-based chunk position; logged one-based as `${idx + 1}/${total}`
+  total: number,
+): Promise<SessionSummary | null> {
+  for (let attempt = 1; attempt <= 2; attempt++) { // first try plus exactly one retry
+    try {
+      const xml = await provider.summarize(
+        SUMMARY_SYSTEM,
+        buildSummaryPrompt(chunk),
+      );
+      const parsed = parseSummaryXml(xml, sessionId, project, chunk.length);
+      if (parsed) return parsed; // first successful parse wins
+      logger.warn("Summarize chunk parse failed", {
+        sessionId,
+        chunk: `${idx + 1}/${total}`,
+        attempt,
+      });
+    } catch (err) { // a thrown provider error is logged and consumes an attempt, never rethrown
+      logger.warn("Summarize chunk LLM call failed", {
+        sessionId,
+        chunk: `${idx + 1}/${total}`,
+        attempt,
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+  return null;
+}
+
+// Produces the summary XML plus how it was made ({ response, mode, chunks,
+// skipped }). Sessions ≤ chunk size take a single LLM call (legacy
+// behavior). Larger sessions are split into chunks, summarized in parallel
+// batches (each chunk retried once), then merged via a reduce call. Throws
+// `too_many_chunks_skipped` when over MAX_SKIP_RATIO of the chunks fail.
+async function produceSummaryXml(
+  provider: MemoryProvider,
+  compressed: CompressedObservation[],
+  sessionId: string,
+  project: string,
+): Promise<{
+  response: string;
+  mode: "single" | "chunked";
+  chunks: number;
+  skipped?: number;
+}> {
+  const chunkSize = getChunkSize();
+  if (compressed.length <= chunkSize) {
+    const response = await provider.summarize(
+      SUMMARY_SYSTEM,
+      buildSummaryPrompt(compressed),
+    );
+    return { response, mode: "single", chunks: 1 };
+  }
+
+  const chunks: CompressedObservation[][] = [];
+  for (let i = 0; i < compressed.length; i += chunkSize) {
+    chunks.push(compressed.slice(i, i + chunkSize));
+  }
+  const concurrency = getChunkConcurrency();
+  logger.info("Summarize chunking session", {
+    sessionId,
+    chunks: chunks.length,
+    chunkSize,
+    concurrency,
+    totalObservations: compressed.length,
+  });
+
+  // Null-filled array keeps the chunk → index mapping after parallel
+  // resolution, so the reduce step sees partials in chronological order
+  // even when some were skipped.
+  const partialByIdx: Array<SessionSummary | null> = new Array(chunks.length).fill(null);
+  for (let batchStart = 0; batchStart < chunks.length; batchStart += concurrency) {
+    const batch = chunks.slice(batchStart, batchStart + concurrency);
+    await Promise.all(
+      batch.map(async (chunk, j) => {
+        const idx = batchStart + j;
+        partialByIdx[idx] = await summarizeChunkWithRetry(
+          provider,
+          chunk,
+          sessionId,
+          project,
+          idx,
+          chunks.length,
+        );
+      }),
+    );
+  }
+
+  const skipped = partialByIdx.filter((p) => p === null).length;
+  // Survivors are gathered below, each paired with its original chunk index.
+
+  if (skipped > Math.floor(chunks.length * MAX_SKIP_RATIO)) {
+    throw new Error(
+      `too_many_chunks_skipped: ${skipped}/${chunks.length} chunks failed to parse after retry`,
+    );
+  }
+  if (skipped > 0) {
+    logger.warn("Summarize chunks partially skipped", {
+      sessionId,
+      skipped,
+      total: chunks.length,
+    });
+  }
+
+  const reduceInput = partialByIdx.flatMap((p, originalIdx) => {
+    if (p === null) return []; // skipped chunk contributes nothing to the reduce
+    return [{
+      title: p.title,
+      narrative: p.narrative,
+      keyDecisions: p.keyDecisions,
+      filesModified: p.filesModified,
+      concepts: p.concepts,
+      obsRangeStart: originalIdx * chunkSize + 1,
+      obsRangeEnd: Math.min((originalIdx + 1) * chunkSize, compressed.length),
+    }];
+  });
+  const response = await provider.summarize(
+    REDUCE_SYSTEM,
+    buildReducePrompt(reduceInput),
+  );
+  return { response, mode: "chunked", chunks: chunks.length, skipped };
+}
+
 function parseSummaryXml(
   xml: string,
   sessionId: string,
@@ -85,8 +253,12 @@ export function registerSummarizeFunction(
       }
 
       try {
-        const prompt = buildSummaryPrompt(compressed);
-        const response = await provider.summarize(SUMMARY_SYSTEM, prompt);
+        const { response, mode, chunks } = await produceSummaryXml(
+          provider,
+          compressed,
+          sessionId,
+          session.project,
+        );
         if (!response || !response.trim()) {
           const latencyMs = Date.now() - startMs;
           if (metricsStore) {
@@ -95,8 +267,8 @@ export function registerSummarizeFunction(
           logger.warn("Empty provider response on summarize", {
             sessionId,
             provider: provider.name,
-            promptBytes: prompt.length,
-            systemBytes: SUMMARY_SYSTEM.length,
+            mode,
+            chunks,
             observationCount: compressed.length,
           });
           return { success: false, error: "empty_provider_response" };
diff --git a/src/hooks/notification.ts b/src/hooks/notification.ts
index 6c4b7b81..51347d50 100644
--- a/src/hooks/notification.ts
+++ b/src/hooks/notification.ts
@@ -29,9 +29,14 @@ async function main() {
   }
 
   if (isSdkChildContext(data)) return;
-  if (data.notification_type !== "permission_prompt") return;
+  const notificationType = data.notification_type ?? data.notificationType;
+  if (notificationType !== "permission_prompt") return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const rawSessionId = data.session_id ?? data.sessionId;
+  const sessionId =
+    typeof rawSessionId === "string" && rawSessionId.length > 0
+      ? rawSessionId
+      : "unknown";
 
   try {
     await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -44,7 +49,7 @@ async function main() {
         cwd: data.cwd || process.cwd(),
         timestamp: new Date().toISOString(),
         data: {
-          notification_type: data.notification_type,
+          notification_type: notificationType,
           title: data.title,
           message: data.message,
         },
diff --git a/src/hooks/post-tool-failure.ts b/src/hooks/post-tool-failure.ts
index 337aebdd..7fa71d05 100644
--- a/src/hooks/post-tool-failure.ts
+++ b/src/hooks/post-tool-failure.ts
@@ -29,9 +29,12 @@ async function main() {
   }
 
   if (isSdkChildContext(data)) return;
-  if (data.is_interrupt) return;
+  if (data.is_interrupt || data.isInterrupt) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+  const toolName = data.tool_name ?? data.toolName;
+  const toolInput = data.tool_input ?? data.toolArgs;
+  const error = data.error ?? data.errorMessage;
 
   try {
     await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -44,15 +47,15 @@ async function main() {
         cwd: data.cwd || process.cwd(),
         timestamp: new Date().toISOString(),
         data: {
-          tool_name: data.tool_name,
+          tool_name: toolName,
           tool_input:
-            typeof data.tool_input === "string"
-              ? data.tool_input.slice(0, 4000)
-              : JSON.stringify(data.tool_input ?? "").slice(0, 4000),
+            typeof toolInput === "string"
+              ? toolInput.slice(0, 4000)
+              : JSON.stringify(toolInput ?? "").slice(0, 4000),
           error:
-            typeof data.error === "string"
-              ? data.error.slice(0, 4000)
-              : JSON.stringify(data.error ?? "").slice(0, 4000),
+            typeof error === "string"
+              ? error.slice(0, 4000)
+              : JSON.stringify(error ?? "").slice(0, 4000),
         },
       }),
       signal: AbortSignal.timeout(3000),
diff --git a/src/hooks/post-tool-use.ts b/src/hooks/post-tool-use.ts
index 65afc8b1..c8319c48 100644
--- a/src/hooks/post-tool-use.ts
+++ b/src/hooks/post-tool-use.ts
@@ -30,9 +30,11 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+  const toolName = data.tool_name ?? data.toolName;
+  const toolInput = data.tool_input ?? data.toolArgs;
 
-  const { imageData, cleanOutput } = extractImageData(data.tool_output);
+  const { imageData, cleanOutput } = extractImageData(toolOutput(data));
 
   try {
     await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -45,8 +47,8 @@ async function main() {
         cwd: data.cwd || process.cwd(),
         timestamp: new Date().toISOString(),
         data: {
-          tool_name: data.tool_name,
-          tool_input: data.tool_input,
+          tool_name: toolName,
+          tool_input: toolInput,
           tool_output: truncate(cleanOutput, 8000),
           ...(imageData ? { image_data: imageData } : {}),
         },
@@ -57,6 +59,17 @@ async function main() {
   }
 }
 
+function toolOutput(data: Record<string, unknown>): unknown { // Picks the tool's output from whichever payload field is present.
+  if (data.tool_response !== undefined) return data.tool_response; // checked first
+  if (data.tool_output !== undefined) return data.tool_output; // checked second
+  const result = data.tool_result ?? data.toolResult; // snake_case key, then camelCase key
+  if (typeof result === "object" && result !== null) {
+    const obj = result as Record<string, unknown>;
+    return obj.text_result_for_llm ?? obj.textResultForLlm ?? result; // prefer the text field; otherwise the whole object
+  }
+  return result; // non-object values (including null/undefined) pass through unchanged
+}
+
 function isBase64Image(val: unknown): val is string {
   return typeof val === "string" && (
     val.startsWith("data:image/") ||
diff --git a/src/hooks/pre-compact.ts b/src/hooks/pre-compact.ts
index ea13ebec..77fb7a57 100644
--- a/src/hooks/pre-compact.ts
+++ b/src/hooks/pre-compact.ts
@@ -30,7 +30,7 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
   const project = (data.cwd as string) || process.cwd();
 
   if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") {
diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts
index 61f6c443..eea440c8 100644
--- a/src/hooks/pre-tool-use.ts
+++ b/src/hooks/pre-tool-use.ts
@@ -50,16 +50,28 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const toolName = data.tool_name as string;
+  const toolName =
+    typeof data.tool_name === "string"
+      ? data.tool_name
+      : typeof data.toolName === "string"
+        ? data.toolName
+        : undefined;
   if (!toolName) return;
 
-  const fileTools = ["Edit", "Write", "Read", "Glob", "Grep"];
-  if (!fileTools.includes(toolName)) return;
-
-  const toolInput = (data.tool_input || {}) as Record<string, unknown>;
+  const normalizedToolName = toolName.toLowerCase();
+  const fileTools = ["edit", "write", "create", "read", "view", "glob", "grep"];
+  if (!fileTools.includes(normalizedToolName)) return;
+
+  const rawToolInput = data.tool_input ?? data.toolArgs;
+  const toolInput =
+    typeof rawToolInput === "object" &&
+    rawToolInput !== null &&
+    !Array.isArray(rawToolInput)
+      ? (rawToolInput as Record<string, unknown>)
+      : {};
   const files: string[] = [];
   const fileKeys =
-    toolName === "Grep"
+    normalizedToolName === "grep"
       ? ["path", "file"]
       : ["file_path", "path", "file", "pattern"];
   for (const key of fileKeys) {
@@ -69,14 +81,18 @@ async function main() {
   if (files.length === 0) return;
 
   const terms: string[] = [];
-  if (toolName === "Grep" || toolName === "Glob") {
+  if (normalizedToolName === "grep" || normalizedToolName === "glob") {
     const pattern = toolInput["pattern"];
     if (typeof pattern === "string" && pattern.length > 0) {
       terms.push(pattern);
     }
   }
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const rawSessionId = data.session_id || data.sessionId;
+  const sessionId =
+    typeof rawSessionId === "string" && rawSessionId.length > 0
+      ? rawSessionId
+      : "unknown";
 
   try {
     const res = await fetch(`${REST_URL}/agentmemory/enrich`, {
diff --git a/src/hooks/prompt-submit.ts b/src/hooks/prompt-submit.ts
index 971b11be..10265a77 100644
--- a/src/hooks/prompt-submit.ts
+++ b/src/hooks/prompt-submit.ts
@@ -30,7 +30,7 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
 
   try {
     await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -42,7 +42,7 @@ async function main() {
         project: data.cwd || process.cwd(),
         cwd: data.cwd || process.cwd(),
         timestamp: new Date().toISOString(),
-        data: { prompt: data.prompt },
+        data: { prompt: data.prompt ?? data.userPrompt },
       }),
       signal: AbortSignal.timeout(3000),
     });
diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts
index 31bef22e..7efa550e 100644
--- a/src/hooks/session-end.ts
+++ b/src/hooks/session-end.ts
@@ -30,7 +30,7 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
 
   try {
     await fetch(`${REST_URL}/agentmemory/session/end`, {
@@ -76,4 +76,4 @@ async function main() {
   }
 }
 
-main();
\ No newline at end of file
+main();
diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts
index a6cefe41..444edc32 100644
--- a/src/hooks/session-start.ts
+++ b/src/hooks/session-start.ts
@@ -49,7 +49,8 @@ async function main() {
   if (isSdkChildContext(data)) return;
 
   const sessionId =
-    (data.session_id as string) || `ses_${Date.now().toString(36)}`;
+    ((data.session_id || data.sessionId) as string) ||
+    `ses_${Date.now().toString(36)}`;
   const project = (data.cwd as string) || process.cwd();
 
   const url = `${REST_URL}/agentmemory/session/start`;
diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts
index 1f2f5b8a..18ca371d 100644
--- a/src/hooks/stop.ts
+++ b/src/hooks/stop.ts
@@ -37,7 +37,7 @@ async function main() {
     return;
   }
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
 
   try {
     await fetch(`${REST_URL}/agentmemory/summarize`, {
@@ -51,4 +51,4 @@ async function main() {
   }
 }
 
-main();
\ No newline at end of file
+main();
diff --git a/src/hooks/subagent-start.ts b/src/hooks/subagent-start.ts
index 3f730adb..3463da0b 100644
--- a/src/hooks/subagent-start.ts
+++ b/src/hooks/subagent-start.ts
@@ -38,7 +38,9 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+  const agentId = data.agent_id || data.agentName;
+  const agentType = data.agent_type || data.agentDisplayName || data.agentName;
 
   fetch(`${REST_URL}/agentmemory/observe`, {
     method: "POST",
@@ -50,8 +52,8 @@ async function main() {
       cwd: data.cwd || process.cwd(),
       timestamp: new Date().toISOString(),
       data: {
-        agent_id: data.agent_id,
-        agent_type: data.agent_type,
+        agent_id: agentId,
+        agent_type: agentType,
       },
     }),
     signal: AbortSignal.timeout(TIMEOUT_MS),
diff --git a/src/hooks/subagent-stop.ts b/src/hooks/subagent-stop.ts
index c555746e..90b99fd6 100644
--- a/src/hooks/subagent-stop.ts
+++ b/src/hooks/subagent-stop.ts
@@ -30,7 +30,9 @@ async function main() {
 
   if (isSdkChildContext(data)) return;
 
-  const sessionId = (data.session_id as string) || "unknown";
+  const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+  const agentId = data.agent_id || data.agentName;
+  const agentType = data.agent_type || data.agentDisplayName || data.agentName;
   const lastMsg =
     typeof data.last_assistant_message === "string"
       ? data.last_assistant_message.slice(0, 4000)
@@ -47,8 +49,8 @@ async function main() {
         cwd: data.cwd || process.cwd(),
         timestamp: new Date().toISOString(),
         data: {
-          agent_id: data.agent_id,
-          agent_type: data.agent_type,
+          agent_id: agentId,
+          agent_type: agentType,
           last_message: lastMsg,
         },
       }),
diff --git a/src/index.ts b/src/index.ts
index b9b9e84d..704d4809 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -11,6 +11,7 @@ import {
   isAutoCompressEnabled,
   isConsolidationEnabled,
   isContextInjectionEnabled,
+  isDropStaleIndexEnabled,
 } from "./config.js";
 import {
   createProvider,
@@ -376,8 +377,7 @@ async function main() {
         .map((m) => `${m.obsId} (dim=${m.dim})`)
         .join(", ");
       const distinct = Array.from(seenDimensions).sort((a, b) => a - b).join(", ");
-      const dropStale =
-        process.env["AGENTMEMORY_DROP_STALE_INDEX"] === "true";
+      const dropStale = isDropStaleIndexEnabled();
       if (dropStale) {
         console.warn(
           `[agentmemory] Persisted vector index has ${mismatches.length} of ` +
@@ -412,16 +412,24 @@ async function main() {
   const needsRebuild = bm25Index.size === 0;
 
   if (needsRebuild) {
-    const indexCount = await rebuildIndex(kv).catch((err) => {
-      console.warn(`[agentmemory] Failed to rebuild search index:`, err);
-      return 0;
-    });
-    if (indexCount > 0) {
-      bootLog(
-        `Search index rebuilt: ${indexCount} entries`,
-      );
-      indexPersistence.scheduleSave();
-    }
+    // Fire-and-forget. rebuildIndex iterates every observation across
+    // every session and AWAITS an embedding-provider call per record.
+    // On a large corpus + rate-limited embedding endpoint that can
+    // take HOURS; awaiting it here blocks every subsequent boot step
+    // (including startViewerServer below, leaving the viewer port
+    // unbound for the duration). The index lazily fills in over time
+    // and search degrades gracefully — partial coverage > no viewer
+    // for hours. Errors still surface via the inner .catch.
+    void rebuildIndex(kv)
+      .then((indexCount) => {
+        if (indexCount > 0) {
+          bootLog(`Search index rebuilt: ${indexCount} entries`);
+          indexPersistence.scheduleSave();
+        }
+      })
+      .catch((err) => {
+        console.warn(`[agentmemory] Failed to rebuild search index:`, err);
+      });
   } else {
     // Backfill memories into BM25 for users upgrading from <0.9.5: prior
     // versions of mem::remember never indexed memories, so the persisted
diff --git a/src/mcp/standalone.ts b/src/mcp/standalone.ts
index 86678a76..1413cbf8 100644
--- a/src/mcp/standalone.ts
+++ b/src/mcp/standalone.ts
@@ -89,6 +89,8 @@ interface Validated {
   files?: string[];
   query?: string;
   limit?: number;
+  format?: string;
+  tokenBudget?: number;
   memoryIds?: string[];
   reason?: string;
 }
@@ -118,6 +120,17 @@ function validate(toolName: string, args: Record<string, unknown>): Validated {
       }
       v.query = query.trim();
       v.limit = parseLimit(args["limit"]);
+      const fmt = args["format"];
+      if (typeof fmt === "string" && fmt.trim()) {
+        v.format = fmt.trim().toLowerCase();
+      }
+      const budget = args["token_budget"];
+      if (typeof budget === "number" && Number.isFinite(budget) && budget > 0) {
+        v.tokenBudget = Math.floor(budget);
+      } else if (typeof budget === "string" && budget.trim()) {
+        const n = Number(budget);
+        if (Number.isFinite(n) && n > 0) v.tokenBudget = Math.floor(n);
+      }
       return v;
     }
     case "memory_sessions": {
@@ -159,11 +172,26 @@ async function handleProxy(
       });
       return textResponse(result);
     }
-    case "memory_recall":
+    case "memory_recall": {
+      const body: Record<string, unknown> = {
+        query: v.query,
+        limit: v.limit,
+        format: v.format ?? "full",
+      };
+      if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget;
+      const result = await handle.call("/agentmemory/search", {
+        method: "POST",
+        body: JSON.stringify(body),
+      });
+      return textResponse(result, true);
+    }
     case "memory_smart_search": {
+      const body: Record<string, unknown> = { query: v.query, limit: v.limit };
+      if (v.format != null) body["format"] = v.format;
+      if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget;
       const result = await handle.call("/agentmemory/smart-search", {
         method: "POST",
-        body: JSON.stringify({ query: v.query, limit: v.limit }),
+        body: JSON.stringify(body),
       });
       return textResponse(result, true);
     }
diff --git a/src/mcp/transport.ts b/src/mcp/transport.ts
index 766e6472..759ed019 100644
--- a/src/mcp/transport.ts
+++ b/src/mcp/transport.ts
@@ -1,5 +1,3 @@
-import { createInterface } from "node:readline";
-
 export interface JsonRpcRequest {
   jsonrpc: "2.0";
   id?: string | number;
@@ -19,6 +17,11 @@ export type RequestHandler = (
   params: Record<string, unknown>,
 ) => Promise<unknown>;
 
+export interface StdioMessageParser { // Incremental input parser, as returned by createMessageParser.
+  push: (chunk: Buffer | string) => void; // feed the next chunk of raw input
+  isFramed: () => boolean; // true once a Content-Length framed message has been read
+}
+
 // JSON-RPC 2.0 notifications are messages without an `id` field. The spec
 // (and the MCP transport contract) requires the server to NOT send a
 // response for notifications. Some clients tolerate spurious responses;
@@ -130,26 +133,131 @@ export async function processLine(
   }
 }
 
+function findHeaderEnd(buffer: Buffer): { headerEnd: number; bodyStart: number } | null { // Locates the blank line that ends a header block.
+  const crlf = buffer.indexOf("\r\n\r\n"); // CRLF-style terminator
+  const lf = buffer.indexOf("\n\n"); // bare-LF terminator
+  if (crlf === -1 && lf === -1) return null; // no terminator buffered yet
+  if (crlf !== -1 && (lf === -1 || crlf <= lf)) { // the CRLF terminator is the only one, or comes first
+    return { headerEnd: crlf, bodyStart: crlf + 4 }; // body starts after the 4 terminator bytes
+  }
+  return { headerEnd: lf, bodyStart: lf + 2 }; // body starts after the 2 terminator bytes
+}
+
+function parseContentLength(header: string): number | null { // Returns the Content-Length value of a header block, or null when no line matches.
+  for (const line of header.split(/\r?\n/)) { // examine each header line separately
+    const match = line.match(/^content-length:\s*(\d+)\s*$/i); // case-insensitive name; value must be decimal digits only
+    if (match) return Number(match[1]); // first matching line wins
+  }
+  return null;
+}
+
+export function formatResponse( // Serializes a response as a newline-terminated line or as a Content-Length frame.
+  response: JsonRpcResponse,
+  framed: boolean, // true selects Content-Length framing
+): string | Buffer[] {
+  const body = JSON.stringify(response);
+  if (!framed) return `${body}\n`; // unframed: the JSON text followed by a newline
+  const bytes = Buffer.from(body, "utf8");
+  return [Buffer.from(`Content-Length: ${bytes.length}\r\n\r\n`, "ascii"), bytes]; // length is the UTF-8 byte count, not the string length
+}
+
+export function createMessageParser( // Builds an incremental parser that emits each complete message found in the pushed chunks.
+  onMessage: (message: string) => void, // called once per complete framed body or newline-terminated line
+  writeErr: (msg: string) => void = (msg) => process.stderr.write(msg), // diagnostics sink; defaults to stderr
+): StdioMessageParser {
+  let buffer = Buffer.alloc(0); // bytes received but not yet consumed
+  let framed = false; // becomes true once a Content-Length framed message is read; never reset
+
+  function processBuffer(): void {
+    while (buffer.length > 0) {
+      if (buffer[0] === 10 || buffer[0] === 13) { // drop a leading LF (10) or CR (13)
+        buffer = buffer.subarray(1);
+        continue;
+      }
+
+      const preview = buffer.toString("ascii", 0, Math.min(buffer.length, 32)); // only the first 32 bytes are inspected
+      if (/^content-length:/i.test(preview)) { // framed input: header block, blank line, then body
+        const header = findHeaderEnd(buffer);
+        if (!header) return; // header block incomplete; wait for more data
+
+        const headerText = buffer.subarray(0, header.headerEnd).toString("ascii");
+        const contentLength = parseContentLength(headerText);
+        if (contentLength === null) { // no parsable length: report it and skip past the header block
+          writeErr("[mcp-transport] missing Content-Length header\n");
+          buffer = buffer.subarray(header.bodyStart);
+          continue;
+        }
+
+        const messageEnd = header.bodyStart + contentLength;
+        if (buffer.length < messageEnd) return; // body incomplete; wait for more data
+
+        framed = true;
+        const message = buffer.subarray(header.bodyStart, messageEnd).toString("utf8");
+        buffer = buffer.subarray(messageEnd);
+        onMessage(message);
+        continue;
+      }
+
+      const newline = buffer.indexOf(10); // otherwise treat the input as newline-delimited
+      if (newline === -1) return; // line incomplete; wait for more data
+      const line = buffer
+        .subarray(0, newline)
+        .toString("utf8")
+        .replace(/\r$/, ""); // strip the CR of a CRLF line ending
+      buffer = buffer.subarray(newline + 1);
+      onMessage(line);
+    }
+  }
+
+  return {
+    push(chunk) {
+      const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, "utf8"); // string chunks are encoded as UTF-8
+      buffer = Buffer.concat([buffer, bytes]);
+      processBuffer();
+    },
+    isFramed() {
+      return framed;
+    },
+  };
+}
+
 export function createStdioTransport(handler: RequestHandler): {
   start: () => void;
   stop: () => void;
 } {
-  let rl: ReturnType<typeof createInterface> | null = null;
+  let parser: StdioMessageParser | null = null; // stdin message parser; non-null only between start() and stop()
+  let queue = Promise.resolve(); // tail of the chain that handles messages one at a time, in arrival order
 
   const writeResponse = (response: JsonRpcResponse) => {
-    process.stdout.write(JSON.stringify(response) + "\n");
+    const formatted = formatResponse(response, parser?.isFramed() ?? false); // framed reply only once the parser has read framed input
+    if (typeof formatted === "string") {
+      process.stdout.write(formatted);
+      return;
+    }
+    for (const chunk of formatted) { // framed reply: header buffer, then body buffer
+      process.stdout.write(chunk);
+    }
   };
 
-  const onLine = (line: string) => processLine(line, handler, writeResponse);
+  const onData = (chunk: Buffer) => parser?.push(chunk); // no-op while parser is null
 
   return {
     start() {
-      rl = createInterface({ input: process.stdin });
-      rl.on("line", onLine);
+      parser = createMessageParser((message) => {
+        // Store the chain *after* .catch so one rejection cannot stall every later message.
+        queue = queue
+          .then(() => processLine(message, handler, writeResponse))
+          .catch((err: unknown) => {
+            process.stderr.write(
+              `[mcp-transport] request processing failed: ${err instanceof Error ? err.message : String(err)}\n`,
+            );
+          });
+      });
+      process.stdin.on("data", onData);
     },
     stop() {
-      rl?.close();
-      rl = null;
+      process.stdin.off("data", onData);
+      parser = null;
     },
   };
 }
diff --git a/src/prompts/summary.ts b/src/prompts/summary.ts
index f01b28b8..bd040212 100644
--- a/src/prompts/summary.ts
+++ b/src/prompts/summary.ts
@@ -36,3 +36,52 @@ export function buildSummaryPrompt(observations: Array<{
   })
   return `Session observations (${observations.length} total):\n\n${lines.join('\n\n---\n\n')}`
 }
+
+export const REDUCE_SYSTEM = `You are merging multiple partial summaries of the SAME coding session into one final session summary. The partials are chronological chunks of one continuous session — not separate sessions.
+
+Output EXACTLY this XML format with no additional text:
+
+<summary>
+  <title>Short session title (max 100 chars)</title>
+  <narrative>3-5 sentence narrative covering the whole session</narrative>
+  <decisions>
+    <decision>Key technical decision made</decision>
+  </decisions>
+  <files>
+    <file>path/to/modified/file</file>
+  </files>
+  <concepts>
+    <concept>key concept from session</concept>
+  </concepts>
+</summary>
+
+Rules:
+- Synthesize a single narrative that reflects the whole arc, not a chunk-by-chunk recap
+- Preserve every distinct decision across chunks
+- Union (deduplicate) all files and concepts
+- Title should capture the session's overall outcome`
+
+export function buildReducePrompt(partials: Array<{ // Renders one text section per partial summary and joins them into a single prompt string.
+  title: string
+  narrative: string
+  keyDecisions: string[]
+  filesModified: string[]
+  concepts: string[]
+  obsRangeStart: number // printed in the chunk heading as "obs <start>-<end>"
+  obsRangeEnd: number
+}>): string {
+  const sections = partials.map((p, i) => {
+    const decisions = p.keyDecisions.map((d) => `  - ${d}`).join('\n') // one indented bullet per decision
+    const files = p.filesModified.map((f) => `  - ${f}`).join('\n') // one indented bullet per file
+    const concepts = p.concepts.join(', ') // comma-separated on a single line
+    return `[Chunk ${i + 1} of ${partials.length} — obs ${p.obsRangeStart}-${p.obsRangeEnd}]
+Title: ${p.title}
+Narrative: ${p.narrative}
+Decisions:
+${decisions}
+Files:
+${files}
+Concepts: ${concepts}`
+  })
+  return `Partial summaries (${partials.length} chunks of one session, chronological):\n\n${sections.join('\n\n---\n\n')}` // sections are separated by a "---" line
+}
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index bca2370f..88e10829 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -80,6 +80,13 @@ export class OpenAIProvider implements MemoryProvider {
     const body: Record<string, unknown> = {
       model: this.model,
       max_tokens: this.maxTokens,
+      // OpenAI API spec defines `stream` as defaulting to false, so omitting
+      // it should yield a JSON response. Some OpenAI-compatible proxies
+      // (notably 9Router < 0.4.56 — see decolua/9router#1260) default to
+      // text/event-stream when `stream` is absent, which crashes the
+      // `response.json()` call below with `Unexpected token 'd', "data: {"id"...`.
+      // Send it explicitly so non-spec endpoints route to non-streaming too.
+      stream: false,
       messages: [
         { role: "system", content: systemPrompt },
         { role: "user", content: userPrompt },
diff --git a/src/triggers/api.ts b/src/triggers/api.ts
index 083c2159..66eaadc2 100644
--- a/src/triggers/api.ts
+++ b/src/triggers/api.ts
@@ -9,6 +9,7 @@ import type { ResilientProvider } from "../providers/resilient.js";
 import { VERSION } from "../version.js";
 import { timingSafeCompare } from "../auth.js";
 import { renderViewerDocument } from "../viewer/document.js";
+import { getBoundViewerPort, getViewerSkipped } from "../viewer/server.js";
 import { MAX_FILES_UPPER_BOUND } from "../functions/replay.js";
 import {
   isGraphExtractionEnabled,
@@ -143,7 +144,7 @@ export function registerApiTriggers(
   sdk.registerFunction("api::liveness",
     async (): Promise<Response> => ({
       status_code: 200,
-      body: { status: "ok", service: "agentmemory" },
+      body: { status: "ok", service: "agentmemory", viewerPort: getBoundViewerPort(), viewerSkipped: getViewerSkipped() },
     }),
   );
   sdk.registerTrigger({
@@ -244,6 +245,8 @@ export function registerApiTriggers(
           health: health || null,
           functionMetrics,
           circuitBreaker,
+          viewerPort: getBoundViewerPort(),
+          viewerSkipped: getViewerSkipped(),
         },
       };
     },
diff --git a/src/types.ts b/src/types.ts
index bc38a058..72e347b3 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -266,6 +266,16 @@ export interface CompactSearchResult {
   timestamp: string;
 }
 
+export interface CompactLessonResult {
+  lessonId: string;
+  content: string;
+  confidence: number;
+  score: number;
+  createdAt: string;
+  project?: string;
+  tags: string[];
+}
+
 export interface TimelineEntry {
   observation: CompressedObservation;
   sessionId: string;
@@ -293,7 +303,7 @@ export interface ExportPagination {
 }
 
 export interface ExportData {
-  version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20";
+  version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20" | "0.9.21";
   exportedAt: string;
   sessions: Session[];
   observations: Record<string, CompressedObservation[]>;
diff --git a/src/version.ts b/src/version.ts
index 35bfcbb0..8a1b6acf 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = "0.9.20";
+export const VERSION = "0.9.21";
diff --git a/src/viewer/index.html b/src/viewer/index.html
index 4bd9293d..c2c200b8 100644
--- a/src/viewer/index.html
+++ b/src/viewer/index.html
@@ -1127,6 +1127,39 @@ <h1>agentmemory</h1>
       };
     }
 
+    // IME_SAFE_SEARCH_V2
+    function bindImeSafeSearch(input, ms, onSearch) { // Wires a search input so onSearch is not triggered while an IME composition is in progress.
+      var composing = false; // true between compositionstart and compositionend
+      var justCommitted = false; // true from compositionend until the zero-delay timer below fires
+      var run = debounce(function(value) { onSearch(value); }, ms);
+      input.addEventListener('compositionstart', function() { composing = true; });
+      input.addEventListener('compositionend', function() {
+        composing = false;
+        justCommitted = true;
+        onSearch(input.value); // committed text is searched right away, not through the debounced path
+        setTimeout(function() { justCommitted = false; }, 0);
+      });
+      input.addEventListener('input', function(e) {
+        if (composing || e.isComposing) return; // ignore input events fired during composition
+        if (justCommitted) return; // ignore an input event arriving right after the commit; it was already searched
+        run(input.value);
+      });
+    }
+    function captureSearchFocus(ids) { // Snapshots id and selection of the focused element if its id is listed in ids; otherwise returns null.
+      var a = document.activeElement;
+      if (!a || ids.indexOf(a.id) < 0) return null; // nothing focused, or focus is on an element not in ids
+      return { id: a.id, start: a.selectionStart, end: a.selectionEnd };
+    }
+    function restoreSearchFocus(focus) { // Re-focuses the element with the captured id and restores its selection range.
+      if (!focus) return; // nothing was captured
+      var el = document.getElementById(focus.id); // looked up again by id rather than reusing a node reference
+      if (!el) return;
+      el.focus();
+      if (typeof el.setSelectionRange === 'function') {
+        try { el.setSelectionRange(focus.start, focus.end); } catch (e) {} // selection restore is best-effort; errors are ignored
+      }
+    }
+
     async function api(path, opts) {
       try {
         var url = REST + '/agentmemory/' + path;
@@ -1629,6 +1662,7 @@ <h1>agentmemory</h1>
 
       html += '<button class="btn" style="margin-top:14px;width:100%;font-size:11px;padding:8px;letter-spacing:0.06em;transition:all 0.15s ease;" data-action="rebuild-graph">↻ Rebuild Graph</button>';
       html += '<div id="selected-node-panel"></div>';
+      var __focus = captureSearchFocus(['graph-search']);
       sb.innerHTML = html;
 
       sb.querySelectorAll('input[type="checkbox"]').forEach(function(cb) {
@@ -1640,11 +1674,9 @@ <h1>agentmemory</h1>
 
       var searchInput = document.getElementById('graph-search');
       if (searchInput) {
-        searchInput.addEventListener('input', debounce(function() {
-          graphSearchTerm = this.value.toLowerCase();
-          renderGraph();
-        }, 150));
+        bindImeSafeSearch(searchInput, 200, function(v){ graphSearchTerm = v.toLowerCase(); renderGraph(); });
       }
+      restoreSearchFocus(__focus);
     }
 
     function initGraph() {
@@ -2198,7 +2230,26 @@ <h1>agentmemory</h1>
 
       var filtered = items.filter(function(m) {
         if (typeFilter && m.type !== typeFilter) return false;
-        if (search && !(m.title || '').toLowerCase().includes(search) && !(m.content || '').toLowerCase().includes(search)) return false;
+        const normalizedSearch = (search || '')
+          .normalize("NFKC")
+          .toLowerCase();
+ 
+        const normalizedTitle = (m.title || '')
+          .normalize("NFKC")
+          .toLowerCase();
+
+        const normalizedContent = (m.content || '')
+          .normalize("NFKC")
+          .toLowerCase();
+
+        if (
+          search &&
+          !normalizedTitle.includes(normalizedSearch) &&
+          !normalizedContent.includes(normalizedSearch)
+        ) {
+        return false;
+        }
+
         return true;
       });
 
@@ -2261,14 +2312,12 @@ <h1>agentmemory</h1>
         html += '</table>';
       }
 
+      var __focus = captureSearchFocus(['mem-search']);
       el.innerHTML = html;
 
       var searchInput = document.getElementById('mem-search');
       if (searchInput) {
-        searchInput.addEventListener('input', debounce(function() {
-          state.memories.search = this.value;
-          renderMemories();
-        }, 200));
+        bindImeSafeSearch(searchInput, 200, function(v){ state.memories.search = v; renderMemories(); });
       }
       var typeSelect = document.getElementById('mem-type-filter');
       if (typeSelect) {
@@ -2277,6 +2326,7 @@ <h1>agentmemory</h1>
           renderMemories();
         });
       }
+      restoreSearchFocus(__focus);
     }
 
     function deleteMemory(id, title) {
@@ -2853,7 +2903,7 @@ <h1>agentmemory</h1>
       html += '</div></div>';
 
       html += '<div style="display:flex;gap:8px;margin-bottom:12px;">';
-      html += '<input class="search-input" type="text" placeholder="Search lessons..." value="' + esc(state.lessons.search) + '" oninput="state.lessons.search=this.value;renderLessons()" style="flex:1" />';
+      html += '<input id="lessons-search" class="search-input" type="text" placeholder="Search lessons..." value="' + esc(state.lessons.search) + '" style="flex:1" />';
       html += '<span style="font-size:12px;color:var(--ink-faint);align-self:center;">' + items.length + ' lessons</span>';
       html += '</div>';
 
@@ -2882,7 +2932,11 @@ <h1>agentmemory</h1>
         html += '</tbody></table>';
       }
 
+      var __focus = captureSearchFocus(['lessons-search']);
       el.innerHTML = html;
+      var __ls = document.getElementById('lessons-search');
+      if (__ls) bindImeSafeSearch(__ls, 200, function(v){ state.lessons.search = v; renderLessons(); });
+      restoreSearchFocus(__focus);
     }
 
     async function loadActions() {
@@ -2912,8 +2966,8 @@ <h1>agentmemory</h1>
       }
 
       var html = '<div style="display:flex;gap:8px;margin-bottom:12px;flex-wrap:wrap;">';
-      html += '<input class="search-input" type="text" placeholder="Search actions..." value="' + esc(state.actions.search) + '" oninput="state.actions.search=this.value;renderActions()" style="flex:1;min-width:200px" />';
-      html += '<select style="padding:4px 8px;font-size:12px;border:1px solid var(--border);border-radius:4px;background:var(--bg);color:var(--ink);" onchange="state.actions.statusFilter=this.value;renderActions()">';
+      html += '<input id="actions-search" class="search-input" type="text" placeholder="Search actions..." value="' + esc(state.actions.search) + '" style="flex:1;min-width:200px" />';
+      html += '<select id="actions-status-filter" style="padding:4px 8px;font-size:12px;border:1px solid var(--border);border-radius:4px;background:var(--bg);color:var(--ink);">';
       html += '<option value="">All statuses</option>';
       ['pending','active','done','blocked','cancelled'].forEach(function(s) {
         html += '<option value="' + s + '"' + (statusFilter === s ? ' selected' : '') + '>' + s + '</option>';
@@ -2951,7 +3005,13 @@ <h1>agentmemory</h1>
         html += '</tbody></table>';
       }
 
+      var __focus = captureSearchFocus(['actions-search']);
       el.innerHTML = html;
+      var __as = document.getElementById('actions-search');
+      if (__as) bindImeSafeSearch(__as, 200, function(v){ state.actions.search = v; renderActions(); });
+      var __af = document.getElementById('actions-status-filter');
+      if (__af) __af.addEventListener('change', function(){ state.actions.statusFilter = this.value; renderActions(); });
+      restoreSearchFocus(__focus);
     }
 
     async function loadCrystals() {
@@ -2999,7 +3059,7 @@ <h1>agentmemory</h1>
       html += '</div></div>';
 
       html += '<div style="display:flex;gap:8px;margin-bottom:12px;">';
-      html += '<input class="search-input" type="text" placeholder="Search crystals..." value="' + esc(state.crystals.search) + '" oninput="state.crystals.search=this.value;renderCrystals()" style="flex:1" />';
+      html += '<input id="crystals-search" class="search-input" type="text" placeholder="Search crystals..." value="' + esc(state.crystals.search) + '" style="flex:1" />';
       html += '<span style="font-size:12px;color:var(--ink-faint);align-self:center;">' + items.length + ' crystals</span>';
       html += '</div>';
 
@@ -3060,7 +3120,11 @@ <h1>agentmemory</h1>
         });
       }
 
+      var __focus = captureSearchFocus(['crystals-search']);
       el.innerHTML = html;
+      var __cs = document.getElementById('crystals-search');
+      if (__cs) bindImeSafeSearch(__cs, 200, function(v){ state.crystals.search = v; renderCrystals(); });
+      restoreSearchFocus(__focus);
     }
 
     async function loadAudit() {
diff --git a/src/viewer/server.ts b/src/viewer/server.ts
index bd8e3c63..71598690 100644
--- a/src/viewer/server.ts
+++ b/src/viewer/server.ts
@@ -131,6 +131,16 @@ function readBody(req: IncomingMessage): Promise<string> {
 
 const MAX_VIEWER_PORT_RETRIES = 10;
 
+let boundViewerPort: number | null = null; // port read from server.address() when "listening" fires; null before that and after the error handler gives up
+let viewerSkipped = false; // set to true by the error handler branches that log and abandon the viewer
+
+export function getBoundViewerPort(): number | null {
+  return boundViewerPort; // module-level value; startViewerServer resets it to null on every call
+}
+export function getViewerSkipped(): boolean {
+  return viewerSkipped; // module-level value; startViewerServer resets it to false on every call
+}
+
 export function startViewerServer(
   port: number,
   _kv: unknown,
@@ -138,6 +148,10 @@ export function startViewerServer(
   secret?: string,
   restPort?: number,
 ): Server {
+  // Reset exported runtime state for each start attempt.
+  boundViewerPort = null;
+  viewerSkipped = false;
+
   const resolvedRestPort = restPort ?? port - 2;
   const requestedPort = port;
   // Computed lazily on first request — `port` may be 0 here (OS-assigned)
@@ -227,6 +241,12 @@ export function startViewerServer(
   };
 
   server.on("listening", () => {
+    const addr = server.address();
+    boundViewerPort =
+      addr && typeof addr === "object" && "port" in addr
+        ? addr.port
+        : currentPort;
+    viewerSkipped = false;
     if (currentPort === requestedPort) {
       console.log(`[agentmemory] Viewer: http://localhost:${currentPort}`);
     } else {
@@ -244,10 +264,14 @@ export function startViewerServer(
       return;
     }
     if (err.code === "EADDRINUSE") {
+      boundViewerPort = null;
+      viewerSkipped = true;
       console.warn(
         `[agentmemory] Viewer ports ${requestedPort}-${requestedPort + MAX_VIEWER_PORT_RETRIES} all in use, skipping viewer.`,
       );
     } else {
+      boundViewerPort = null;
+      viewerSkipped = true;
       console.error(`[agentmemory] Viewer error:`, err.message);
     }
   });
diff --git a/test/cli-connect.test.ts b/test/cli-connect.test.ts
index 99174dac..fbb8c2b5 100644
--- a/test/cli-connect.test.ts
+++ b/test/cli-connect.test.ts
@@ -10,6 +10,17 @@ import {
 } from "../src/cli/connect/index.js";
 import type { ConnectAdapter } from "../src/cli/connect/types.js";
 
+const EXPECTED_COPILOT_MCP_COMMAND =
+  process.platform === "win32"
+    ? { // Windows: npx is expected to be launched through the command interpreter (ComSpec/COMSPEC, else cmd.exe)
+        command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe",
+        args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"],
+      }
+    : { // every other platform: npx is expected to be invoked directly
+        command: "npx",
+        args: ["-y", "@agentmemory/mcp"],
+      };
+
 describe("agentmemory connect — dispatcher", () => {
   it("resolves every known agent by lowercase name", () => {
     for (const name of knownAgents()) {
@@ -29,10 +40,11 @@ describe("agentmemory connect — dispatcher", () => {
     expect(resolveAdapter("")).toBeNull();
   });
 
-  it("ships exactly the 8 agents specified by the spec", () => {
+  it("ships exactly the 9 agents specified by the spec", () => {
     expect(knownAgents().sort()).toEqual(
       [
         "claude-code",
+        "copilot-cli",
         "codex",
         "cursor",
         "gemini-cli",
@@ -42,7 +54,7 @@ describe("agentmemory connect — dispatcher", () => {
         "pi",
       ].sort(),
     );
-    expect(ADAPTERS.length).toBe(8);
+    expect(ADAPTERS.length).toBe(9);
   });
 
   it("every adapter exposes detect() and install()", () => {
@@ -175,7 +187,193 @@ describe("agentmemory connect — claude-code adapter (mock filesystem)", () =>
     if (result.kind === "installed") {
       expect(result.backupPath).toBeDefined();
       expect(existsSync(result.backupPath!)).toBe(true);
-      expect(result.backupPath!).toContain(".agentmemory/backups");
+      expect(result.backupPath!).toContain(join(".agentmemory", "backups"));
+    }
+  });
+});
+
+describe("agentmemory connect — copilot-cli adapter (mock filesystem)", () => {
+  let tmpHome: string;
+  let originalHome: string | undefined;
+  let originalUserprofile: string | undefined;
+  let originalCopilotHome: string | undefined;
+  let importCounter = 0;
+
+  beforeEach(() => {
+    tmpHome = mkdtempSync(join(tmpdir(), "am-connect-"));
+    originalHome = process.env["HOME"];
+    originalUserprofile = process.env["USERPROFILE"];
+    originalCopilotHome = process.env["COPILOT_HOME"];
+    process.env["HOME"] = tmpHome;
+    process.env["USERPROFILE"] = tmpHome;
+    delete process.env["COPILOT_HOME"];
+    vi.resetModules();
+  });
+
+  afterEach(() => {
+    if (originalHome !== undefined) process.env["HOME"] = originalHome;
+    else delete process.env["HOME"];
+    if (originalUserprofile !== undefined)
+      process.env["USERPROFILE"] = originalUserprofile;
+    else delete process.env["USERPROFILE"];
+    if (originalCopilotHome !== undefined)
+      process.env["COPILOT_HOME"] = originalCopilotHome;
+    else delete process.env["COPILOT_HOME"];
+    rmSync(tmpHome, { recursive: true, force: true });
+    vi.resetModules();
+  });
+
+  async function loadAdapter(): Promise<ConnectAdapter> { // imports the copilot-cli adapter under a specifier that differs on every call
+    const mod = await import(
+      "../src/cli/connect/copilot-cli.js?t=" + Date.now() + "-" + importCounter++ // timestamp + counter suffix; presumably a cache-buster so the env set in beforeEach is re-read — confirm
+    );
+    return (mod as { adapter: ConnectAdapter }).adapter; // unchecked cast: assumes the module exports `adapter`
+  }
+
+  it("detect() returns false when ~/.copilot doesn't exist", async () => {
+    const a = await loadAdapter();
+    expect(a.detect()).toBe(false);
+  });
+
+  it("install() writes mcpServers.agentmemory into ~/.copilot/mcp-config.json and is idempotent", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+
+    const a = await loadAdapter();
+    expect(a.detect()).toBe(true);
+
+    const first = await a.install({ dryRun: false, force: false });
+    expect(first.kind).toBe("installed");
+
+    const config = JSON.parse(
+      readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+    );
+    expect(config.mcpServers.agentmemory).toEqual({
+      type: "local",
+      ...EXPECTED_COPILOT_MCP_COMMAND,
+      env: {
+        AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+        AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+      },
+      tools: ["*"],
+    });
+
+    const second = await a.install({ dryRun: false, force: false });
+    expect(second.kind).toBe("already-wired");
+  });
+
+  it("honors COPILOT_HOME when locating mcp-config.json", async () => {
+    const customCopilotHome = join(tmpHome, "custom-copilot-home");
+    process.env["COPILOT_HOME"] = customCopilotHome;
+    require("node:fs").mkdirSync(customCopilotHome, { recursive: true });
+
+    const a = await loadAdapter();
+    expect(a.detect()).toBe(true);
+
+    const result = await a.install({ dryRun: false, force: false });
+    expect(result.kind).toBe("installed");
+    expect(result.mutatedPath).toBe(join(customCopilotHome, "mcp-config.json"));
+    expect(existsSync(join(customCopilotHome, "mcp-config.json"))).toBe(true);
+    expect(existsSync(join(tmpHome, ".copilot", "mcp-config.json"))).toBe(false);
+  });
+
+  it("install() preserves unrelated top-level keys and mcpServers entries", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+    writeFileSync(
+      join(tmpHome, ".copilot", "mcp-config.json"),
+      JSON.stringify({
+        otherTopLevel: { keep: true },
+        mcpServers: { other: { type: "local", command: "other" } },
+      }),
+    );
+
+    const a = await loadAdapter();
+    const result = await a.install({ dryRun: false, force: false });
+    expect(result.kind).toBe("installed");
+
+    const config = JSON.parse(
+      readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+    );
+    expect(config.otherTopLevel).toEqual({ keep: true });
+    expect(config.mcpServers.other).toEqual({ type: "local", command: "other" });
+    expect(config.mcpServers.agentmemory.command).toBe(
+      EXPECTED_COPILOT_MCP_COMMAND.command,
+    );
+  });
+
+  it("install() writes env passthrough block for AGENTMEMORY_URL + AGENTMEMORY_SECRET", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+
+    const a = await loadAdapter();
+    const result = await a.install({ dryRun: false, force: false });
+    expect(result.kind).toBe("installed");
+
+    const config = JSON.parse(
+      readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+    );
+    const entry = config.mcpServers.agentmemory;
+    expect(entry.env.AGENTMEMORY_URL).toBe("${AGENTMEMORY_URL}");
+    expect(entry.env.AGENTMEMORY_SECRET).toBe("${AGENTMEMORY_SECRET}");
+  });
+
+  it("install() with --force rewrites even when already wired", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+    writeFileSync(
+      join(tmpHome, ".copilot", "mcp-config.json"),
+      JSON.stringify({
+        mcpServers: {
+          agentmemory: {
+            type: "local",
+            ...EXPECTED_COPILOT_MCP_COMMAND,
+            env: {
+              AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+              AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+            },
+            tools: ["memory_save"],
+          },
+        },
+      }),
+    );
+
+    const a = await loadAdapter();
+    const result = await a.install({ dryRun: false, force: true });
+    expect(result.kind).toBe("installed");
+
+    const config = JSON.parse(
+      readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+    );
+    expect(config.mcpServers.agentmemory.tools).toEqual(["*"]);
+  });
+
+  it("install() with --dry-run does not mutate the file", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+    const before = JSON.stringify({ mcpServers: {} });
+    writeFileSync(join(tmpHome, ".copilot", "mcp-config.json"), before);
+
+    const a = await loadAdapter();
+    const result = await a.install({ dryRun: true, force: false });
+    expect(result.kind).toBe("installed");
+
+    const after = readFileSync(
+      join(tmpHome, ".copilot", "mcp-config.json"),
+      "utf-8",
+    );
+    expect(after).toBe(before);
+  });
+
+  it("install() creates a backup file when config pre-exists", async () => {
+    require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+    writeFileSync(
+      join(tmpHome, ".copilot", "mcp-config.json"),
+      JSON.stringify({ mcpServers: {} }),
+    );
+
+    const a = await loadAdapter();
+    const result = await a.install({ dryRun: false, force: false });
+    expect(result.kind).toBe("installed");
+    if (result.kind === "installed") {
+      expect(result.backupPath).toBeDefined();
+      expect(existsSync(result.backupPath!)).toBe(true);
+      expect(result.backupPath!).toContain(join(".agentmemory", "backups"));
     }
   });
 });
diff --git a/test/cli-onboarding.test.ts b/test/cli-onboarding.test.ts
new file mode 100644
index 00000000..9779a7e9
--- /dev/null
+++ b/test/cli-onboarding.test.ts
@@ -0,0 +1,94 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+const prompts = vi.hoisted(() => ({
+  note: vi.fn(),
+  multiselect: vi.fn(async () => {
+    throw new Error("interactive multiselect should not run in non-TTY onboarding");
+  }),
+  select: vi.fn(async () => {
+    throw new Error("interactive select should not run in non-TTY onboarding");
+  }),
+  confirm: vi.fn(async () => true),
+  isCancel: vi.fn(() => false),
+  cancel: vi.fn(),
+  log: {
+    warn: vi.fn(),
+    step: vi.fn(),
+    error: vi.fn(),
+  },
+}));
+
+vi.mock("@clack/prompts", () => prompts);
+vi.mock("../src/cli/connect/index.js", () => ({
+  resolveAdapter: vi.fn(),
+  runAdapter: vi.fn(),
+}));
+
+const ORIGINAL_HOME = process.env["HOME"];
+const ORIGINAL_USERPROFILE = process.env["USERPROFILE"];
+const stdinTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdin, "isTTY");
+const stdoutTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdout, "isTTY");
+
+let sandboxHome: string;
+
+function setTTY(value: boolean): void {
+  // Override isTTY on stdin, then stdout; `configurable` keeps the property replaceable afterwards.
+  for (const stream of [process.stdin, process.stdout]) Object.defineProperty(stream, "isTTY", { value, configurable: true });
+}
+
+function restoreTTY(): void {
+  // Reinstate the descriptor captured at module load; a stream that had no own isTTY simply loses the override.
+  for (const [stream, saved] of [[process.stdin, stdinTtyDescriptor], [process.stdout, stdoutTtyDescriptor]] as const)
+    if (saved) Object.defineProperty(stream, "isTTY", saved);
+    else delete (stream as { isTTY?: boolean }).isTTY;
+}
+
+async function freshOnboarding() { // returns a newly evaluated copy of the onboarding module
+  vi.resetModules(); // clear vitest's module registry so the import below evaluates onboarding.js again
+  return await import("../src/cli/onboarding.js"); // resolves to the module namespace; the test destructures runOnboarding from it
+}
+
+describe("cli onboarding", () => {
+  beforeEach(() => {
+    sandboxHome = mkdtempSync(join(tmpdir(), "agentmemory-onboarding-"));
+    process.env["HOME"] = sandboxHome;
+    process.env["USERPROFILE"] = sandboxHome;
+    setTTY(false);
+    vi.clearAllMocks();
+  });
+
+  afterEach(() => {
+    restoreTTY();
+    if (ORIGINAL_HOME === undefined) delete process.env["HOME"];
+    else process.env["HOME"] = ORIGINAL_HOME;
+    if (ORIGINAL_USERPROFILE === undefined) delete process.env["USERPROFILE"];
+    else process.env["USERPROFILE"] = ORIGINAL_USERPROFILE;
+    rmSync(sandboxHome, { recursive: true, force: true });
+  });
+
+  it("does not prompt and records default preferences when onboarding runs without a TTY", async () => {
+    const { runOnboarding } = await freshOnboarding();
+
+    const result = await runOnboarding();
+
+    expect(result).toEqual({ agents: [], provider: null });
+    expect(prompts.multiselect).not.toHaveBeenCalled();
+    expect(prompts.select).not.toHaveBeenCalled();
+    expect(prompts.confirm).not.toHaveBeenCalled();
+
+    const preferencesPath = join(sandboxHome, ".agentmemory", "preferences.json");
+    expect(existsSync(preferencesPath)).toBe(true);
+    const preferences = JSON.parse(readFileSync(preferencesPath, "utf-8"));
+    expect(preferences).toMatchObject({
+      schemaVersion: 1,
+      lastAgent: null,
+      lastAgents: [],
+      lastProvider: null,
+      skipSplash: true,
+    });
+    expect(typeof preferences.firstRunAt).toBe("string");
+  });
+});
diff --git a/test/codex-connect-hooks.test.ts b/test/codex-connect-hooks.test.ts
new file mode 100644
index 00000000..75accbee
--- /dev/null
+++ b/test/codex-connect-hooks.test.ts
@@ -0,0 +1,137 @@
+import { describe, it, expect } from "vitest";
+import { writeFileSync, readFileSync, mkdirSync, rmSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { tmpdir } from "node:os";
+import {
+  buildMergedHooks,
+  findPluginRoot,
+  type HookManifest,
+} from "../src/cli/connect/codex-hooks.js";
+
+const PLUGIN_ROOT = resolve(__dirname, "..", "plugin");
+
+describe("findPluginRoot", () => {
+  it("locates the bundled plugin/ directory from src/cli/connect/", () => {
+    const root = findPluginRoot();
+    expect(root).toBe(PLUGIN_ROOT);
+  });
+});
+
+describe("buildMergedHooks", () => {
+  it("rewrites ${CLAUDE_PLUGIN_ROOT} to absolute pluginRoot in every command", () => {
+    const merged = buildMergedHooks(null, PLUGIN_ROOT);
+    for (const entries of Object.values(merged.hooks)) {
+      for (const entry of entries) {
+        for (const handler of entry.hooks) {
+          expect(handler.command).not.toContain("${CLAUDE_PLUGIN_ROOT}");
+          expect(handler.command).toContain(`${PLUGIN_ROOT}/scripts/`);
+        }
+      }
+    }
+  });
+
+  it("preserves matchers from the bundled manifest (e.g. PreToolUse)", () => {
+    const merged = buildMergedHooks(null, PLUGIN_ROOT);
+    const preToolUse = merged.hooks["PreToolUse"];
+    expect(preToolUse).toBeDefined();
+    expect(preToolUse!.length).toBeGreaterThan(0);
+    expect(preToolUse![0].matcher).toBe("Edit|Write|Read|Glob|Grep");
+  });
+
+  it("includes all six expected lifecycle events", () => {
+    const merged = buildMergedHooks(null, PLUGIN_ROOT);
+    for (const event of [
+      "SessionStart",
+      "UserPromptSubmit",
+      "PreToolUse",
+      "PostToolUse",
+      "PreCompact",
+      "Stop",
+    ]) {
+      expect(Object.keys(merged.hooks)).toContain(event);
+    }
+  });
+
+  it("appends to existing user hooks without dropping them", () => {
+    const existing: HookManifest = {
+      hooks: {
+        SessionStart: [
+          {
+            hooks: [{ type: "command", command: "echo user-custom" }],
+          },
+        ],
+        UserPromptSubmit: [
+          {
+            hooks: [{ type: "command", command: "echo another-user-hook" }],
+          },
+        ],
+      },
+    };
+    const merged = buildMergedHooks(existing, PLUGIN_ROOT);
+    const sessionStart = merged.hooks["SessionStart"]!;
+    const userHook = sessionStart.find((e) =>
+      e.hooks.some((h) => h.command === "echo user-custom"),
+    );
+    expect(userHook, "user's SessionStart hook should survive").toBeDefined();
+    const ours = sessionStart.find((e) =>
+      e.hooks.some((h) => h.command.includes(`${PLUGIN_ROOT}/scripts/session-start.mjs`)),
+    );
+    expect(ours, "agentmemory SessionStart hook should be appended").toBeDefined();
+  });
+
+  it("re-install strips previous agentmemory entries (idempotent by script path)", () => {
+    const first = buildMergedHooks(null, PLUGIN_ROOT);
+    const second = buildMergedHooks(first, PLUGIN_ROOT);
+    for (const event of Object.keys(first.hooks)) {
+      expect(
+        second.hooks[event]!.length,
+        `${event} should not double after second install`,
+      ).toBe(first.hooks[event]!.length);
+    }
+  });
+
+  it("re-install preserves unrelated user entries", () => {
+    const userEntry = {
+      hooks: [{ type: "command", command: "echo user-untouchable" }],
+    };
+    const withUser: HookManifest = {
+      hooks: {
+        SessionStart: [userEntry],
+        Stop: [{ hooks: [{ type: "command", command: "echo also-user" }] }],
+      },
+    };
+    const installed = buildMergedHooks(withUser, PLUGIN_ROOT);
+    const reinstalled = buildMergedHooks(installed, PLUGIN_ROOT);
+    expect(
+      reinstalled.hooks["SessionStart"]!.some((e) =>
+        e.hooks.some((h) => h.command === "echo user-untouchable"),
+      ),
+    ).toBe(true);
+    expect(
+      reinstalled.hooks["Stop"]!.some((e) =>
+        e.hooks.some((h) => h.command === "echo also-user"),
+      ),
+    ).toBe(true);
+  });
+
+  it("handles empty existing manifest object", () => {
+    const merged = buildMergedHooks({ hooks: {} }, PLUGIN_ROOT);
+    expect(Object.keys(merged.hooks).length).toBeGreaterThan(0);
+  });
+});
+
+describe("buildMergedHooks file round-trip", () => {
+  it("produces JSON that parses back to a structurally equivalent manifest", () => {
+    const dir = join(tmpdir(), `agentmemory-codex-hooks-${process.pid}-${Date.now()}`);
+    mkdirSync(dir, { recursive: true });
+    const path = join(dir, "hooks.json");
+    try {
+      const merged = buildMergedHooks(null, PLUGIN_ROOT);
+      writeFileSync(path, `${JSON.stringify(merged, null, 2)}\n`, "utf-8");
+      const reread = JSON.parse(readFileSync(path, "utf-8")) as HookManifest;
+      expect(Object.keys(reread.hooks).sort()).toEqual(Object.keys(merged.hooks).sort());
+    } finally {
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/test/codex-plugin.test.ts b/test/codex-plugin.test.ts
index bb380876..bbbd88db 100644
--- a/test/codex-plugin.test.ts
+++ b/test/codex-plugin.test.ts
@@ -9,6 +9,29 @@ function readJson<T = unknown>(path: string): T {
   return JSON.parse(readFileSync(path, "utf-8")) as T;
 }
 
+type HookHandler = { type: string; command: string };
+type HookEntry = { hooks: HookHandler[] };
+/** Collects every handler command string from the hook manifest at `path`, in manifest order. */
+function hookCommands(path: string): string[] {
+  const commands: string[] = [];
+  for (const entries of Object.values(readJson<{ hooks: Record<string, HookEntry[]> }>(path).hooks))
+    for (const entry of entries) commands.push(...entry.hooks.map((handler) => handler.command));
+  return commands;
+}
+
+describe("Plugin hook manifests", () => {
+  it("quote plugin script paths so roots with spaces stay intact", () => {
+    for (const manifest of ["hooks.json", "hooks.codex.json"]) {
+      const commands = hookCommands(join(pluginRoot, "hooks", manifest));
+      expect(commands.length, `${manifest} should contain hook commands`).toBeGreaterThan(0);
+
+      for (const command of commands) {
+        expect(command).toMatch(/^node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/[^\s"]+\.mjs"$/);
+      }
+    }
+  });
+});
+
 describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
   it("ships .codex-plugin/plugin.json with kebab-case name + version + references", () => {
     const manifestPath = join(pluginRoot, ".codex-plugin/plugin.json");
@@ -72,8 +95,6 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
   });
 
   it("hook command scripts referenced in hooks.codex.json exist on disk", () => {
-    type HookHandler = { type: string; command: string };
-    type HookEntry = { hooks: HookHandler[] };
     const hooks = readJson<{ hooks: Record<string, HookEntry[]> }>(
       join(pluginRoot, "hooks/hooks.codex.json"),
     );
@@ -81,7 +102,7 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
     for (const entries of Object.values(hooks.hooks)) {
       for (const entry of entries) {
         for (const handler of entry.hooks) {
-          const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s]+)/);
+          const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s"]+)/);
           if (match) scriptRefs.add(match[1]);
         }
       }
diff --git a/test/copilot-plugin.test.ts b/test/copilot-plugin.test.ts
new file mode 100644
index 00000000..e4121688
--- /dev/null
+++ b/test/copilot-plugin.test.ts
@@ -0,0 +1,377 @@
+import { describe, expect, it } from "vitest";
+import { readFileSync, existsSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { createServer } from "node:http";
+import { spawn } from "node:child_process";
+
+const repoRoot = resolve(__dirname, "..");
+const pluginRoot = join(repoRoot, "plugin");
+
+function readJson<T = unknown>(path: string): T { // reads `path` as UTF-8 text and parses it as JSON
+  return JSON.parse(readFileSync(path, "utf-8")) as T; // unchecked cast: the parsed value is not validated against T
+}
+
+const SUPPORTED_COPILOT_EVENTS = new Set([
+  "sessionStart",
+  "userPromptSubmitted",
+  "preToolUse",
+  "postToolUse",
+  "postToolUseFailure",
+  "preCompact",
+  "agentStop",
+  "sessionEnd",
+  "subagentStart",
+  "subagentStop",
+  "notification",
+]);
+
+const REQUIRED_MINIMUM_EVENTS = [
+  "sessionStart",
+  "userPromptSubmitted",
+  "preToolUse",
+  "postToolUse",
+  "agentStop",
+];
+
+const KNOWN_SKILL_DIRS = [
+  "recall",
+  "remember",
+  "session-history",
+  "forget",
+  "handoff",
+  "recap",
+  "commit-context",
+  "commit-history",
+];
+
+describe("Copilot plugin manifest (plugin/plugin.json)", () => {
+  it("manifest exists with kebab-case name, version, and required fields", () => {
+    const manifestPath = join(pluginRoot, "plugin.json");
+    expect(existsSync(manifestPath)).toBe(true);
+    const manifest = readJson<{
+      name: string;
+      version: string;
+      description?: string;
+      skills?: string;
+      mcpServers?: string;
+      hooks?: string;
+    }>(manifestPath);
+    expect(manifest.name).toBe("agentmemory");
+    expect(manifest.name).toMatch(/^[a-z][a-z0-9-]*$/);
+    expect(manifest.version).toMatch(/^\d+\.\d+\.\d+/);
+    expect(manifest.skills).toBeDefined();
+    expect(manifest.mcpServers).toBeDefined();
+    expect(manifest.hooks).toBeDefined();
+  });
+
+  it("manifest version matches main package.json", () => {
+    const pkgVer = readJson<{ version: string }>(join(repoRoot, "package.json")).version;
+    const pluginVer = readJson<{ version: string }>(
+      join(pluginRoot, "plugin.json"),
+    ).version;
+    expect(pluginVer).toBe(pkgVer);
+  });
+
+  it("all referenced manifest paths resolve to existing files / directories", () => {
+    const manifest = readJson<{ skills: string; mcpServers: string; hooks: string }>(
+      join(pluginRoot, "plugin.json"),
+    );
+    const manifestDir = pluginRoot;
+    expect(existsSync(resolve(manifestDir, manifest.skills))).toBe(true);
+    expect(existsSync(resolve(manifestDir, manifest.mcpServers))).toBe(true);
+    expect(existsSync(resolve(manifestDir, manifest.hooks))).toBe(true);
+  });
+
+  it("skills path resolves and contains all known skill directories", () => {
+    const manifest = readJson<{ skills: string }>(join(pluginRoot, "plugin.json"));
+    const manifestDir = pluginRoot;
+    const skillsPath = resolve(manifestDir, manifest.skills);
+    for (const skill of KNOWN_SKILL_DIRS) {
+      expect(
+        existsSync(join(skillsPath, skill)),
+        `missing skill directory: ${skill}`,
+      ).toBe(true);
+    }
+  });
+});
+
+describe("Copilot MCP config (.mcp.copilot.json)", () => {
+  it("file exists with expected shape", () => {
+    const mcpPath = join(pluginRoot, ".mcp.copilot.json");
+    expect(existsSync(mcpPath)).toBe(true);
+    const config = readJson<{
+      mcpServers: {
+        agentmemory: {
+          type: string;
+          command: string;
+          args: string[];
+          env: Record<string, string>;
+          tools: string[];
+        };
+      };
+    }>(mcpPath);
+    const server = config.mcpServers.agentmemory;
+    expect(server.type).toBe("local");
+    expect(server.command).toBe("npx");
+    expect(server.args).toEqual(["-y", "@agentmemory/mcp"]);
+    expect(server.env["AGENTMEMORY_URL"]).toBe("${AGENTMEMORY_URL}");
+    expect(server.env["AGENTMEMORY_SECRET"]).toBe("${AGENTMEMORY_SECRET}");
+    expect(server.tools).toContain("*");
+  });
+});
+
+describe("Copilot hooks config (hooks/hooks.copilot.json)", () => {
+  type HookEntry = {
+    type: string;
+    command?: string;
+    bash?: string;
+    powershell?: string;
+    matcher?: string;
+  };
+
+  function loadHooks() {
+    return readJson<{ version: number; hooks: Record<string, HookEntry[]> }>(
+      join(pluginRoot, "hooks/hooks.copilot.json"),
+    );
+  }
+
+  it("has top-level version === 1 and hooks object", () => {
+    const config = loadHooks();
+    expect(config.version).toBe(1);
+    expect(config.hooks).toBeDefined();
+    expect(typeof config.hooks).toBe("object");
+  });
+
+  it("contains only supported Copilot event names", () => {
+    const config = loadHooks();
+    for (const event of Object.keys(config.hooks)) {
+      expect(
+        SUPPORTED_COPILOT_EVENTS.has(event),
+        `unsupported event "${event}" in hooks.copilot.json`,
+      ).toBe(true);
+    }
+  });
+
+  it("contains all required minimum events", () => {
+    const config = loadHooks();
+    const events = Object.keys(config.hooks);
+    for (const event of REQUIRED_MINIMUM_EVENTS) {
+      expect(events, `missing required event: ${event}`).toContain(event);
+    }
+  });
+
+  it("PreToolUse entry has the correct matcher", () => {
+    const config = loadHooks();
+    const preToolEntries = config.hooks["preToolUse"];
+    expect(preToolEntries).toBeDefined();
+    const withMatcher = preToolEntries.find(
+      (e) => e.matcher === "edit|write|create|read|view|glob|grep",
+    );
+    expect(
+      withMatcher,
+      "PreToolUse must have matcher edit|write|create|read|view|glob|grep",
+    ).toBeDefined();
+  });
+
+  it("every handler has type === 'command' and exactly one of command/bash/powershell", () => {
+    const config = loadHooks();
+    for (const [event, entries] of Object.entries(config.hooks)) {
+      for (const handler of entries) {
+        expect(handler.type, `${event} handler type`).toBe("command");
+        const commandFields = [handler.command, handler.bash, handler.powershell].filter(
+          (v): v is string => typeof v === "string" && v.trim().length > 0,
+        );
+        expect(
+          commandFields.length,
+          `${event} handler must have exactly one of command/bash/powershell`,
+        ).toBe(1);
+      }
+    }
+  });
+
+  it("every referenced script exists on disk", () => {
+    const scriptRefs = new Set<string>();
+    for (const entries of Object.values(loadHooks().hooks)) {
+      for (const handler of entries) {
+        const cmd = handler.command ?? handler.bash ?? handler.powershell ?? "";
+        // Stop the capture at quotes as well as whitespace, so a quoted script path does not drag its closing quote into the filename.
+        const match = cmd.match(/\$\{(?:COPILOT_PLUGIN_ROOT|CLAUDE_PLUGIN_ROOT)\}\/(scripts\/[^\s"']+)/);
+        if (match) scriptRefs.add(match[1]);
+      }
+    }
+    expect(scriptRefs.size).toBeGreaterThan(0);
+    for (const rel of scriptRefs) {
+      expect(existsSync(join(pluginRoot, rel)), `missing hook script: ${rel}`).toBe(true);
+    }
+  });
+});
+
+describe("Copilot hook scripts", () => {
+  type ObservedRequest = { path: string; body: Record<string, unknown> };
+
+  /**
+   * Runs one plugin hook script as a child process against a throwaway local HTTP server.
+   *
+   * @param script  Hook script path, joined onto the plugin root.
+   * @param payload Object written to the child's stdin as JSON.
+   * @param env     Extra environment variables; spread last, so they override the AGENTMEMORY_* defaults.
+   * @returns Every request the server saw (URL path + parsed JSON body) plus the child's stdout.
+   * @throws If no TCP port is bound, the child fails to spawn, runs past 5 s, or exits non-zero.
+   */
+  async function runHook(
+    script: string,
+    payload: Record<string, unknown>,
+    env: Record<string, string> = {},
+  ): Promise<{ requests: ObservedRequest[]; stdout: string }> {
+    const requests: ObservedRequest[] = [];
+    const server = createServer((req, res) => {
+      let raw = "";
+      req.on("data", (chunk) => { raw += chunk; });
+      req.on("end", () => {
+        requests.push({
+          path: req.url ?? "",
+          body: raw ? (JSON.parse(raw) as Record<string, unknown>) : {},
+        });
+        res.writeHead(200, { "Content-Type": "application/json" });
+        res.end(JSON.stringify({ context: "remembered context" }));
+      });
+    });
+
+    await new Promise<void>((resolveServer) => server.listen(0, "127.0.0.1", resolveServer));
+
+    const address = server.address();
+    if (!address || typeof address === "string") {
+      server.close();
+      throw new Error("test server did not bind to a TCP port");
+    }
+
+    try {
+      const child = spawn(process.execPath, [join(pluginRoot, script)], {
+        env: {
+          ...process.env,
+          AGENTMEMORY_URL: `http://127.0.0.1:${address.port}`,
+          AGENTMEMORY_SECRET: "",
+          ...env,
+        },
+        stdio: ["pipe", "pipe", "pipe"],
+      });
+      let stdout = "";
+      let stderr = "";
+      child.stdout.on("data", (chunk) => { stdout += chunk; });
+      child.stderr.on("data", (chunk) => { stderr += chunk; });
+      child.stdin.end(JSON.stringify(payload));
+
+      const exitCode = await new Promise<number | null>((resolveExit, reject) => {
+        const timeout = setTimeout(() => {
+          child.kill();
+          reject(new Error(`hook ${script} timed out`));
+        }, 5000);
+        // Disarm the watchdog on a spawn failure too; otherwise it stays pending for the full 5 s.
+        child.on("error", (err) => { clearTimeout(timeout); reject(err); });
+        child.on("close", (code) => {
+          clearTimeout(timeout);
+          resolveExit(code);
+        });
+      });
+
+      expect(exitCode, stderr).toBe(0);
+      return { requests, stdout };
+    } finally {
+      await new Promise<void>((resolveClose) => server.close(() => resolveClose()));
+    }
+  }
+
+  it("session-start accepts Copilot camelCase sessionId", async () => {
+    const result = await runHook(
+      "scripts/session-start.mjs",
+      { sessionId: "copilot-session", cwd: "C:\\repo" },
+      { AGENTMEMORY_INJECT_CONTEXT: "true" },
+    );
+
+    expect(result.stdout).toBe("remembered context");
+    expect(result.requests[0]?.path).toBe("/agentmemory/session/start");
+    expect(result.requests[0]?.body).toMatchObject({
+      sessionId: "copilot-session",
+      project: "C:\\repo",
+      cwd: "C:\\repo",
+    });
+  });
+
+  it("pre-tool-use narrows Copilot sessionId to strings", async () => {
+    const result = await runHook(
+      "scripts/pre-tool-use.mjs",
+      {
+        sessionId: 123,
+        toolName: "read",
+        toolArgs: { path: "src/index.ts" },
+      },
+      { AGENTMEMORY_INJECT_CONTEXT: "true" },
+    );
+
+    expect(result.stdout).toBe("remembered context");
+    expect(result.requests[0]?.path).toBe("/agentmemory/enrich");
+    expect(result.requests[0]?.body).toMatchObject({
+      sessionId: "unknown",
+      files: ["src/index.ts"],
+      terms: [],
+      toolName: "read",
+    });
+  });
+
+  it("prompt-submit accepts Copilot camelCase prompt payload", async () => {
+    const result = await runHook("scripts/prompt-submit.mjs", {
+      sessionId: "copilot-session",
+      cwd: "C:\\repo",
+      userPrompt: "remember this prompt",
+    });
+
+    expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+    expect(result.requests[0]?.body).toMatchObject({
+      hookType: "prompt_submit",
+      sessionId: "copilot-session",
+      data: { prompt: "remember this prompt" },
+    });
+  });
+
+  it("post-tool-failure accepts Copilot camelCase tool and error payloads", async () => {
+    const result = await runHook("scripts/post-tool-failure.mjs", {
+      sessionId: "copilot-session",
+      cwd: "C:\\repo",
+      toolName: "edit",
+      toolArgs: { filePath: "src/index.ts" },
+      errorMessage: "failed",
+    });
+
+    expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+    expect(result.requests[0]?.body).toMatchObject({
+      hookType: "post_tool_failure",
+      sessionId: "copilot-session",
+      data: {
+        tool_name: "edit",
+        tool_input: JSON.stringify({ filePath: "src/index.ts" }),
+        error: "failed",
+      },
+    });
+  });
+
+  it("notification accepts Copilot camelCase notificationType", async () => {
+    const result = await runHook("scripts/notification.mjs", {
+      sessionId: "copilot-session",
+      cwd: "C:\\repo",
+      notificationType: "permission_prompt",
+      title: "Tool approval",
+      message: "Approve edit",
+    });
+
+    expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+    expect(result.requests[0]?.body).toMatchObject({
+      hookType: "notification",
+      sessionId: "copilot-session",
+      data: {
+        notification_type: "permission_prompt",
+        title: "Tool approval",
+        message: "Approve edit",
+      },
+    });
+  });
+});
diff --git a/test/diagnostics.test.ts b/test/diagnostics.test.ts
index d2dc706e..053e1c40 100644
--- a/test/diagnostics.test.ts
+++ b/test/diagnostics.test.ts
@@ -195,7 +195,10 @@ describe("Diagnostics Functions", () => {
       };
 
       expect(result.success).toBe(true);
-      expect(result.summary.pass).toBe(8);
+      // 14 = 8 original (actions, leases, sentinels, sketches, signals,
+      // sessions, memories, mesh) + 6 added in #lesson-visibility
+      // (lessons, summaries, semantic, procedural, crystals, insights).
+      expect(result.summary.pass).toBe(14);
       expect(result.summary.warn).toBe(0);
       expect(result.summary.fail).toBe(0);
       expect(result.summary.fixable).toBe(0);
@@ -636,4 +639,229 @@ describe("Diagnostics Functions", () => {
       expect(unchanged!.status).toBe("blocked");
     });
   });
+
+  describe("per-store tally categories (#lesson-visibility)", () => {
+    it("lessons category: passes with valid live lessons + ignores tombstoned", async () => {
+      await kv.set(KV.lessons, "lsn_live", {
+        id: "lsn_live", content: "x", context: "", confidence: 0.8,
+        reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+        createdAt: "", updatedAt: "", decayRate: 0.05,
+      });
+      await kv.set(KV.lessons, "lsn_tomb", {
+        id: "lsn_tomb", content: "x", context: "", confidence: 0.5,
+        reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+        createdAt: "", updatedAt: "", decayRate: 0.05, deleted: true,
+      });
+
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["lessons"],
+      })) as { checks: DiagnosticCheck[] };
+      // Expect one pass check whose message counts 1 live lesson and 1 tombstoned (`deleted: true`) row.
+      const ok = result.checks.find((c) => c.name === "lessons-ok");
+      expect(ok?.status).toBe("pass");
+      expect(ok?.message).toMatch(/All 1 lessons.*1 tombstoned/);
+    });
+
+    it("lessons category: warns on out-of-range confidence", async () => {
+      await kv.set(KV.lessons, "lsn_bad", {
+        id: "lsn_bad", content: "x", context: "", confidence: 1.5,
+        reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+        createdAt: "", updatedAt: "", decayRate: 0.05,
+      });
+      // confidence 1.5 is the out-of-range value this test expects to be flagged.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["lessons"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("summaries category: warns on missing title", async () => {
+      await kv.set(KV.summaries, "ses_1", {
+        sessionId: "ses_1", project: "p", createdAt: "", title: "",
+        narrative: "n", keyDecisions: [], filesModified: [], concepts: [],
+        observationCount: 1,
+      });
+      // The empty-string title should yield a `summary-missing-title:*` warn check.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["summaries"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("procedural category: warns on empty steps", async () => {
+      await kv.set(KV.procedural, "proc_1", {
+        id: "proc_1", name: "noop", steps: [], triggerCondition: "x",
+        frequency: 1, sourceSessionIds: [], strength: 0.5,
+        createdAt: "", updatedAt: "",
+      });
+      // `steps: []` is the condition expected to produce a `procedural-empty-steps:*` warn.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["procedural"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("procedural-empty-steps:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("crystals category: warns on empty narrative", async () => {
+      await kv.set(KV.crystals, "cry_1", {
+        id: "cry_1", narrative: "", keyOutcomes: [], filesAffected: [],
+        lessons: [], sourceActionIds: [], createdAt: "",
+      });
+      // The empty narrative string is what should trigger `crystal-empty-narrative:*`.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["crystals"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("insights category: warns on out-of-range confidence", async () => {
+      await kv.set(KV.insights, "ins_bad", {
+        id: "ins_bad", title: "t", content: "c", confidence: -0.1,
+        reinforcements: 0, sourceConceptCluster: [], sourceMemoryIds: [],
+        sourceLessonIds: [], sourceCrystalIds: [], tags: [],
+        createdAt: "", updatedAt: "", decayRate: 0.05,
+      });
+      // Negative confidence (-0.1) is the out-of-range value expected to be flagged.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["insights"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("insight-bad-confidence:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("semantic category: warns on out-of-range confidence", async () => {
+      await kv.set(KV.semantic, "sem_bad", {
+        id: "sem_bad", fact: "f", confidence: 2.0, sourceSessionIds: [],
+        sourceMemoryIds: [], accessCount: 0, lastAccessedAt: "",
+        strength: 0, createdAt: "", updatedAt: "",
+      });
+      // confidence 2.0 is the out-of-range value expected to be flagged for the semantic row.
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["semantic"],
+      })) as { checks: DiagnosticCheck[] };
+
+      const warn = result.checks.find((c) => c.name.startsWith("semantic-bad-confidence:"));
+      expect(warn?.status).toBe("warn");
+    });
+
+    it("categories filter accepts new categories and skips others", async () => {
+      const result = (await sdk.trigger("mem::diagnose", {
+        categories: ["lessons", "summaries"],
+      })) as { checks: DiagnosticCheck[] };
+      // Every returned check must belong to a requested category, and both categories must appear.
+      expect(result.checks.every((c) => c.category === "lessons" || c.category === "summaries")).toBe(true);
+      expect(result.checks.some((c) => c.category === "lessons")).toBe(true);
+      expect(result.checks.some((c) => c.category === "summaries")).toBe(true);
+    });
+
+    describe("defensive row-shape handling (CodeRabbit #473 review)", () => {
+      it("NaN/Infinity confidence on a lesson is flagged as warn, not silently passed", async () => {
+        await kv.set(KV.lessons, "lsn_nan", {
+          id: "lsn_nan", content: "x", context: "", confidence: NaN,
+          reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+          createdAt: "", updatedAt: "", decayRate: 0.05,
+        });
+        // Only NaN is stored here (no Infinity row); the check must warn rather than pass it.
+        const result = (await sdk.trigger("mem::diagnose", {
+          categories: ["lessons"],
+        })) as { checks: DiagnosticCheck[] };
+
+        const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:"));
+        expect(warn?.status).toBe("warn");
+      });
+
+      it("non-string summary title doesn't throw — surfaces as warn", async () => {
+        await kv.set(KV.summaries, "ses_bad_title", {
+          sessionId: "ses_bad_title",
+          project: "p",
+          createdAt: "",
+          title: null as unknown as string, // simulate corrupted row
+          narrative: "n",
+          keyDecisions: [],
+          filesModified: [],
+          concepts: [],
+          observationCount: 1,
+        });
+
+        // The bug to guard against: the old code called .trim() unconditionally,
+        // which throws on null/number, which aborts the whole diagnose run and
+        // any later category check never executes. Verify diagnose completes
+        // AND surfaces the bad row.
+        const result = (await sdk.trigger("mem::diagnose", {
+          categories: ["summaries", "lessons"],
+        })) as { checks: DiagnosticCheck[]; success?: boolean };
+        // `success` shows the run completed instead of aborting on the corrupted row.
+        expect(result.success).toBe(true);
+        const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:"));
+        expect(warn?.status).toBe("warn");
+        // Later category still ran:
+        expect(result.checks.some((c) => c.category === "lessons")).toBe(true);
+      });
+
+      it("non-string crystal narrative doesn't throw — surfaces as warn", async () => {
+        await kv.set(KV.crystals, "cry_bad", {
+          id: "cry_bad",
+          narrative: undefined as unknown as string,
+          keyOutcomes: [],
+          filesAffected: [],
+          lessons: [],
+          sourceActionIds: [],
+          createdAt: "",
+        });
+        // `narrative: undefined` simulates a corrupted row; diagnose must still complete and warn.
+        const result = (await sdk.trigger("mem::diagnose", {
+          categories: ["crystals"],
+        })) as { checks: DiagnosticCheck[]; success?: boolean };
+
+        expect(result.success).toBe(true);
+        const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:"));
+        expect(warn?.status).toBe("warn");
+      });
+
+      it("Infinity confidence on insight + semantic both flagged", async () => {
+        await kv.set(KV.insights, "ins_inf", {
+          id: "ins_inf",
+          title: "t",
+          content: "c",
+          confidence: Infinity,
+          reinforcements: 0,
+          sourceConceptCluster: [],
+          sourceMemoryIds: [],
+          sourceLessonIds: [],
+          sourceCrystalIds: [],
+          tags: [],
+          createdAt: "",
+          updatedAt: "",
+          decayRate: 0.05,
+        });
+        await kv.set(KV.semantic, "sem_nan", {
+          id: "sem_nan",
+          fact: "f",
+          confidence: NaN,
+          sourceSessionIds: [],
+          sourceMemoryIds: [],
+          accessCount: 0,
+          lastAccessedAt: "",
+          strength: 0,
+          createdAt: "",
+          updatedAt: "",
+        });
+
+        const result = (await sdk.trigger("mem::diagnose", {
+          categories: ["insights", "semantic"],
+        })) as { checks: DiagnosticCheck[] };
+        // Note the mix: the insight row carries Infinity, the semantic row carries NaN.
+        expect(result.checks.find((c) => c.name === "insight-bad-confidence:ins_inf")?.status).toBe("warn");
+        expect(result.checks.find((c) => c.name === "semantic-bad-confidence:sem_nan")?.status).toBe("warn");
+      });
+    });
+  });
 });
diff --git a/test/env-loader.test.ts b/test/env-loader.test.ts
index 9c6f2955..17ff6a8e 100644
--- a/test/env-loader.test.ts
+++ b/test/env-loader.test.ts
@@ -25,6 +25,7 @@ describe("loadEnvFile", () => {
     process.env["HOME"] = sandboxHome;
     process.env["USERPROFILE"] = sandboxHome;
     delete process.env["AGENTMEMORY_AUTO_COMPRESS"];
+    delete process.env["AGENTMEMORY_DROP_STALE_INDEX"];
     delete process.env["CONSOLIDATION_ENABLED"];
     delete process.env["GRAPH_EXTRACTION_ENABLED"];
     delete process.env["TOKEN"];
@@ -82,4 +83,10 @@ describe("loadEnvFile", () => {
     const cfg = await freshConfig();
     expect(cfg.getEnvVar("TOKEN")).toBe("abc");
   });
+
+  it("reads AGENTMEMORY_DROP_STALE_INDEX from the env file", async () => {
+    writeEnv("AGENTMEMORY_DROP_STALE_INDEX=true");
+    const cfg = await freshConfig(); // config is loaded only after the env file has been written
+    expect(cfg.isDropStaleIndexEnabled()).toBe(true);
+  });
 });
diff --git a/test/eval-adapters.test.ts b/test/eval-adapters.test.ts
new file mode 100644
index 00000000..90f914f5
--- /dev/null
+++ b/test/eval-adapters.test.ts
@@ -0,0 +1,92 @@
+import { describe, it, expect } from "vitest";
+import { readFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { grepAdapter } from "../eval/runner/adapters/grep.js";
+import { aggregate, scoreQuestion } from "../eval/runner/score.js";
+import type { Question, Session } from "../eval/runner/types.js";
+
+const DATA_DIR = resolve(__dirname, "..", "eval", "data", "coding-agent-life-v1");
+const sessions = JSON.parse(readFileSync(`${DATA_DIR}/sessions.json`, "utf8")) as Session[];
+const queries = JSON.parse(readFileSync(`${DATA_DIR}/queries.json`, "utf8")) as Array<
+  Omit<Question, "haystack">
+>;
+
+describe("eval scaffold", () => {
+  it("coding-agent-life-v1 corpus is well-formed", () => {
+    expect(sessions.length).toBeGreaterThan(0);
+    expect(queries.length).toBeGreaterThan(0);
+    const sessionIds = new Set(sessions.map((s) => s.id));
+    for (const q of queries) {
+      expect(q.goldSessionIds.length).toBeGreaterThan(0); // every query needs at least one gold answer
+      for (const id of q.goldSessionIds) {
+        expect(sessionIds.has(id)).toBe(true); // gold ids must reference sessions in the corpus
+      }
+    }
+  });
+
+  it("grep adapter ranks gold session in top-5 for most queries", async () => {
+    const state = await grepAdapter.init(sessions);
+    let hits = 0;
+    for (const q of queries) {
+      const ranked = await grepAdapter.query(q.question, state, 5);
+      const topIds = new Set(ranked.map((r) => r.sessionId));
+      if (q.goldSessionIds.some((id) => topIds.has(id))) hits += 1; // any one gold id counts as a hit
+    }
+    expect(hits / queries.length).toBeGreaterThan(0.5); // "most" = strictly more than half
+  });
+
+  it("scoreQuestion computes P@K, R@K, hit, topGoldRank", () => {
+    const q: Question = {
+      id: "test",
+      type: "single-session",
+      question: "?",
+      goldSessionIds: ["a", "b"],
+      haystack: [],
+    };
+    const ranked = [
+      { sessionId: "x", score: 0.9 },
+      { sessionId: "a", score: 0.7 },
+      { sessionId: "y", score: 0.5 },
+      { sessionId: "b", score: 0.3 },
+    ];
+    const row = scoreQuestion(q, ranked, 5, "test", 12);
+    expect(row.hit).toBe(true);
+    expect(row.recallAtK).toBe(1); // both gold ids ("a", "b") appear in the ranking
+    expect(row.precisionAtK).toBeCloseTo(2 / 5); // 2 gold hits over 5 — presumably K, the third argument
+    expect(row.topGoldRank).toBe(2); // "a" is the first gold id, at 1-based position 2
+  });
+
+  it("scoreQuestion handles miss", () => {
+    const q: Question = {
+      id: "test",
+      type: "x",
+      question: "?",
+      goldSessionIds: ["a"],
+      haystack: [],
+    };
+    const ranked = [
+      { sessionId: "x", score: 1 },
+      { sessionId: "y", score: 0.5 },
+    ];
+    const row = scoreQuestion(q, ranked, 5, "test", 5);
+    expect(row.hit).toBe(false);
+    expect(row.recallAtK).toBe(0);
+    expect(row.topGoldRank).toBeNull(); // gold id "a" never appears in the ranking
+  });
+
+  it("aggregate computes per-adapter and per-type means", () => {
+    const q: Question = {
+      id: "1",
+      type: "t1",
+      question: "?",
+      goldSessionIds: ["a"],
+      haystack: [],
+    };
+    const row1 = scoreQuestion(q, [{ sessionId: "a", score: 1 }], 5, "grep", 10); // ranking contains gold "a"
+    const row2 = scoreQuestion(q, [{ sessionId: "x", score: 1 }], 5, "grep", 20); // ranking lacks gold "a"
+    const agg = aggregate([row1, row2]);
+    expect(agg.byAdapter.grep.hit).toBe(1); // NOTE(review): 1 of 2 rows hits, so this reads as a count, not a mean — confirm
+    expect(agg.byAdapter.grep.n).toBe(2);
+    expect(agg.byType.t1.grep.n).toBe(2);
+  });
+});
diff --git a/test/export-import.test.ts b/test/export-import.test.ts
index 4426ce8e..373d2518 100644
--- a/test/export-import.test.ts
+++ b/test/export-import.test.ts
@@ -119,7 +119,7 @@ describe("Export/Import Functions", () => {
   it("export produces valid ExportData structure", async () => {
     const result = (await sdk.trigger("mem::export", {})) as ExportData;
 
-    expect(result.version).toBe("0.9.20");
+    expect(result.version).toBe("0.9.21");
     expect(result.exportedAt).toBeDefined();
     expect(result.sessions.length).toBe(1);
     expect(result.sessions[0].id).toBe("ses_1");
diff --git a/test/fs-watcher.test.ts b/test/fs-watcher.test.ts
index 76212b06..48c1b094 100644
--- a/test/fs-watcher.test.ts
+++ b/test/fs-watcher.test.ts
@@ -12,7 +12,7 @@ function wait(ms: number): Promise<void> {
   return new Promise((r) => setTimeout(r, ms));
 }
 
-describe("FilesystemWatcher", () => {
+describe("FilesystemWatcher", { retry: 2 }, () => {
   let root: string;
   const originalFetch = globalThis.fetch;
   let captured: Array<{ url: string; body: unknown; headers: Record<string, string> }>;
@@ -49,7 +49,7 @@ describe("FilesystemWatcher", () => {
     w.start();
     try {
       writeFileSync(join(root, "notes.md"), "hello world\n");
-      await wait(800);
+      await wait(1500);
       expect(captured.length).toBeGreaterThanOrEqual(1);
       const obs = captured[captured.length - 1];
       expect(obs.url).toBe("http://localhost:3111/agentmemory/observe");
@@ -87,7 +87,7 @@ describe("FilesystemWatcher", () => {
     w.start();
     try {
       unlinkSync(join(root, "old.md"));
-      await wait(800);
+      await wait(1500);
       const deletes = captured.filter(
         (c) => (c.body as { data: { changeKind: string } }).data?.changeKind === "file_delete",
       );
@@ -116,7 +116,7 @@ describe("FilesystemWatcher", () => {
     w.start();
     try {
       writeFileSync(join(root, "node_modules", "ignored.js"), "x");
-      await wait(800);
+      await wait(1500);
       const matches = captured.filter((c) =>
         (c.body as { data: { files: string[] } }).data?.files?.some((f) => f.includes("ignored.js")),
       );
@@ -136,7 +136,7 @@ describe("FilesystemWatcher", () => {
     w.start();
     try {
       writeFileSync(join(root, "secret.md"), "bearer test\n");
-      await wait(800);
+      await wait(1500);
       expect(captured.length).toBeGreaterThanOrEqual(1);
       const headers = captured[captured.length - 1].headers as Record<string, string>;
       expect(headers.authorization).toBe("Bearer shhh");
diff --git a/test/hermes-plugin.test.ts b/test/hermes-plugin.test.ts
new file mode 100644
index 00000000..f13f06f3
--- /dev/null
+++ b/test/hermes-plugin.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from "vitest";
+import { readFileSync } from "node:fs";
+
+const expectedHermesHooks = [
+  "prefetch",
+  "sync_turn",
+  "on_session_end",
+  "on_pre_compress",
+  "on_memory_write",
+  "system_prompt_block",
+];
+
+// Collects the `- name` entries listed under the `hooks:` key of the Hermes plugin manifest.
+function readHermesPluginHooks(): string[] {
+  const yamlLines = readFileSync("integrations/hermes/plugin.yaml", "utf8").split(/\r?\n/);
+  const hookEntry = /^\s*-\s*([A-Za-z_][A-Za-z0-9_]*)\s*$/;
+  const collected: string[] = [];
+  let insideHooks = false;
+
+  for (const rawLine of yamlLines) {
+    const trimmed = rawLine.trim();
+    if (trimmed === "hooks:") {
+      insideHooks = true; // later lines are candidate entries
+    } else if (insideHooks && trimmed !== "") {
+      // The first non-blank line that is not space-indented ends the hooks block.
+      if (!rawLine.startsWith(" ")) break;
+      const entry = hookEntry.exec(rawLine);
+      if (entry) collected.push(entry[1]);
+    }
+  }
+  return collected;
+}
+
+/** Reports whether a provider method name is a Hermes lifecycle hook. */
+function isHermesLifecycleHook(methodName: string): boolean {
+  // Fixed hook names are checked first; every other hook is an `on_*` callback.
+  const fixedHookNames = ["prefetch", "sync_turn", "system_prompt_block"];
+  if (fixedHookNames.includes(methodName)) return true;
+
+  return methodName.startsWith("on_");
+}
+
+/**
+ * Lists the lifecycle-hook methods found in the Hermes provider module:
+ * every `def` indented by exactly four spaces whose name passes
+ * `isHermesLifecycleHook`, in source order.
+ */
+function readAgentMemoryProviderHookMethods(): string[] {
+  const pythonSource = readFileSync("integrations/hermes/__init__.py", "utf8");
+  const methodDefs = pythonSource.matchAll(/^    def ([a-z_][a-z0-9_]*)\(/gm);
+
+  // Capture group 1 is the method name that follows `def`.
+  return Array.from(methodDefs, (def) => def[1]).filter(isHermesLifecycleHook);
+}
+
+describe("Hermes plugin manifest", () => {
+  it("declares every implemented lifecycle hook", () => {
+    const declaredHooks = readHermesPluginHooks();
+    const implementedHooks = readAgentMemoryProviderHookMethods();
+    // Order-insensitive parity with the implementation first, then the manifest's exact declared order.
+    expect([...declaredHooks].sort()).toEqual([...implementedHooks].sort());
+    expect(declaredHooks).toEqual(expectedHermesHooks);
+  });
+});
diff --git a/test/mcp-standalone-proxy.test.ts b/test/mcp-standalone-proxy.test.ts
index 0d93b227..dc08a024 100644
--- a/test/mcp-standalone-proxy.test.ts
+++ b/test/mcp-standalone-proxy.test.ts
@@ -75,6 +75,61 @@ describe("@agentmemory/mcp standalone — server proxy (issue #159)", () => {
     expect(body.results[0].id).toBe("m1");
   });
 
+  it("proxies memory_recall to POST /agentmemory/search and forwards format/token_budget (#507)", async () => {
+    const calls: Array<{ url: string; body?: unknown }> = [];
+    installFetch((url, init) => {
+      if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 });
+      const body = init?.body ? JSON.parse(init.body as string) : undefined;
+      calls.push({ url, body }); // record every non-livez request for the assertions below
+      if (url.endsWith("/agentmemory/search")) {
+        return new Response(
+          JSON.stringify({
+            mode: "full",
+            facts: [{ id: "m1" }],
+            narrative: "n",
+            concepts: ["c"],
+            files: ["f"],
+          }),
+          { status: 200, headers: { "content-type": "application/json" } },
+        );
+      }
+      return new Response("not found", { status: 404 });
+    });
+    const res = await handleToolCall("memory_recall", {
+      query: "auth bug",
+      limit: 5,
+      format: "full",
+      token_budget: 800,
+    });
+    const body = JSON.parse(res.content[0].text); // the tool result carries the server JSON as text
+    expect(body.mode).toBe("full");
+    expect(body.facts[0].id).toBe("m1");
+    const searchCall = calls.find((c) => c.url.endsWith("/agentmemory/search"));
+    expect(searchCall).toBeDefined();
+    expect(searchCall?.body).toEqual({
+      query: "auth bug",
+      limit: 5,
+      format: "full",
+      token_budget: 800,
+    });
+    expect(calls.find((c) => c.url.endsWith("/agentmemory/smart-search"))).toBeUndefined();
+  });
+
+  it("memory_recall defaults format to 'full' when omitted (#507)", async () => {
+    let recallBody: Record<string, unknown> | undefined;
+    installFetch((url, init) => {
+      if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 });
+      if (url.endsWith("/agentmemory/search")) {
+        recallBody = init?.body ? JSON.parse(init.body as string) : undefined;
+        return new Response(JSON.stringify({ mode: "full", facts: [] }), { status: 200 });
+      }
+      return new Response("not found", { status: 404 });
+    });
+    await handleToolCall("memory_recall", { query: "x" }); // neither format nor token_budget supplied
+    expect(recallBody?.["format"]).toBe("full");
+    expect(recallBody).not.toHaveProperty("token_budget"); // the key must be absent, not just undefined-valued
+  });
+
   it("proxies memory_governance_delete to the DELETE REST endpoint", async () => {
     const calls: Array<{ url: string; method: string; body?: unknown }> = [];
     installFetch((url, init) => {
diff --git a/test/mcp-transport.test.ts b/test/mcp-transport.test.ts
index bb8627dc..006ecc9e 100644
--- a/test/mcp-transport.test.ts
+++ b/test/mcp-transport.test.ts
@@ -1,5 +1,7 @@
 import { describe, it, expect, vi } from "vitest";
 import {
+  createMessageParser,
+  formatResponse,
   processLine,
   type JsonRpcResponse,
   type RequestHandler,
@@ -227,3 +229,47 @@ describe("processLine — id type validation (JSON-RPC §4)", () => {
     expect(c.out[0].result).toEqual({ method: "ping" });
   });
 });
+
+describe("stdio framing", () => {
+  it("parses Content-Length framed MCP messages split across chunks", () => {
+    const messages: string[] = [];
+    const parser = createMessageParser((message) => messages.push(message));
+    const body = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "initialize" });
+    const framed = `Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n${body}`;
+    // Cut 12 characters in — inside the `Content-Length` header name — so the parser has to buffer.
+    parser.push(framed.slice(0, 12));
+    parser.push(framed.slice(12));
+
+    expect(messages).toEqual([body]);
+    expect(parser.isFramed()).toBe(true);
+  });
+
+  it("parses newline-delimited JSON for existing clients", () => {
+    const messages: string[] = [];
+    const parser = createMessageParser((message) => messages.push(message));
+    const first = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/list" });
+    const second = JSON.stringify({ jsonrpc: "2.0", method: "notifications/initialized" });
+    // Two newline-terminated messages arrive in one chunk, with no Content-Length header.
+    parser.push(`${first}\n${second}\n`);
+
+    expect(messages).toEqual([first, second]);
+    expect(parser.isFramed()).toBe(false);
+  });
+
+  it("formats responses with Content-Length framing when requested", () => {
+    const response: JsonRpcResponse = {
+      jsonrpc: "2.0",
+      id: 1,
+      result: { ok: true },
+    };
+    const formatted = formatResponse(response, true);
+    // Framed output is expected as an array: element 0 is the header, element 1 the body.
+    expect(Array.isArray(formatted)).toBe(true);
+    if (!Array.isArray(formatted)) throw new Error("expected framed response");
+    const header = formatted[0].toString("ascii");
+    const body = formatted[1].toString("utf8");
+
+    expect(header).toBe(`Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n`);
+    expect(JSON.parse(body)).toEqual(response);
+  });
+});
diff --git a/test/onboarding.test.ts b/test/onboarding.test.ts
new file mode 100644
index 00000000..053085b8
--- /dev/null
+++ b/test/onboarding.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, it } from "vitest";
+
+import { buildAgentOptions, getInitialAgentValues } from "../src/cli/onboarding.js";
+
+describe("first-run onboarding", () => {
+  it("offers GitHub Copilot CLI as a native setup target", () => {
+    const options = buildAgentOptions();
+    expect(options).toEqual( // partial match: other options and extra fields are allowed
+      expect.arrayContaining([
+        expect.objectContaining({
+          value: "copilot-cli",
+          label: expect.stringContaining("GitHub Copilot CLI"),
+          hint: "native plugin",
+        }),
+      ]),
+    );
+  });
+
+  it("selects GitHub Copilot CLI by default when running inside Copilot CLI", () => {
+    expect(getInitialAgentValues({ COPILOT_CLI: "1" })).toEqual(["copilot-cli"]); // either variable alone suffices
+    expect(getInitialAgentValues({ COPILOT_AGENT_SESSION_ID: "session" })).toEqual(["copilot-cli"]);
+  });
+
+  it("keeps Claude Code as the default outside known agent environments", () => {
+    expect(getInitialAgentValues({})).toEqual(["claude-code"]); // empty env: no agent markers present
+  });
+});
diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts
index 4f22d1a9..9d0c94e0 100644
--- a/test/smart-search.test.ts
+++ b/test/smart-search.test.ts
@@ -193,4 +193,102 @@ describe("Smart Search Function", () => {
     } | null;
     expect(log?.count).toBe(1);
   });
+
+  describe("lesson inclusion (#lesson-visibility)", () => {
+    it("compact mode returns lessons array alongside observation results", async () => {
+      sdk.registerFunction("mem::lesson-recall", async (payload: any) => ({
+        success: true,
+        lessons: [
+          { id: "lsn_a", content: "always rebase before push", confidence: 0.9, createdAt: "2026-04-01T00:00:00Z", project: "p", tags: ["git"], score: 0.81 },
+          { id: "lsn_b", content: "never force-push to main", confidence: 0.95, createdAt: "2026-04-02T00:00:00Z", project: "p", tags: ["git"], score: 0.76 },
+        ],
+      }));
+
+      const result = (await sdk.trigger("mem::smart-search", {
+        query: "rebase",
+      })) as { mode: string; results: CompactSearchResult[]; lessons?: any[] };
+      // `includeLessons` is not passed, and the recalled lesson `id` should surface as `lessonId`.
+      expect(result.mode).toBe("compact");
+      expect(result.results.length).toBe(2); // observations unchanged
+      expect(result.lessons).toBeDefined();
+      expect(result.lessons!.length).toBe(2);
+      expect(result.lessons![0]).toMatchObject({
+        lessonId: "lsn_a",
+        confidence: 0.9,
+        score: 0.81,
+      });
+      expect(result.lessons![0].tags).toEqual(["git"]);
+    });
+
+    it("compact mode truncates long lesson content for preview", async () => {
+      const long = "x".repeat(500);
+      sdk.registerFunction("mem::lesson-recall", async () => ({
+        success: true,
+        lessons: [{ id: "lsn_long", content: long, confidence: 0.5, createdAt: "", tags: [], score: 0.4 }],
+      }));
+      // The 500-character content should come back shortened and ending in an ellipsis character.
+      const result = (await sdk.trigger("mem::smart-search", {
+        query: "x",
+      })) as { lessons: any[] };
+
+      expect(result.lessons[0].content.length).toBeLessThan(long.length);
+      expect(result.lessons[0].content).toMatch(/…$/);
+    });
+
+    it("includeLessons:false omits the lessons array entirely", async () => {
+      // No lesson-recall handler registered — would throw if invoked.
+      const result = (await sdk.trigger("mem::smart-search", {
+        query: "auth",
+        includeLessons: false,
+      })) as { mode: string; results: CompactSearchResult[]; lessons?: unknown };
+      // Observation results are unaffected; the `lessons` key must be absent, not an empty array.
+      expect(result.results.length).toBe(2);
+      expect(result.lessons).toBeUndefined();
+    });
+
+    it("forwards project filter to mem::lesson-recall", async () => {
+      let receivedPayload: any = null;
+      sdk.registerFunction("mem::lesson-recall", async (payload: any) => {
+        receivedPayload = payload;
+        return { success: true, lessons: [] };
+      });
+
+      await sdk.trigger("mem::smart-search", {
+        query: "rebase",
+        project: "gitops-assistant",
+      });
+      // The recall handler must receive both the original query and the project filter.
+      expect(receivedPayload).toMatchObject({
+        query: "rebase",
+        project: "gitops-assistant",
+      });
+    });
+
+    it("tolerates mem::lesson-recall failure: returns empty lessons, observations unchanged", async () => {
+      sdk.registerFunction("mem::lesson-recall", async () => {
+        throw new Error("lessons store unavailable");
+      });
+
+      const result = (await sdk.trigger("mem::smart-search", {
+        query: "auth",
+      })) as { results: CompactSearchResult[]; lessons: any[] };
+      // A throwing handler must degrade to `lessons: []` instead of failing the whole search.
+      expect(result.results.length).toBe(2);
+      expect(result.lessons).toEqual([]);
+    });
+
+    it("tolerates non-success lesson-recall response shape", async () => {
+      sdk.registerFunction("mem::lesson-recall", async () => ({
+        success: false,
+        error: "query is required",
+      }));
+
+      const result = (await sdk.trigger("mem::smart-search", {
+        query: "auth",
+      })) as { results: CompactSearchResult[]; lessons: any[] };
+      // A `success: false` reply must yield `lessons: []` while observation results stay intact.
+      expect(result.results.length).toBe(2);
+      expect(result.lessons).toEqual([]);
+    });
+  });
 });
diff --git a/test/summarize.test.ts b/test/summarize.test.ts
new file mode 100644
index 00000000..03aa1926
--- /dev/null
+++ b/test/summarize.test.ts
@@ -0,0 +1,417 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+
+vi.mock("../src/logger.js", () => ({ // logger methods become no-op spies
+  logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+vi.mock("../src/state/schema.js", () => ({ // KV scope names as plain strings; setupHandler and the assertions below use the same strings
+  KV: {
+    sessions: "sessions",
+    summaries: "summaries",
+    observations: (sessionId: string) => `obs:${sessionId}`,
+    audit: "audit",
+  },
+}));
+
+vi.mock("../src/eval/schemas.js", () => ({ // empty placeholder schema
+  SummaryOutputSchema: {},
+}));
+
+vi.mock("../src/eval/validator.js", () => ({ // validation always reports valid with no errors
+  validateOutput: () => ({ valid: true, result: { errors: [] } }),
+}));
+
+vi.mock("../src/eval/quality.js", () => ({ // constant quality score
+  scoreSummary: () => 100,
+}));
+
+vi.mock("../src/functions/audit.js", () => ({ // audit helper becomes a no-op spy
+  safeAudit: vi.fn(),
+}));
+
+import { registerSummarizeFunction } from "../src/functions/summarize.js";
+import type {
+  CompressedObservation,
+  Session,
+  MemoryProvider,
+} from "../src/types.js";
+
+function mockKV() { // in-memory KV stub: one Map of key -> value per scope
+  const store = new Map<string, Map<string, unknown>>();
+  return {
+    store, // the backing Map is returned too, so callers can reach the raw contents
+    get: async <T>(scope: string, key: string): Promise<T | null> =>
+      (store.get(scope)?.get(key) as T) ?? null, // missing scope/key (or a nullish stored value) resolves to null
+    set: async <T>(scope: string, key: string, data: T): Promise<T> => {
+      if (!store.has(scope)) store.set(scope, new Map()); // scopes are created lazily on first write
+      store.get(scope)!.set(key, data);
+      return data; // resolves to the value that was written
+    },
+    delete: async (scope: string, key: string): Promise<void> => {
+      store.get(scope)?.delete(key); // no-op when the scope does not exist
+    },
+    list: async <T>(scope: string): Promise<T[]> => {
+      const entries = store.get(scope);
+      return entries ? (Array.from(entries.values()) as T[]) : []; // values in Map insertion order; unknown scope -> []
+    },
+  };
+}
+
+function mockSdk() { // minimal SDK stub that only records registered function handlers
+  const functions = new Map<string, Function>();
+  return {
+    functions, // id -> handler; setupHandler reads the handler back from this map
+    registerFunction: (id: string, handler: Function) => {
+      functions.set(id, handler); // registering the same id again overwrites the earlier handler
+    },
+    registerTrigger: () => {}, // trigger registrations are ignored
+    trigger: async () => ({}), // always resolves to an empty object; no handler is dispatched
+  };
+}
+
+function makeObs(i: number, sessionId: string): CompressedObservation { // synthetic observation; text fields are derived from index i
+  return {
+    id: `obs_${i}`, // also used as the KV key by setupHandler
+    sessionId,
+    timestamp: new Date().toISOString(), // creation time of the fixture, not a fixed value
+    type: "conversation",
+    title: `obs ${i}`,
+    facts: [`fact ${i}`],
+    narrative: `narrative for obs ${i}`,
+    concepts: [],
+    files: [`src/file_${i}.ts`],
+    importance: 5,
+  };
+}
+
+function makeProvider(responses: string[]): MemoryProvider & { // scripted provider: summarize() replays `responses` in call order
+  calls: Array<{ system: string; user: string }>;
+} {
+  const calls: Array<{ system: string; user: string }> = [];
+  let i = 0; // index of the next scripted response
+  return {
+    name: "test",
+    calls, // every summarize() invocation is recorded here as { system, user }
+    compress: async () => "",
+    summarize: async (system: string, user: string) => {
+      calls.push({ system, user });
+      const r = responses[i] ?? responses[responses.length - 1]; // once the script is exhausted, keep repeating the last response
+      i += 1;
+      return r;
+    },
+  };
+}
+
+function summaryXml(opts: { // renders a <summary> XML string; values are interpolated verbatim (no XML escaping)
+  title: string;
+  narrative?: string; // falls back to the literal "narrative"
+  decisions?: string[]; // each entry becomes a <decision> element
+  files?: string[]; // each entry becomes a <file> element
+  concepts?: string[]; // each entry becomes a <concept> element
+}): string {
+  const d = (opts.decisions ?? []).map((x) => `<decision>${x}</decision>`).join("");
+  const f = (opts.files ?? []).map((x) => `<file>${x}</file>`).join("");
+  const c = (opts.concepts ?? []).map((x) => `<concept>${x}</concept>`).join("");
+  return `<summary>
+<title>${opts.title}</title>
+<narrative>${opts.narrative ?? "narrative"}</narrative>
+<decisions>${d}</decisions>
+<files>${f}</files>
+<concepts>${c}</concepts>
+</summary>`;
+}
+
+async function setupHandler(opts: { // seeds a session plus obsCount observations, registers mem::summarize, returns its handler and the KV
+  sessionId: string;
+  obsCount: number;
+  provider: MemoryProvider;
+}) {
+  const sdk = mockSdk();
+  const kv = mockKV();
+  const session: Session = {
+    id: opts.sessionId,
+    project: "test-project",
+    cwd: "/tmp",
+    startedAt: new Date().toISOString(),
+    status: "completed",
+    observationCount: opts.obsCount,
+  };
+  await kv.set("sessions", opts.sessionId, session); // same scope string as the mocked KV.sessions
+  for (let i = 0; i < opts.obsCount; i++) {
+    const o = makeObs(i, opts.sessionId);
+    await kv.set(`obs:${opts.sessionId}`, o.id, o); // same scope format as the mocked KV.observations
+  }
+  registerSummarizeFunction(sdk as any, kv as any, opts.provider); // `as any`: the stubs are passed without type checking
+  const handler = sdk.functions.get("mem::summarize")!; // assumes registration stored a handler under this id
+  return { handler, kv };
+}
+
+describe("mem::summarize chunking", () => {
+  const ORIGINAL_ENV = { ...process.env }; // shallow snapshot taken when the describe body runs
+
+  beforeEach(() => {
+    delete process.env.SUMMARIZE_CHUNK_SIZE; // every test starts with both overrides unset
+    delete process.env.SUMMARIZE_CHUNK_CONCURRENCY;
+  });
+
+  afterEach(() => {
+    process.env = { ...ORIGINAL_ENV }; // NOTE(review): swaps process.env for a plain-object copy instead of restoring keys in place — confirm this is intended
+  });
+
+  it("small session takes the single-call path (no chunking, no reduce)", async () => {
+    const provider = makeProvider([ // a single scripted response: exactly one summarize() call is expected
+      summaryXml({
+        title: "Small session",
+        decisions: ["decision A"],
+        files: ["src/a.ts"],
+        concepts: ["concept-a"],
+      }),
+    ]);
+    const { handler, kv } = await setupHandler({
+      sessionId: "ses_small",
+      obsCount: 10, // no SUMMARIZE_CHUNK_SIZE override is set here, so the default chunk size applies
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_small" });
+
+    expect(result.success).toBe(true);
+    expect(provider.calls).toHaveLength(1); // no chunk calls and no reduce call
+    expect(provider.calls[0].user).toContain("Session observations (10 total)"); // the one prompt covers all 10 observations
+    const stored: any = await kv.get("summaries", "ses_small");
+    expect(stored?.title).toBe("Small session");
+  });
+
+  it("large session map-reduces: N chunk calls + 1 reduce call", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial keeps call ordering deterministic
+    const provider = makeProvider([ // scripted in call order: three chunk summaries, then the reduce output
+      summaryXml({ title: "Chunk 1", decisions: ["dA"], files: ["src/a.ts"], concepts: ["ca"] }),
+      summaryXml({ title: "Chunk 2", decisions: ["dB"], files: ["src/b.ts"], concepts: ["cb"] }),
+      summaryXml({ title: "Chunk 3", decisions: ["dC"], files: ["src/c.ts"], concepts: ["cc"] }),
+      summaryXml({
+        title: "Merged",
+        decisions: ["dA", "dB", "dC"],
+        files: ["src/a.ts", "src/b.ts", "src/c.ts"],
+        concepts: ["ca", "cb", "cc"],
+      }),
+    ]);
+    const { handler, kv } = await setupHandler({
+      sessionId: "ses_large",
+      obsCount: 250, // 100 + 100 + 50 → 3 chunks at chunk size 100
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_large" });
+
+    expect(result.success).toBe(true);
+    expect(provider.calls).toHaveLength(4);
+    // First three are chunk calls (use the summary system prompt).
+    expect(provider.calls[0].system).toContain("session summarizer");
+    expect(provider.calls[2].system).toContain("session summarizer");
+    // Last is the reduce call (uses the merge system prompt).
+    expect(provider.calls[3].system).toContain("merging multiple partial summaries");
+    expect(provider.calls[3].user).toContain("Chunk 1 of 3");
+    expect(provider.calls[3].user).toContain("Chunk 3 of 3");
+
+    const stored: any = await kv.get("summaries", "ses_large");
+    expect(stored?.title).toBe("Merged"); // the persisted summary is the reduce output, not a chunk partial
+    // observationCount on the persisted summary should reflect the full session,
+    // not just the final chunk.
+    expect(stored?.observationCount).toBe(250);
+    expect(stored?.keyDecisions).toEqual(["dA", "dB", "dC"]);
+  });
+
+  it("SUMMARIZE_CHUNK_SIZE env override is respected", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "50";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so scripted responses line up with call order
+    const provider = makeProvider([ // four chunk responses followed by the reduce response
+      summaryXml({ title: "chunk" }),
+      summaryXml({ title: "chunk" }),
+      summaryXml({ title: "chunk" }),
+      summaryXml({ title: "chunk" }),
+      summaryXml({ title: "merged" }),
+    ]);
+    const { handler } = await setupHandler({
+      sessionId: "ses_env",
+      obsCount: 175,
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_env" });
+
+    expect(result.success).toBe(true);
+    // 175 obs ÷ 50 = 4 chunks (last chunk has 25) + 1 reduce = 5 calls.
+    expect(provider.calls).toHaveLength(5);
+  });
+
+  it("flaky chunk: parse fails once, retried, then succeeds — no skip", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so the scripted responses map onto chunks in order
+    const provider = makeProvider([
+      summaryXml({ title: "ok1" }),
+      "<garbage/>",                  // chunk 2 attempt 1: parse-fail
+      summaryXml({ title: "ok2" }),  // chunk 2 attempt 2 (retry): success
+      summaryXml({ title: "ok3" }),
+      summaryXml({ title: "merged" }),
+    ]);
+    const { handler, kv } = await setupHandler({
+      sessionId: "ses_flaky",
+      obsCount: 250, // 3 chunks at chunk size 100
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_flaky" });
+
+    expect(result.success).toBe(true);
+    // 3 chunks × 1 attempt + 1 retry on chunk 2 + 1 reduce = 5 calls.
+    expect(provider.calls).toHaveLength(5);
+    const stored: any = await kv.get("summaries", "ses_flaky");
+    expect(stored?.title).toBe("merged");
+  });
+
+  it("persistently-broken chunk is skipped, reduce still runs on remaining partials", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so the scripted responses map onto chunks in order
+    const provider = makeProvider([
+      summaryXml({ title: "ok1" }),
+      "<garbage/>", "<garbage/>",   // chunk 2: both attempts parse-fail
+      summaryXml({ title: "ok3" }),
+      summaryXml({ title: "merged-with-skip" }),
+    ]);
+    const { handler, kv } = await setupHandler({
+      sessionId: "ses_skip",
+      obsCount: 250,
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_skip" });
+
+    expect(result.success).toBe(true);
+    // 1 ok + 2 failed attempts on chunk 2 (initial + retry) + 1 ok + 1 reduce = 5 calls.
+    expect(provider.calls).toHaveLength(5);
+    // Reduce input should cover only the 2 surviving chunks (chunk 2 skipped).
+    // Labels are renumbered over the survivors ("1 of 2", "2 of 2"), while the
+    // obs ranges keep the original chronological boundaries.
+    const reduceCall = provider.calls[4];
+    expect(reduceCall.user).toContain("Chunk 1 of 2");
+    expect(reduceCall.user).toContain("Chunk 2 of 2");
+    expect(reduceCall.user).toContain("obs 1-100");        // first surviving chunk
+    expect(reduceCall.user).toContain("obs 201-250");      // second surviving chunk (originally chunk 3, range 201-250)
+    const stored: any = await kv.get("summaries", "ses_skip");
+    expect(stored?.title).toBe("merged-with-skip");
+  });
+
+  it("too many skipped chunks bails out with a clear error", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so the scripted responses map onto chunks in order
+    // 3 chunks, 2 fully broken → >50% skipped → bail.
+    const provider = makeProvider([
+      summaryXml({ title: "ok1" }),
+      "<garbage/>", "<garbage/>", // chunk 2: initial attempt + retry both unparseable
+      "<garbage/>", "<garbage/>", // chunk 3: initial attempt + retry both unparseable
+    ]);
+    const { handler } = await setupHandler({
+      sessionId: "ses_too_broken",
+      obsCount: 250,
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_too_broken" });
+
+    expect(result.success).toBe(false); // the handler resolves with a failure result rather than throwing
+    expect(result.error).toMatch(/too_many_chunks_skipped: 2\/3/);
+  });
+
+  it("provider error on one chunk after retry is skipped, not propagated", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so the call counter below maps onto chunks in order
+    let i = 0; // 1-based count of summarize() calls made so far
+    const provider: MemoryProvider & { calls: any[] } = {
+      name: "test",
+      calls: [],
+      compress: async () => "",
+      summarize: async (system: string, user: string) => {
+        (provider as any).calls.push({ system, user }); // record the call before deciding how to respond
+        i += 1;
+        if (i === 1) return summaryXml({ title: "ok1" });
+        // chunk 2: both attempts throw (e.g. provider 400)
+        if (i === 2 || i === 3) throw new Error("OpenAI API error (400): content rejected");
+        if (i === 4) return summaryXml({ title: "ok3" });
+        return summaryXml({ title: "merged-with-skip" }); // call 5 and any later call: the reduce output
+      },
+    };
+    const { handler, kv } = await setupHandler({
+      sessionId: "ses_net",
+      obsCount: 250,
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_net" });
+
+    expect(result.success).toBe(true);
+    // 1 ok + 2 fail + 1 ok + 1 reduce = 5 calls.
+    expect((provider as any).calls.length).toBe(5);
+    const stored: any = await kv.get("summaries", "ses_net");
+    expect(stored?.title).toBe("merged-with-skip");
+  });
+
+  it("every chunk failing on provider error trips too_many_chunks_skipped", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1";
+    // 3 chunks, all chunk calls throw → 3/3 skipped → bail.
+    const provider: MemoryProvider & { calls: any[] } = {
+      name: "test",
+      calls: [],
+      compress: async () => "",
+      summarize: async (system: string, user: string) => {
+        (provider as any).calls.push({ system, user }); // calls are recorded, but this test asserts only on the result
+        throw new Error("OpenAI API error (400): invalid request"); // every call rejects, retries included
+      },
+    };
+    const { handler } = await setupHandler({
+      sessionId: "ses_all_400",
+      obsCount: 250,
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_all_400" });
+
+    expect(result.success).toBe(false); // the handler resolves with a failure result rather than throwing
+    expect(result.error).toMatch(/too_many_chunks_skipped: 3\/3/);
+  });
+
+  it("chunks run in parallel batches according to SUMMARIZE_CHUNK_CONCURRENCY", async () => {
+    process.env.SUMMARIZE_CHUNK_SIZE = "100";
+    process.env.SUMMARIZE_CHUNK_CONCURRENCY = "2";
+    let inflight = 0; // summarize() calls currently waiting on the timer below
+    let maxInflight = 0; // high-water mark of `inflight` across the whole run
+    const provider: MemoryProvider & { calls: any[] } = {
+      name: "test",
+      calls: [],
+      compress: async () => "",
+      summarize: async (system: string, user: string) => {
+        (provider as any).calls.push({ system, user });
+        inflight += 1;
+        maxInflight = Math.max(maxInflight, inflight);
+        // Yield to event loop so siblings can also enter before we resolve.
+        await new Promise((r) => setTimeout(r, 5));
+        inflight -= 1;
+        if (system.includes("merging")) return summaryXml({ title: "merged" }); // the reduce call is recognized by its system prompt
+        return summaryXml({ title: "ok" });
+      },
+    };
+    const { handler } = await setupHandler({
+      sessionId: "ses_par",
+      obsCount: 400, // 4 chunks at chunkSize=100
+      provider,
+    });
+
+    const result: any = await handler({ sessionId: "ses_par" });
+
+    expect(result.success).toBe(true);
+    // 4 chunks at concurrency 2 → max 2 in flight at once during the chunk phase.
+    // Reduce is a single call so doesn't bump it.
+    expect(maxInflight).toBe(2); // exactly 2: fails if chunks ran serially (1) or exceeded the limit (>2)
+  });
+});