diff --git a/.env.example b/.env.example
index f1c207c1..77ca0f3a 100644
--- a/.env.example
+++ b/.env.example
@@ -98,6 +98,8 @@
# AGENTMEMORY_GRAPH_WEIGHT=0.2 # Graph traversal bonus on smart-search ranking
# TOKEN_BUDGET=2000 # Max tokens injected via mem::context per session
# MAX_OBS_PER_SESSION=500 # Per-session observation cap before consolidation kicks in
+# SUMMARIZE_CHUNK_SIZE=400 # When mem::summarize sees a session larger than this, it chunks observations and map-reduces (chunk-summarize → reduce-merge) to stay within the LLM's context window. Default 400 ≈ 44k tokens per chunk at ~110 tok/obs. Native sessions are capped by MAX_OBS_PER_SESSION; chunking primarily matters for bulk-imported jsonl sessions, which bypass that cap.
+# SUMMARIZE_CHUNK_CONCURRENCY=6 # Parallel chunk LLM calls during chunked summarize. Default 6 fits ~100-chunk sessions under iii's 180s function-invocation timeout at typical ~8s/call. High-throughput providers (Novita, DeepInfra, DeepSeek) commonly allow 100+ concurrent — bump this for very large imported sessions.
# -----------------------------------------------------------------------------
# 5. Behaviour flags
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..a2f5e0c5
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [rohitg00]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 41c99434..b9671280 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,19 +1,62 @@
name: CI
+# `paths-ignore` keeps doc-only / website / README / CHANGELOG churn from
+# burning runner minutes. Source / config / workflow changes always run.
+# `workflow_dispatch` gives a manual re-run button for flake debugging.
on:
push:
branches: [main]
+ paths-ignore:
+ - "README.md"
+ - "CHANGELOG.md"
+ - "AGENTS.md"
+ - "ROADMAP.md"
+ - "website/**"
+ - "docs/**"
+ - "assets/**"
+ - "deploy/**/README.md"
+ - "**/*.md"
+ - "**/*.mdx"
pull_request:
branches: [main]
+ paths-ignore:
+ - "README.md"
+ - "CHANGELOG.md"
+ - "AGENTS.md"
+ - "ROADMAP.md"
+ - "website/**"
+ - "docs/**"
+ - "assets/**"
+ - "deploy/**/README.md"
+ - "**/*.md"
+ - "**/*.mdx"
+ workflow_dispatch:
+
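+# Cancel in-flight PR runs when a newer commit (including a force-push)
+# lands on the same PR. Runs triggered by pushes to main are never
+# cancelled mid-flight, so main is not left with a half-finished CI result.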
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
test:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.os }}
strategy:
+ # Don't bail the whole matrix on one cell's failure — we want to
+ # see whether the same failure reproduces across OSes (e.g.
+ # whether a flake is platform-specific or universal).
+ fail-fast: false
matrix:
+ # Windows held back: test/obsidian-export.test.ts has hardcoded
+ # POSIX paths (`/tmp/...`) that fail on D:\ drive runners.
+ # src/functions/obsidian-export.ts needs os.tmpdir() + path.join
+ # rework before Windows can be added back. Tracked as follow-up.
+ os: [ubuntu-latest, macos-latest]
node-version: [20, 22]
steps:
- uses: actions/checkout@v6
+ with:
+ persist-credentials: false
- uses: actions/setup-node@v6
with:
node-version: ${{ matrix.node-version }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 62dc8925..00003399 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -10,15 +10,25 @@ on:
required: false
default: "agentmemory,mcp,fs-watcher"
+# Workflow-level permissions stay minimal — only `contents: read`
+# is required to check out the repo. `id-token: write` is granted on
+# the publish job for npm's --provenance Sigstore OIDC mint.
permissions:
contents: read
- id-token: write
jobs:
publish:
runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ id-token: write
steps:
- uses: actions/checkout@v6
+ with:
+ # Don't persist the GITHUB_TOKEN to .git/config — the
+ # publish steps don't push back to the repo, so the token
+ # only needs to live in memory for this checkout.
+ persist-credentials: false
- uses: actions/setup-node@v6
with:
diff --git a/.gitignore b/.gitignore
index 9a9260b8..ba6af995 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,10 @@ dist/
plugin/scripts/*.map
plugin/scripts/*.d.mts
data/
+!eval/data/
+!eval/data/**
+data-*/
+agentmemory-debug/
.gstack/
# Lock files — never commit (see feedback_no_lockfiles memory)
@@ -20,3 +24,8 @@ package-lock.json
pnpm-lock.yaml
yarn.lock
integrations/hermes/__pycache__/
+
+# Eval reports (transient; published scorecards live in docs/benchmarks/)
+eval/reports/
+# LongMemEval download is 278MB; fetched on demand
+eval/data/longmemeval/
diff --git a/AGENTS.md b/AGENTS.md
index ebcf3584..24e74245 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -19,6 +19,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng
5. `test/mcp-standalone.test.ts` — tool count assertion
6. `README.md` — tool counts (search for "MCP tools")
7. `plugin/.claude-plugin/plugin.json` — tool count in description
+8. `plugin/plugin.json` and `plugin/.mcp.copilot.json` (when present) — tool count or MCP exposure
**When adding REST endpoints, you MUST update:**
1. `src/triggers/api.ts` — endpoint registration
@@ -32,6 +33,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng
4. `src/functions/export-import.ts` — supportedVersions set
5. `test/export-import.test.ts` — version assertion
6. `plugin/.claude-plugin/plugin.json` — version field
+7. `plugin/plugin.json` (when present) — version field
**When adding new KV scopes:**
1. `src/state/schema.ts` — add to the KV object
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c73c185..0188e05a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,48 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
## [Unreleased]
+## [0.9.21] — 2026-05-19
+
+Quality + integration wave. Headline: native OpenCode plugin with full Claude Code hook parity ([#237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer)). Ten bug-fix PRs alongside: `memory_recall` returning the wrong shape, env-file `AGENTMEMORY_DROP_STALE_INDEX` silently ignored, hook scripts crashing on Windows usernames with spaces, viewer search inputs interrupting CJK IME composition, large sessions silently failing at the LLM context limit, lessons invisible to smart-search, Hermes plugin manifest missing hooks, cli onboarding crashing in non-TTY contexts, rebuildIndex blocking boot on large corpora, and a 25h embed-loop bottleneck during rebuild. In addition, the v0.9.19 iii-console installer workaround is removed now that upstream is fixed.
+
+### Added
+
+- **OpenCode plugin with 22 auto-capture hooks** ([PR #237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer), closes [#236](https://github.com/rohitg00/agentmemory/issues/236) + [#244](https://github.com/rohitg00/agentmemory/issues/244)). Complete OpenCode plugin in `plugin/opencode/` matching Claude Code hook parity. Covers session lifecycle (8 hooks), messages (3), tool lifecycle (2), part tracking, permissions, task tracking, plus a two-layer enrichment pipeline (memory context on first turn, file enrichment on subsequent turns) and two slash commands (`/recall`, `/remember`). Full gap analysis in `plugin/opencode/README.md`.
+
+### Fixed
+
+- **`memory_recall` endpoint + format/token_budget forwarding** ([PR #516](https://github.com/rohitg00/agentmemory/pull/516) by [@serhiizghama](https://github.com/serhiizghama), closes [#507](https://github.com/rohitg00/agentmemory/issues/507) + [#440](https://github.com/rohitg00/agentmemory/issues/440)). MCP `memory_recall` always returned compact mode and dropped `format` + `token_budget` params. Two root causes fixed: standalone shim routed through `/agentmemory/smart-search` instead of `/agentmemory/search`, and the local-fallback path didn't read either param. Now routes correctly, forwards both params end-to-end, defaults `format` to `"full"` matching the MCP schema.
+
+- **env-file `AGENTMEMORY_DROP_STALE_INDEX` flag now honored** ([PR #461](https://github.com/rohitg00/agentmemory/pull/461) by [@honor2030](https://github.com/honor2030), closes [#456](https://github.com/rohitg00/agentmemory/issues/456)). Setting the flag in `~/.agentmemory/.env` was silently ignored because the boot path read `process.env` directly. New `isDropStaleIndexEnabled()` helper reads merged env. Combined with [#455](https://github.com/rohitg00/agentmemory/issues/455) + [#469](https://github.com/rohitg00/agentmemory/issues/469) reports, this is the unblock path for the stale-index server-crash recovery loop.
+
+- **Windows hook scripts quote plugin paths correctly** ([PR #487](https://github.com/rohitg00/agentmemory/pull/487) by [@honor2030](https://github.com/honor2030), closes [#477](https://github.com/rohitg00/agentmemory/issues/477)). Hook command strings referenced `${CLAUDE_PLUGIN_ROOT}/scripts/*.mjs` without quotes — Windows users with spaces in their username had every hook crash. Quotes added + regression test.
+
+- **Viewer search inputs honor IME composition** ([PR #517](https://github.com/rohitg00/agentmemory/pull/517) by [@jonathanzhan1975](https://github.com/jonathanzhan1975)). CJK users typing in the viewer's search inputs hit mid-character interruption — every keystroke fired the `oninput=` re-render handler, breaking IME composition mid-syllable. New `bindImeSafeSearch` helper defers re-render until `compositionend`.
+
+- **Chunk large sessions to fit LLM context window** ([PR #472](https://github.com/rohitg00/agentmemory/pull/472) by [@efenex](https://github.com/efenex)). Sessions with >7000 observations silently failed at the LLM provider's context limit — the consolidation pipeline silently skipped the session. New chunking splits oversized sessions across multiple compress calls + restitches the narrative via a `REDUCE_SYSTEM` prompt. Legacy single-call path preserved when obs count is under the chunk size. Backfill script under `scripts/` for users hitting the pre-fix bug.
+
+- **Surface lessons in smart-search + diagnose tally** ([PR #473](https://github.com/rohitg00/agentmemory/pull/473) by [@efenex](https://github.com/efenex)). Closes the lesson round-trip with [#458](https://github.com/rohitg00/agentmemory/pull/458) (lessons auto-injected into `mem::context`): lessons are now also returned alongside hybrid search results in a separate `lessons` field on `smart-search`, and the `diagnose` health surface tallies per-store counts so the trust-shock pattern (save succeeds, recall empty, diagnose says 0) goes away.
+
+- **Declare all Hermes plugin hooks** ([PR #486](https://github.com/rohitg00/agentmemory/pull/486) by [@honor2030](https://github.com/honor2030)). The Hermes `plugin.yaml` manifest only declared 3 of the 6 implemented hooks. All 6 now declared (`prefetch`, `sync_turn`, `on_session_end`, `on_pre_compress`, `on_memory_write`, `system_prompt_block`).
+
+- **`rebuildIndex` non-blocking on boot** ([PR #500](https://github.com/rohitg00/agentmemory/pull/500) by [@efenex](https://github.com/efenex)). Boot path previously `await`-ed `rebuildIndex(kv)`, so the viewer + later boot steps stalled — on large corpora this was 25h+ of blocked startup. Replaced with `void rebuildIndex(kv).then(...).catch(...)` so the rebuild runs in the background.
+
+- **Batched embed calls in `rebuildIndex` (25h → 3h on large corpora)** ([PR #504](https://github.com/rohitg00/agentmemory/pull/504) by [@efenex](https://github.com/efenex)). The rebuild loop made one embed call per observation, paying full HTTP RTT per item. New `vectorIndexAddBatchGuarded` helper batches embeds (default 32, configurable via `REBUILD_EMBED_BATCH_SIZE`) and try/catches per-item failures. Measured 25h → 3h on a 250k-observation corpus.
+
+- **CLI skips onboarding prompts without a tty** ([PR #491](https://github.com/rohitg00/agentmemory/pull/491) by [@honor2030](https://github.com/honor2030)). Onboarding prompts crashed in non-interactive contexts (CI, `docker run -d`, piped input). New guard short-circuits with sensible defaults when stdin/stdout aren't TTYs or when `CI=1` is set.
+
+### Changed
+
+- **Drop iii-console installer `--next` workaround** ([PR #546](https://github.com/rohitg00/agentmemory/pull/546)). v0.9.19 routed first-run iii-console install through `bash -s -- --next` to dodge an upstream tag-prefix bug at [iii-hq/iii#1652](https://github.com/iii-hq/iii/issues/1652). Upstream [iii-hq/iii#1660](https://github.com/iii-hq/iii/pull/1660) shipped 2026-05-19; `install.iii.dev/console/main/install.sh` is a CDN proxy serving upstream main HEAD so the fix is live without an iii release tag. Reverted to canonical bare `curl ... | sh`.
+
+### Infrastructure
+
+- 95 test files (was 92), **1067 tests pass** (was 1038) on `chore(release): v0.9.21`.
+- Bundles 12 PRs: 1 contributor feature + 10 bug fixes across MCP / hooks / viewer / summarize / lessons / Hermes / rebuildIndex / CLI + 1 upstream-installer revert.
+- New contributors landing first PRs this release: [@cl0ckt0wer](https://github.com/cl0ckt0wer), [@serhiizghama](https://github.com/serhiizghama), [@jonathanzhan1975](https://github.com/jonathanzhan1975).
+
+[0.9.21]: https://github.com/rohitg00/agentmemory/compare/v0.9.20...v0.9.21
+
## [0.9.20] — 2026-05-18
Hotfix: revert the Codex Stop → session-end chain shipped in v0.9.19.
diff --git a/README.md b/README.md
index ef840011..fc6300fb 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
Your coding agent remembers everything. No more re-explaining.
Built on iii engine
- Persistent memory for Claude Code, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client.
+ Persistent memory for Claude Code, GitHub Copilot CLI, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client.
@@ -34,6 +34,7 @@
+
@@ -72,10 +73,12 @@
## Install
```bash
-npm install -g @agentmemory/agentmemory # once — bare `agentmemory` on PATH
-agentmemory # start the memory server on :3111
-agentmemory demo # seed sample sessions + prove recall
-agentmemory connect claude-code # wire your agent (also: codex, cursor, gemini-cli, ...)
+npm install -g @agentmemory/agentmemory # once — bare `agentmemory` on PATH
+# If you hit EACCES on macOS/Linux system Node installs, retry with:
+# sudo npm install -g @agentmemory/agentmemory
+agentmemory # start the memory server on :3111
+agentmemory demo # seed sample sessions + prove recall
+agentmemory connect claude-code # wire your agent (also: copilot-cli, codex, cursor, gemini-cli, ...)
```
Or via `npx` (no install):
@@ -107,6 +110,11 @@ agentmemory works with any agent that supports hooks, MCP, or REST API. All agen
native plugin + 6 hooks + MCP
+
+GitHub Copilot CLI
+MCP + plugin hooks/skills
+
+
OpenClaw
native plugin + MCP
@@ -207,6 +215,15 @@ npx @agentmemory/agentmemory
### Retrieval Accuracy
+**coding-agent-life-v1** (in-house corpus, sandbox-reproducible)
+
+| Adapter | P@5 | R@5 | Top-5 hit rate | p50 latency |
+|---|---|---|---|---|
+| **agentmemory hybrid** | **0.578** | **0.967** | **15 / 15** | 14 ms |
+| grep baseline | 0.267 | 0.967 | 15 / 15 | 0 ms |
+
+100% top-5 hit rate. **2.2×** better precision than the grep baseline on identical input. Full per-type breakdown: [`docs/benchmarks/2026-05-20-coding-agent-life-v1.md`](docs/benchmarks/2026-05-20-coding-agent-life-v1.md).
+
**LongMemEval-S** (ICLR 2025, 500 questions)
| System | R@5 | R@10 | MRR |
@@ -232,6 +249,8 @@ npx @agentmemory/agentmemory
> Embedding model: `all-MiniLM-L6-v2` (local, free, no API key). Full reports: [`benchmark/LONGMEMEVAL.md`](benchmark/LONGMEMEVAL.md), [`benchmark/QUALITY.md`](benchmark/QUALITY.md), [`benchmark/SCALE.md`](benchmark/SCALE.md). Competitor comparison: [`benchmark/COMPARISON.md`](benchmark/COMPARISON.md) — agentmemory vs mem0, Letta, Khoj, claude-mem, Hippo.
+**Reproduce locally:** [`eval/README.md`](eval/README.md) — adapter-pluggable harness for LongMemEval `_s` (public 500-Q) + `coding-agent-life-v1` (in-house 15-session corpus). Grep / vector / agentmemory adapters score side-by-side, NDJSON output, published scorecards land in [`docs/benchmarks/`](docs/benchmarks/).
+
---
@@ -349,6 +368,8 @@ Open `http://localhost:3113` to watch the memory build live.
```bash
npm install -g @agentmemory/agentmemory
+# If you hit EACCES on macOS/Linux system Node installs, retry with:
+# sudo npm install -g @agentmemory/agentmemory
agentmemory # start the server (same as the npx form)
agentmemory stop # tear it down
agentmemory remove # uninstall everything we created
@@ -418,6 +439,30 @@ The Codex plugin ships from the same `plugin/` directory as the Claude Code plug
Codex's hook engine injects `CLAUDE_PLUGIN_ROOT` into hook subprocesses (per [`codex-rs/hooks/src/engine/discovery.rs`](https://github.com/openai/codex/blob/main/codex-rs/hooks/src/engine/discovery.rs)), so the same hook scripts work across both hosts without duplication. Subagent / SessionEnd / Notification / TaskCompleted / PostToolUseFailure events are Claude-Code-only and are not registered for Codex.
+#### Codex Desktop: plugin hooks currently silent (workaround available)
+
+`CodexHooks` and `PluginHooks` are both stable + default-enabled in [`codex-rs/features/src/lib.rs`](https://github.com/openai/codex/blob/main/codex-rs/features/src/lib.rs), but Codex Desktop builds currently do not dispatch plugin-local `hooks.json` ([openai/codex#16430](https://github.com/openai/codex/issues/16430)). MCP tools still work; only the lifecycle observations are missing.
+
+Until upstream lands the fix, mirror the same hook commands into the global `~/.codex/hooks.json`:
+
+```bash
+agentmemory connect codex --with-hooks
+```
+
+This adds an idempotent block to `~/.codex/hooks.json` referencing absolute paths to the bundled scripts (no `${CLAUDE_PLUGIN_ROOT}` expansion needed at user-scope). Re-run the same command after upgrading agentmemory to refresh paths. User entries in the same file are preserved; only previous agentmemory entries are replaced.
+
+### GitHub Copilot CLI
+
+```bash
+# MCP-only wiring
+agentmemory connect copilot-cli
+
+# Full hooks/skills plugin from the GitHub subdir
+copilot plugin install rohitg00/agentmemory:plugin
+```
+
+`agentmemory connect copilot-cli` merges `mcpServers.agentmemory` into `~/.copilot/mcp-config.json` (or `$COPILOT_HOME/mcp-config.json` when `COPILOT_HOME` is set) and preserves existing servers. This adapter is Windows-safe even though other `connect` adapters still require manual Windows setup. Copilot picks up the MCP server on next launch or after `/mcp`. Install the plugin as well when you want the full hook/skill experience.
+
OpenClaw (paste this prompt)
@@ -490,9 +535,11 @@ The agentmemory entry is the **same MCP server block** across every host that us
| **Cline / Roo Code / Kilo Code** | Cline MCP settings (Settings UI → MCP Servers → Edit) | Same `mcpServers` block. |
| **Windsurf** | `~/.codeium/windsurf/mcp_config.json` | Same `mcpServers` block. |
| **Gemini CLI** | `~/.gemini/settings.json` | `gemini mcp add agentmemory npx -y @agentmemory/mcp --scope user` (auto-merges). |
+| **GitHub Copilot CLI (MCP only)** | `~/.copilot/mcp-config.json` | `agentmemory connect copilot-cli` merges `mcpServers.agentmemory`; Copilot picks it up on next launch or `/mcp`. |
+| **GitHub Copilot CLI (full plugin)** | Copilot plugin install | `copilot plugin install rohitg00/agentmemory:plugin` for the plugin from the GitHub subdir. |
| **OpenClaw** | OpenClaw MCP config | Same `mcpServers` block, or use the deeper [memory plugin](integrations/openclaw/). |
| **Codex CLI (MCP only)** | `.codex/config.toml` | TOML shape: `codex mcp add agentmemory -- npx -y @agentmemory/mcp`, or add `[mcp_servers.agentmemory]` manually. |
-| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. |
+| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. On Codex Desktop, also run `agentmemory connect codex --with-hooks` until [openai/codex#16430](https://github.com/openai/codex/issues/16430) lands — plugin hooks are currently silent there. |
| **OpenCode (MCP only)** | `opencode.json` | Different shape — top-level `mcp` key, command as array: `{"mcp": {"agentmemory": {"type": "local", "command": ["npx", "-y", "@agentmemory/mcp"], "enabled": true}}}`. |
| **OpenCode (full plugin)** | `plugin/opencode/` | 22 auto-capture hooks covering session lifecycle, messages, tools, errors. Two slash commands (`/recall`, `/remember`). Copy `plugin/opencode/` into your OpenCode workspace and add the plugin entry to `opencode.json`. See [`plugin/opencode/README.md`](plugin/opencode/README.md) for the full hook table + gap analysis. |
| **pi** | `~/.pi/agent/extensions/agentmemory` | Copy [`integrations/pi`](integrations/pi/) and restart pi. |
@@ -1035,7 +1082,7 @@ Full registry: [workers.iii.dev](https://workers.iii.dev). Every worker there co
### LLM Providers
-agentmemory auto-detects from your environment. No API key needed if you have a Claude subscription.
+agentmemory auto-detects the LLM provider from your environment. By default, no LLM calls are made unless you configure a provider or explicitly opt in to the Claude subscription fallback.
| Provider | Config | Notes |
|----------|--------|-------|
@@ -1046,6 +1093,33 @@ agentmemory auto-detects from your environment. No API key needed if you have a
| OpenRouter | `OPENROUTER_API_KEY` | Any model |
| Claude subscription fallback | `AGENTMEMORY_ALLOW_AGENT_SDK=true` | Opt-in only. Spawns `@anthropic-ai/claude-agent-sdk` sessions — used to cause unbounded Stop-hook recursion (#149 follow-up) so it is no longer the default. |
+### Config File
+
+Put agentmemory runtime configuration in `~/.agentmemory/.env` instead of exporting variables in every shell. If the viewer shows a setup hint like `export ANTHROPIC_API_KEY=...`, copy it into this file as `ANTHROPIC_API_KEY=...` without the `export` prefix, then restart agentmemory.
+
+Process environment variables still work and take precedence over values in the file.
+
+On Windows, the same file lives at `%USERPROFILE%\.agentmemory\.env`:
+
+```powershell
+New-Item -ItemType Directory -Force $HOME\.agentmemory
+notepad $HOME\.agentmemory\.env
+```
+
+To test with a Claude Code Pro/Max subscription instead of an API key, opt in explicitly:
+
+```env
+AGENTMEMORY_ALLOW_AGENT_SDK=true
+AGENTMEMORY_AUTO_COMPRESS=true
+```
+
+Turn on graph or consolidation features in the same file if you want them:
+
+```env
+GRAPH_EXTRACTION_ENABLED=true
+CONSOLIDATION_ENABLED=true
+```
+
### Environment Variables
Create `~/.agentmemory/.env`:
diff --git a/docs/benchmarks/2026-05-20-coding-agent-life-v1.md b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md
new file mode 100644
index 00000000..f280b27d
--- /dev/null
+++ b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md
@@ -0,0 +1,76 @@
+# 2026-05-20 — coding-agent-life-v1 (v0.9.21)
+
+**Commit:** `e9dc710`
+**Bench:** coding-agent-life-v1 (15 sessions, 15 queries)
+**N:** 15
+**K:** 5
+**Hardware:** macOS 15 (Apple Silicon)
+**agentmemory:** v0.9.21
+**iii-engine:** v0.11.2
+**Embedding provider:** local default
+**Sandbox:** isolated data dir at `/tmp/agentmemory-eval-sandbox/`, ports 3411/3412
+
+## Headline
+
+`agentmemory-hybrid` hits **100% top-5 hit rate**, R@5 = **0.967**, P@5 = **0.578**.
+
+Same corpus, grep baseline: R@5 = 0.967, P@5 = 0.267 — same recall, but **2.2× worse precision**. More than half of hybrid's top-5 is gold; nearly three-quarters of grep's top-5 is noise.
+
+## Per-adapter
+
+| Adapter | P@5 | R@5 | Hit rate | p50 latency |
+|---|---|---|---|---|
+| grep (tokenized substring) | 0.267 | 0.967 | 15 / 15 | 0 ms |
+| `agentmemory-hybrid` | **0.578** | **0.967** | **15 / 15** | 14 ms |
+
+`agentmemory-hybrid` runs through the production smart-search endpoint (`POST /agentmemory/smart-search`) so it exercises the full BM25 + embedding + reranker stack.
+
+## Per-question-type
+
+P@5, grep vs `agentmemory-hybrid`:
+
+| Type | grep | hybrid | hybrid lift |
+|---|---|---|---|
+| single-session-bug | 0.20 | 0.33 | 1.7× |
+| single-session-infra (n=2) | 0.20 | 0.50 | 2.5× |
+| single-session-refactor | 0.20 | 0.50 | 2.5× |
+| single-session-feature | 0.50 | 0.50 | tie |
+| single-session-test | 0.20 | 0.33 | 1.7× |
+| single-session-perf | 0.20 | 0.50 | 2.5× |
+| single-session-api | 0.20 | 0.50 | 2.5× |
+| single-session-db | 0.20 | 0.50 | 2.5× |
+| single-session-release | 0.20 | 0.33 | 1.7× |
+| multi-session-causal | 0.40 | 0.40 | tie |
+| preference (n=2) | 0.20 | 0.42 | 2.1× |
+| multi-session-review | 0.40 | 0.67 | 1.7× |
+| temporal (R@5 = 0.50 grep / 1.00 hybrid) | 0.50 | 0.67 | 1.3× |
+
+Temporal queries (`What was shipped on April 8th 2026?`) need both gold sessions to score full recall. grep finds 1/2; hybrid finds 2/2.
+
+## Methodology
+
+- 15 fictional Claude Code sessions across a 10-day stretch of a Rust CLI project (`shipctl`) — bug fixes, refactors, infra, perf, schema migrations, preferences, post-mortem
+- 15 hand-graded queries with `goldSessionIds[]` covering single-session, multi-session causal, multi-session review, preference, temporal
+- Each session ingested via `POST /agentmemory/remember` with `type=eval-session` and `concepts=[session_id]`
+- Each query hits `POST /agentmemory/smart-search` with `limit=50`; dedupe by session ID; truncate to K=5
+- No LLM in the retrieval loop
+- Sandbox: clean `~/.agentmemory` via `HOME` override + alt ports (3411/3412) so no cross-contamination from a user's real store
+
+## Reproduce
+
+```sh
+git checkout e9dc710
+npm install --legacy-peer-deps
+npm run build
+
+source eval/scripts/sandbox.sh
+npm run eval:coding-life -- --adapters grep,agentmemory
+```
+
+Outputs land in `eval/reports/coding-life/`: `scores.ndjson` (per-query rows) and `summary.json` (per-adapter and per-type aggregates).
+
+## Notes
+
+- The single-session-feature tie (`Which PR introduced helm chart support?`) is interesting: query says `PR introduced helm chart` and gold session has `helm chart` literally — grep keeps pace on lexical exactness alone, so hybrid matches it but doesn't outperform.
+- The corpus is intentionally small for fast iteration. Hardening targets: paraphrased queries, synonym substitution, in-corpus distractors with shared keywords, longer multi-session chains.
+- Vector adapter not measured here — requires `OPENAI_API_KEY`; will be added in a follow-up scorecard alongside LongMemEval `_s`.
diff --git a/docs/benchmarks/TEMPLATE.md b/docs/benchmarks/TEMPLATE.md
new file mode 100644
index 00000000..b830e24e
--- /dev/null
+++ b/docs/benchmarks/TEMPLATE.md
@@ -0,0 +1,54 @@
+# YYYY-MM-DD — BENCH-NAME (vX.Y.Z)
+
+**Commit:** `<short-sha>`
+**Bench:** LongMemEval `_s` / coding-agent-life-v1 / ...
+**N:** 500 / 15 / ...
+**K:** 5
+**Hardware:** macos-15 / ubuntu-22.04 / ...
+**OpenAI model:** text-embedding-3-small
+**Anthropic model:** N/A (no LLM in retrieval loop)
+
+## Headline
+
+agentmemory-hybrid: **R@5 = XX.XX%**, P@5 = XX.XX%, p50 latency = XXms
+
+Beats grep baseline by +X.Xpt R@5, vector by +X.Xpt R@5.
+
+## Per-adapter
+
+| Adapter | P@5 | R@5 | Hit rate | p50 latency |
+|---|---|---|---|---|
+| grep | | | | |
+| vector | | | | |
+| agentmemory-hybrid | | | | |
+
+## Per-question-type
+
+| Type | grep R@5 | vector R@5 | agentmemory R@5 |
+|---|---|---|---|
+| single-session-bug | | | |
+| single-session-refactor | | | |
+| preference | | | |
+| multi-session-causal | | | |
+| temporal | | | |
+
+## Methodology
+
+- Sessions ingested via `POST /agentmemory/remember` with `type=eval-session`
+- Queries hit `POST /agentmemory/smart-search` with `limit=k*4`
+- No LLM in retrieval loop. Direct rank from hybrid scoring.
+- Ranks dedup by sessionId before truncating to K
+- Latency measured as init+query for LongMemEval (per-question fresh state), query-only for coding-life (shared state)
+
+## Reproduce
+
+```sh
+git checkout <commit-sha>
+npm install --legacy-peer-deps
+OPENAI_API_KEY=sk-... AGENTMEMORY_BASE_URL=http://localhost:3111 \
+ npm run eval:longmemeval -- --stratify 10
+```
+
+## Notes
+
+
diff --git a/eval/README.md b/eval/README.md
new file mode 100644
index 00000000..7f295367
--- /dev/null
+++ b/eval/README.md
@@ -0,0 +1,111 @@
+# agentmemory-evals
+
+Public benchmarks for agentmemory's hybrid memory stack (BM25 + embeddings + consolidation + graph).
+
+Two families, both reproducible:
+
+- **LongMemEval** — public 500-question retrieval benchmark over multi-session chat
+- **coding-agent-life-v1** — in-house corpus of 15 fictional Claude Code sessions for a Rust CLI project (`shipctl`), with 15 hand-graded queries covering bug fixes, refactors, preferences, and multi-session causal reasoning
+
+## Adapters
+
+| Adapter | Backend | API key needed |
+|---|---|---|
+| `grep` | Tokenized substring match | none |
+| `vector` | OpenAI `text-embedding-3-small` + cosine | `OPENAI_API_KEY` |
+| `agentmemory` | Running agentmemory server, smart-search endpoint | none (auth optional via `AGENTMEMORY_SECRET`) |
+
+## Sandbox first
+
+Running the `agentmemory` adapter against your real `~/.agentmemory` directory pollutes the eval with pre-existing memories AND pollutes your real store with eval test data. Always sandbox.
+
+`eval/scripts/sandbox.sh` spins up a clean agentmemory + iii-engine on ports 3411/3412 with state in `/tmp/agentmemory-eval-sandbox/`, exports `AGENTMEMORY_BASE_URL`, and tears down on exit.
+
+```sh
+source eval/scripts/sandbox.sh
+npm run eval:coding-life -- --adapters grep,agentmemory
+```
+
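+Requires iii v0.11.2 on PATH (agentmemory pin). If you already have a different version installed, install the pinned build into `~/.local/bin` and make sure that directory comes first on `PATH`. The example below fetches the Apple Silicon macOS build (`aarch64-apple-darwin`); substitute the release asset that matches your platform: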
+
+```sh
+mkdir -p ~/.local/bin
+curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin
+export PATH="$HOME/.local/bin:$PATH" # add to ~/.zshrc or ~/.bashrc for persistence
+```
+
+## Quickstart
+
+### coding-agent-life-v1 (in-house, no download)
+
+```sh
+# grep baseline, no sandbox needed
+npm run eval:coding-life -- --adapters grep
+
+# add agentmemory + vector (sandbox + OpenAI key)
+source eval/scripts/sandbox.sh
+OPENAI_API_KEY=sk-... npm run eval:coding-life -- --adapters grep,vector,agentmemory
+```
+
+### LongMemEval `_s` (public, 278MB download)
+
+```sh
+mkdir -p ~/datasets/longmemeval
+curl -Lo ~/datasets/longmemeval/longmemeval_s.json \
+ https://huggingface.co/datasets/xiaowu0162/longmemeval/resolve/main/longmemeval_s
+
+source eval/scripts/sandbox.sh
+
+# Stratified sample of 10 per type (fast iteration, ~$0.20 OpenAI cost)
+OPENAI_API_KEY=sk-... LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \
+ npm run eval:longmemeval -- --stratify 10
+
+# Full 500 questions × 3 adapters (~$2 OpenAI cost)
+OPENAI_API_KEY=sk-... LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \
+ npm run eval:longmemeval
+```
+
+## Repo layout
+
+```text
+eval/
+├── README.md
+├── runner/
+│ ├── types.ts Adapter, Question, RankedDoc, ScoreRow
+│ ├── score.ts P@K, R@K, aggregation
+│ ├── load.ts LongMemEval JSON → Question[]
+│ ├── adapters/
+│ │ ├── grep.ts tokenized substring baseline
+│ │ ├── vector.ts OpenAI embeddings + cosine
+│ │ └── agentmemory.ts POST /agentmemory/{remember,smart-search}
+│ ├── longmemeval.ts public benchmark runner
+│ └── coding-life.ts in-house benchmark runner
+└── data/
+ └── coding-agent-life-v1/
+ ├── sessions.json 15 fictional sessions (~6KB)
+ └── queries.json 15 queries with gold session IDs
+```
+
+Reports land in `eval/reports/<bench>/` (gitignored): `scores.ndjson` + `summary.json`.
+
+Published scorecards land in `docs/benchmarks/YYYY-MM-DD-<bench>.md`.
+
+## Writing a new adapter
+
+1. Implement `Adapter` from `eval/runner/types.ts`:
+ ```ts
+ import type { Adapter } from "../types.js";
+ export const myAdapter: Adapter = {
+ name: "my-adapter",
+ async init(sessions, config) { /* index */ return state; },
+ async query(q, state, k) { /* search */ return ranked; },
+ };
+ ```
+2. Register in `eval/runner/{longmemeval,coding-life}.ts` `ADAPTERS` map.
+3. Run against `coding-agent-life-v1` to sanity-check before committing OpenAI spend on LongMemEval.
+
+## Why a benchmark for agentmemory
+
+agentmemory ships BM25 + embeddings + consolidation + graph retrieval. Numbers from those layers should be measured against grep/vector baselines so the value of each layer is provable.
+
+The in-house corpus is small on purpose (15 sessions) — covers single-session, multi-session, preference, and temporal question types without taking 15 minutes to run. LongMemEval gives the public-comparison axis.
diff --git a/eval/data/coding-agent-life-v1/queries.json b/eval/data/coding-agent-life-v1/queries.json
new file mode 100644
index 00000000..5603e8a0
--- /dev/null
+++ b/eval/data/coding-agent-life-v1/queries.json
@@ -0,0 +1,107 @@
+[
+ {
+ "id": "q-001",
+ "type": "single-session-bug",
+ "question": "Where did we land the auth env var precedence fix?",
+ "answer": "PR #11 with SHIPCTL_TOKEN > SHIP_TOKEN > SC_TOKEN precedence",
+ "goldSessionIds": ["sess-001"]
+ },
+ {
+ "id": "q-002",
+ "type": "single-session-infra",
+ "question": "What was the multi-arch Docker fix?",
+ "answer": "Added --platform=$BUILDPLATFORM and BUILDX_PLATFORMS for amd64+arm64",
+ "goldSessionIds": ["sess-002"]
+ },
+ {
+ "id": "q-003",
+ "type": "single-session-refactor",
+ "question": "Where did we consolidate the retry logic?",
+ "answer": "src/retry.rs with exponential backoff base=200ms cap=30s full jitter",
+ "goldSessionIds": ["sess-003"]
+ },
+ {
+ "id": "q-004",
+ "type": "single-session-feature",
+ "question": "Which PR introduced helm chart support?",
+ "answer": "PR #14",
+ "goldSessionIds": ["sess-004"]
+ },
+ {
+ "id": "q-005",
+ "type": "single-session-test",
+ "question": "Which test was flaky on macos and how was it fixed?",
+ "answer": "fs-watcher emits_changekind_file_delete; bumped wait to 1500ms + retry: 2",
+ "goldSessionIds": ["sess-005"]
+ },
+ {
+ "id": "q-006",
+ "type": "single-session-perf",
+ "question": "How did we fix the memory leak?",
+ "answer": "Replaced unbounded HashMap with LruCache cap=10k in src/cache.rs (PR #16)",
+ "goldSessionIds": ["sess-006"]
+ },
+ {
+ "id": "q-007",
+ "type": "single-session-api",
+ "question": "How did we handle the github API rate limit?",
+ "answer": "Conditional requests with If-None-Match etag and 304 caching via http-cache",
+ "goldSessionIds": ["sess-007"]
+ },
+ {
+ "id": "q-008",
+ "type": "single-session-db",
+ "question": "What was the schema migration approach for run_history?",
+ "answer": "Three-phase: nullable column + dual-write, backfill + flip reads, drop old column",
+ "goldSessionIds": ["sess-008"]
+ },
+ {
+ "id": "q-009",
+ "type": "single-session-infra",
+ "question": "How is the docs site deployed?",
+ "answer": "GitHub Actions docs.yml workflow + mdbook build + Cloudflare Pages on shipctl.dev",
+ "goldSessionIds": ["sess-009"]
+ },
+ {
+ "id": "q-010",
+ "type": "single-session-release",
+ "question": "Which PR set up the cross-platform release pipeline?",
+ "answer": "PR #19 with cross-rs for linux and native macos/windows builds",
+ "goldSessionIds": ["sess-010"]
+ },
+ {
+ "id": "q-011",
+ "type": "multi-session-causal",
+ "question": "What was the root cause of the staging incident, and where was it fixed?",
+ "answer": "SHIPCTL_TOKEN unset caused fallback to bad SC_TOKEN; fixed in PR #11 (sess-001) with precedence test; documented in post-mortem (sess-014)",
+ "goldSessionIds": ["sess-001", "sess-014"]
+ },
+ {
+ "id": "q-012",
+ "type": "preference",
+ "question": "Which async runtime does the team prefer for new code?",
+ "answer": "async-std (decided in arch review, not tokio)",
+ "goldSessionIds": ["sess-015"]
+ },
+ {
+ "id": "q-013",
+ "type": "preference",
+ "question": "What are the user's formatting preferences?",
+ "answer": "2-space indent in yaml, tab in rust, rust-analyzer over rls, cargo clippy before commit",
+ "goldSessionIds": ["sess-012"]
+ },
+ {
+ "id": "q-014",
+ "type": "multi-session-review",
+ "question": "What was the race condition Aria fixed in the schema migration?",
+ "answer": "Dual-write step had race when two writers hit same row; Aria added per-row advisory lock",
+ "goldSessionIds": ["sess-008", "sess-013"]
+ },
+ {
+ "id": "q-015",
+ "type": "temporal",
+ "question": "What was shipped on April 8th 2026?",
+ "answer": "Auth env-var precedence fix in PR #11, then started docker arm64 work",
+ "goldSessionIds": ["sess-001", "sess-011"]
+ }
+]
diff --git a/eval/data/coding-agent-life-v1/sessions.json b/eval/data/coding-agent-life-v1/sessions.json
new file mode 100644
index 00000000..71134134
--- /dev/null
+++ b/eval/data/coding-agent-life-v1/sessions.json
@@ -0,0 +1,77 @@
+[
+ {
+ "id": "sess-001",
+ "timestamp": "2026-04-08T09:14:00Z",
+ "content": "[user] auth check broken on staging when SHIPCTL_TOKEN unset, falling back to wrong env var\n[assistant] looked at src/auth.rs:42. precedence is SHIPCTL_TOKEN > SHIP_TOKEN > legacy SC_TOKEN. legacy path didn't validate. fixing precedence + adding length check.\n[user] yeah ship it\n[assistant] PR #11 opened with fix + regression test. covers all three env names."
+ },
+ {
+ "id": "sess-002",
+ "timestamp": "2026-04-08T14:30:00Z",
+ "content": "[user] docker build failing for arm64 on M2 runner\n[assistant] missing target in Dockerfile FROM line. need --platform=$BUILDPLATFORM on builder stage. updated Dockerfile + Makefile to set BUILDX_PLATFORMS=linux/amd64,linux/arm64. tested locally on apple silicon, builds clean."
+ },
+ {
+ "id": "sess-003",
+ "timestamp": "2026-04-09T10:02:00Z",
+ "content": "[user] retry logic is duplicated across handlers. consolidate.\n[assistant] extracted retry_with_backoff into src/retry.rs. exponential backoff base=200ms cap=30s jitter=full. all 4 callsites now use it. cuts ~80 lines."
+ },
+ {
+ "id": "sess-004",
+ "timestamp": "2026-04-10T11:45:00Z",
+ "content": "[user] add helm chart support\n[assistant] new subcommand `shipctl helm install`. wraps helm CLI, threads through --namespace and --values. PR #14 has the changes + 12 integration tests. Aria reviewed and approved."
+ },
+ {
+ "id": "sess-005",
+ "timestamp": "2026-04-11T16:20:00Z",
+ "content": "[user] test flake on macos-22 runner. emits_changekind_file_delete keeps failing\n[assistant] fsevents propagation slower on apple silicon, the 800ms wait isn't enough. bumped to 1500ms + describe-level retry: 2. all 15 fs-watcher tests pass now."
+ },
+ {
+ "id": "sess-006",
+ "timestamp": "2026-04-12T09:50:00Z",
+ "content": "[user] memory leak in long-running daemon. RSS grows 5MB/min.\n[assistant] traced to unbounded HashMap in src/cache.rs. swapped for LruCache with 10k cap. RSS stable at 180MB after 6h soak test. PR #16."
+ },
+ {
+ "id": "sess-007",
+ "timestamp": "2026-04-13T13:15:00Z",
+ "content": "[user] github API rate-limiting us when listing PRs. 5000/hr isn't enough during ci\n[assistant] added conditional requests with If-None-Match etag. cache 304 responses. drops actual API calls by ~70%. used http-cache crate."
+ },
+ {
+ "id": "sess-008",
+ "timestamp": "2026-04-15T10:30:00Z",
+ "content": "[user] schema migration for run_history table needs zero-downtime\n[assistant] three-phase migration. phase 1: add nullable new column + dual-write. phase 2: backfill + flip reads. phase 3: drop old column. avoids ALTER TABLE locks. ran on staging, took 4s for 2M rows."
+ },
+ {
+ "id": "sess-009",
+ "timestamp": "2026-04-16T15:40:00Z",
+ "content": "[user] docs site is stale. set up auto-deploy from main\n[assistant] added .github/workflows/docs.yml. mdbook build + cloudflare pages deploy. domain shipctl.dev wired up. preview deploys on PRs."
+ },
+ {
+ "id": "sess-010",
+ "timestamp": "2026-04-17T11:00:00Z",
+ "content": "[user] release pipeline. cut v0.3.0 with binaries for linux/macos/windows\n[assistant] new .github/workflows/release.yml. cross-rs for static linux, native for macos + windows. uploads to gh release + crates.io publish. tag-triggered. PR #19 has the workflow."
+ },
+ {
+ "id": "sess-011",
+ "timestamp": "2026-04-08T17:00:00Z",
+ "content": "[user] standup notes: shipped #11 auth fix this morning. starting on docker arm64 next."
+ },
+ {
+ "id": "sess-012",
+ "timestamp": "2026-04-10T18:30:00Z",
+ "content": "[user] preferences: I like rust analyzer over rls. always run cargo clippy before commit. prefer 2-space indent in yaml but tab in rust."
+ },
+ {
+ "id": "sess-013",
+ "timestamp": "2026-04-15T19:00:00Z",
+ "content": "[assistant] reviewed Aria's PR #18 (schema migration). flagged race condition in dual-write step when two writers hit same row. Aria added per-row advisory lock. lgtm now."
+ },
+ {
+ "id": "sess-014",
+ "timestamp": "2026-04-16T20:10:00Z",
+ "content": "[user] post-mortem from prod incident last week: SHIPCTL_TOKEN was unset in staging, fell back to bad SC_TOKEN which had wrong perms. delivery delayed 40min. action items: (1) precedence test (done in #11), (2) startup validation, (3) alert on auth fallback."
+ },
+ {
+ "id": "sess-015",
+ "timestamp": "2026-04-17T16:45:00Z",
+ "content": "[user] preferences: stick to async-std not tokio for new code. team agreed in arch review."
+ }
+]
diff --git a/eval/runner/adapters/agentmemory.ts b/eval/runner/adapters/agentmemory.ts
new file mode 100644
index 00000000..38028a7d
--- /dev/null
+++ b/eval/runner/adapters/agentmemory.ts
@@ -0,0 +1,93 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+/** State handed from `init` to `query` by the agentmemory HTTP adapter. */
+interface AgentMemoryState {
+  /** Base URL of the agentmemory server; endpoint paths are appended to it. */
+  baseUrl: string;
+  /** Optional bearer token sent in the `Authorization` header. */
+  secret?: string;
+  /** The sessions that were passed to `init`. */
+  sessions: Session[];
+  /** Maps the id returned by `/agentmemory/remember` to the id of the session that was stored. */
+  observationToSession: Map<string, string>;
+}
+
+/**
+ * Subset of the `/agentmemory/remember` response body that `init` reads.
+ * Every field is optional; `init` uses the first id that is present, checking
+ * `memory.id`, `observationId`, `id`, then `observation.id`.
+ */
+interface RememberResponse {
+ memory?: { id?: string };
+ observationId?: string;
+ id?: string;
+ observation?: { id?: string };
+}
+
+/** One hit in a `/agentmemory/smart-search` response; every field is optional. */
+interface SmartSearchRow {
+  obsId?: string;
+  id?: string;
+  observationId?: string;
+  sessionId?: string;
+  score?: number;
+  content?: string;
+}
+
+/**
+ * Subset of the `/agentmemory/smart-search` response body that `query` reads.
+ *
+ * Hits are taken from `results`, falling back to `observations`. Both lists use the
+ * same row shape so that `query` can read `observationId` from a row of either list;
+ * with two different inline shapes that property access does not type-check.
+ */
+interface SmartSearchResponse {
+  results?: SmartSearchRow[];
+  observations?: SmartSearchRow[];
+}
+
+/**
+ * Builds the JSON request headers for agentmemory HTTP calls.
+ *
+ * @param secret - Optional bearer token; when missing or empty no `Authorization` header is set.
+ * @returns Headers with `Content-Type` and, if a secret was given, `Authorization`.
+ */
+function authHeaders(secret?: string): Record<string, string> {
+  const headers: Record<string, string> = { "Content-Type": "application/json" };
+  if (secret) headers.Authorization = `Bearer ${secret}`;
+  return headers;
+}
+
+/**
+ * Adapter that benchmarks a running agentmemory server over HTTP.
+ *
+ * `init` POSTs each session to `/agentmemory/remember`; `query` POSTs to
+ * `/agentmemory/smart-search` and collapses the hits to distinct session ids.
+ */
+export const agentmemoryAdapter: Adapter<AgentMemoryState> = {
+  name: "agentmemory-hybrid",
+
+  /**
+   * Stores every session on the server, one request at a time.
+   *
+   * @param sessions - Sessions to store.
+   * @param config - Optional `baseUrl` / `secret`. Falls back to `AGENTMEMORY_BASE_URL` /
+   *   `AGENTMEMORY_SECRET`, then to `http://localhost:3111` for the base URL.
+   * @returns State carrying the connection settings and the returned-id → session-id map.
+   * @throws Error when a `remember` call returns a non-OK status.
+   */
+  async init(sessions, config) {
+    const baseUrl =
+      (config?.baseUrl as string) ?? process.env.AGENTMEMORY_BASE_URL ?? "http://localhost:3111";
+    const secret = (config?.secret as string) ?? process.env.AGENTMEMORY_SECRET;
+    const observationToSession = new Map<string, string>();
+    for (const s of sessions) {
+      const res = await fetch(`${baseUrl}/agentmemory/remember`, {
+        method: "POST",
+        headers: authHeaders(secret),
+        body: JSON.stringify({
+          content: s.content,
+          type: "eval-session",
+          concepts: [s.id],
+        }),
+      });
+      if (!res.ok) {
+        throw new Error(`remember failed for ${s.id}: ${res.status} ${await res.text()}`);
+      }
+      const body = (await res.json()) as RememberResponse;
+      // The id may arrive under several keys; take the first one present.
+      const obsId = body.memory?.id ?? body.observationId ?? body.id ?? body.observation?.id;
+      if (obsId) observationToSession.set(obsId, s.id);
+    }
+    return { baseUrl, secret, sessions, observationToSession };
+  },
+
+  /**
+   * Searches the server and maps hits back to session ids.
+   *
+   * @param q - Query text.
+   * @param state - State returned by `init`.
+   * @param k - Maximum number of distinct sessions to return.
+   * @returns Up to `k` distinct sessions in the order the server returned them; a missing
+   *   score is reported as 0.
+   * @throws Error when the search call returns a non-OK status.
+   */
+  async query(q, state, k) {
+    const res = await fetch(`${state.baseUrl}/agentmemory/smart-search`, {
+      method: "POST",
+      headers: authHeaders(state.secret),
+      // Over-fetch: several hits can collapse into the same session below.
+      body: JSON.stringify({ query: q, limit: Math.max(k * 10, 50) }),
+    });
+    if (!res.ok) {
+      throw new Error(`smart-search failed: ${res.status} ${await res.text()}`);
+    }
+    const body = (await res.json()) as SmartSearchResponse;
+    const rows = body.results ?? body.observations ?? [];
+    const ranked: RankedDoc[] = [];
+    const seen = new Set<string>();
+    for (const row of rows) {
+      let sessionId = row.sessionId;
+      if (!sessionId) {
+        // No session id on the row: resolve it through the id recorded during init.
+        const memId = row.obsId ?? row.id ?? row.observationId;
+        sessionId = memId ? state.observationToSession.get(memId) : undefined;
+      }
+      if (!sessionId || seen.has(sessionId)) continue;
+      seen.add(sessionId);
+      ranked.push({ sessionId, score: row.score ?? 0 });
+      if (ranked.length >= k) break;
+    }
+    return ranked;
+  },
+};
diff --git a/eval/runner/adapters/grep.ts b/eval/runner/adapters/grep.ts
new file mode 100644
index 00000000..28b18ea6
--- /dev/null
+++ b/eval/runner/adapters/grep.ts
@@ -0,0 +1,36 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+/** State for the grep adapter: the sessions passed to `init`, scanned directly by `query`. */
+interface GrepState {
+ sessions: Session[];
+}
+
+/**
+ * Splits text into lowercase search terms.
+ *
+ * Any run of characters outside `a-z`, `0-9` and `_` separates terms, and
+ * terms of one or two characters are dropped.
+ */
+function tokenize(s: string): string[] {
+  const words = s.toLowerCase().match(/[a-z0-9_]+/g) ?? [];
+  return words.filter((word) => word.length > 2);
+}
+
+/** Baseline adapter: ranks sessions by how many query terms occur in them as substrings. */
+export const grepAdapter: Adapter<GrepState> = {
+  name: "grep",
+
+  /** Keeps the sessions as they are; no index is built. */
+  async init(sessions) {
+    return { sessions };
+  },
+
+  /**
+   * Scores each session by the number of query terms found in its lowercased content.
+   *
+   * @param q - Free-text query, split into terms by `tokenize`.
+   * @param state - State returned by `init`.
+   * @param k - Maximum number of results.
+   * @returns Up to `k` sessions that matched at least one term, highest score first.
+   */
+  async query(q, state, k) {
+    const terms = tokenize(q);
+    const scored: RankedDoc[] = [];
+    for (const s of state.sessions) {
+      const body = s.content.toLowerCase();
+      const hits = terms.filter((t) => body.includes(t)).length;
+      if (hits > 0) scored.push({ sessionId: s.id, score: hits });
+    }
+    scored.sort((a, b) => b.score - a.score);
+    return scored.slice(0, k);
+  },
+};
diff --git a/eval/runner/adapters/vector.ts b/eval/runner/adapters/vector.ts
new file mode 100644
index 00000000..c40e414d
--- /dev/null
+++ b/eval/runner/adapters/vector.ts
@@ -0,0 +1,108 @@
+import type { Adapter, RankedDoc, Session } from "../types.js";
+
+/** State for the vector adapter; `embeddings[i]` is the embedding of `sessions[i]`. */
+interface VectorState {
+ sessions: Session[];
+ embeddings: Float32Array[];
+}
+
+// Embeddings endpoint and model used for both session and query embeddings.
+const OPENAI_URL = "https://api.openai.com/v1/embeddings";
+const MODEL = "text-embedding-3-small";
+// Vector length that `init` checks the returned embeddings against.
+const DIM = 1536;
+
+/**
+ * Embeds a single text with the OpenAI embeddings endpoint.
+ *
+ * @param text - Text to embed.
+ * @param apiKey - OpenAI API key, sent as a bearer token.
+ * @returns The embedding of `text`.
+ * @throws Error on a non-OK response, or when the response carries no embedding.
+ */
+async function embed(text: string, apiKey: string): Promise<Float32Array> {
+  const res = await fetch(OPENAI_URL, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify({ input: text, model: MODEL }),
+  });
+  if (!res.ok) {
+    throw new Error(`OpenAI embed failed: ${res.status} ${await res.text()}`);
+  }
+  const data = (await res.json()) as { data?: Array<{ embedding?: number[] }> };
+  // Validate like embedBatch does, instead of failing later with an opaque TypeError.
+  const embedding = data.data?.[0]?.embedding;
+  if (!Array.isArray(embedding) || embedding.length === 0) {
+    throw new Error("OpenAI embed: response contained no embedding");
+  }
+  return Float32Array.from(embedding);
+}
+
+/**
+ * Embeds several texts in one OpenAI request.
+ *
+ * @param texts - Texts to embed; an empty list returns `[]` without a request.
+ * @param apiKey - OpenAI API key, sent as a bearer token.
+ * @returns One embedding per input, in input order (`result[i]` belongs to `texts[i]`).
+ * @throws Error on a non-OK response, a wrong number of rows, an out-of-range or
+ *   duplicate `index`, or an empty embedding.
+ */
+async function embedBatch(texts: string[], apiKey: string): Promise<Float32Array[]> {
+  if (texts.length === 0) return [];
+  const res = await fetch(OPENAI_URL, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify({ input: texts, model: MODEL }),
+  });
+  if (!res.ok) {
+    throw new Error(`OpenAI batch embed failed: ${res.status} ${await res.text()}`);
+  }
+  const data = (await res.json()) as { data: Array<{ embedding: number[]; index: number }> };
+  if (!Array.isArray(data.data) || data.data.length !== texts.length) {
+    throw new Error(
+      `OpenAI batch embed: expected ${texts.length} embeddings, got ${data.data?.length ?? 0}`,
+    );
+  }
+  // Rows are placed by their `index` field rather than by arrival order.
+  const out = new Array<Float32Array>(texts.length);
+  for (const row of data.data) {
+    if (
+      !Number.isInteger(row.index) ||
+      row.index < 0 ||
+      row.index >= texts.length ||
+      out[row.index] !== undefined
+    ) {
+      throw new Error(`OpenAI batch embed: invalid or duplicate index ${row.index}`);
+    }
+    if (!Array.isArray(row.embedding) || row.embedding.length === 0) {
+      throw new Error(`OpenAI batch embed: empty embedding at index ${row.index}`);
+    }
+    out[row.index] = Float32Array.from(row.embedding);
+  }
+  return out;
+}
+
+/**
+ * Cosine similarity of two vectors, iterating over the length of `a`.
+ *
+ * @returns `dot(a, b) / (|a| * |b|)`, or 0 when that denominator is 0.
+ */
+function cosine(a: Float32Array, b: Float32Array): number {
+  let dot = 0;
+  let sumSqA = 0;
+  let sumSqB = 0;
+  a.forEach((x, i) => {
+    const y = b[i];
+    dot += x * y;
+    sumSqA += x * x;
+    sumSqB += y * y;
+  });
+  const denom = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
+  if (denom === 0) return 0;
+  return dot / denom;
+}
+
+/** Baseline adapter: OpenAI embeddings ranked by cosine similarity to the query. */
+export const vectorAdapter: Adapter<VectorState> = {
+  name: "vector",
+
+  /**
+   * Embeds every session in batches of 50, truncating each session to 8000 characters.
+   *
+   * @throws Error when `OPENAI_API_KEY` is unset or any embedding is not `DIM` long.
+   */
+  async init(sessions) {
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter");
+    const embeddings: Float32Array[] = new Array<Float32Array>(sessions.length);
+    const BATCH = 50;
+    for (let i = 0; i < sessions.length; i += BATCH) {
+      const batch = sessions.slice(i, i + BATCH);
+      const vecs = await embedBatch(
+        batch.map((s) => s.content.slice(0, 8000)),
+        apiKey,
+      );
+      for (let j = 0; j < vecs.length; j++) embeddings[i + j] = vecs[j];
+    }
+    // Check every vector, not just the first: cosine() assumes equal lengths.
+    const wrong = embeddings.find((e) => e.length !== DIM);
+    if (wrong) {
+      throw new Error(`unexpected embedding dim: ${wrong.length}`);
+    }
+    return { sessions, embeddings };
+  },
+
+  /**
+   * Embeds the query and ranks every session by cosine similarity.
+   *
+   * @returns The `k` most similar sessions, highest similarity first.
+   * @throws Error when `OPENAI_API_KEY` is unset.
+   */
+  async query(q, state, k) {
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter");
+    const qvec = await embed(q, apiKey);
+    const scored: RankedDoc[] = state.sessions.map((s, i) => ({
+      sessionId: s.id,
+      score: cosine(qvec, state.embeddings[i]),
+    }));
+    scored.sort((a, b) => b.score - a.score);
+    return scored.slice(0, k);
+  },
+};
diff --git a/eval/runner/coding-life.ts b/eval/runner/coding-life.ts
new file mode 100644
index 00000000..753ca87f
--- /dev/null
+++ b/eval/runner/coding-life.ts
@@ -0,0 +1,101 @@
+import { readFileSync, existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { parseArgs } from "node:util";
+import { agentmemoryAdapter } from "./adapters/agentmemory.js";
+import { grepAdapter } from "./adapters/grep.js";
+import { vectorAdapter } from "./adapters/vector.js";
+import { aggregate, scoreQuestion } from "./score.js";
+import type { Adapter, Question, ScoreRow, Session } from "./types.js";
+
+/**
+ * Adapters selectable through `--adapters`, keyed by CLI name.
+ * Each adapter is cast to the base `Adapter` type so they can share one map.
+ */
+const ADAPTERS: Record<string, Adapter> = {
+  grep: grepAdapter as unknown as Adapter,
+  vector: vectorAdapter as unknown as Adapter,
+  agentmemory: agentmemoryAdapter as unknown as Adapter,
+};
+
+/** Raw command-line flags; every value is still a string at this point. */
+interface CliOptions {
+ // Directory that holds sessions.json and queries.json.
+ data: string;
+ // Comma-separated adapter names.
+ adapters: string;
+ // Cutoff K, converted to a number and validated in main().
+ k: string;
+ // Directory that receives scores.ndjson and summary.json.
+ out: string;
+}
+
+/** Reads the command-line flags, using the default for any flag that was not passed. */
+function parse(): CliOptions {
+  const parsed = parseArgs({
+    options: {
+      data: { type: "string", default: "eval/data/coding-agent-life-v1" },
+      adapters: { type: "string", default: "grep,vector,agentmemory" },
+      k: { type: "string", default: "5" },
+      out: { type: "string", default: "eval/reports/coding-life" },
+    },
+  });
+  return parsed.values as unknown as CliOptions;
+}
+
+/**
+ * Runs the in-house coding-agent-life benchmark.
+ *
+ * Loads `sessions.json` and `queries.json` from `--data`, indexes all sessions once per
+ * adapter, scores every query at cutoff `--k`, appends one row per query to
+ * `scores.ndjson`, and writes the aggregate to `summary.json` under `--out`.
+ * Exits with status 2 on an invalid `--k` or an unknown adapter name.
+ */
+async function main(): Promise<void> {
+  const opts = parse();
+  const k = Number(opts.k);
+  if (!Number.isInteger(k) || k <= 0) {
+    console.error(`--k must be a positive integer, got: ${opts.k}`);
+    process.exit(2);
+  }
+  const sessions = JSON.parse(
+    readFileSync(resolve(opts.data, "sessions.json"), "utf8"),
+  ) as Session[];
+  const queriesRaw = JSON.parse(
+    readFileSync(resolve(opts.data, "queries.json"), "utf8"),
+  ) as Array<Omit<Question, "haystack">>;
+  // Every query searches the same corpus, so all questions share one haystack.
+  const questions: Question[] = queriesRaw.map((q) => ({ ...q, haystack: sessions }));
+  const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean);
+  for (const a of adapterNames) {
+    // Own-property check: a plain lookup would accept inherited keys such as "constructor".
+    if (!Object.prototype.hasOwnProperty.call(ADAPTERS, a)) {
+      console.error(`unknown adapter: ${a}. options: ${Object.keys(ADAPTERS).join(",")}`);
+      process.exit(2);
+    }
+  }
+  console.log(
+    `loaded ${sessions.length} sessions, ${questions.length} queries, adapters: ${adapterNames.join(",")}, k=${k}`,
+  );
+
+  const outDir = resolve(opts.out);
+  mkdirSync(outDir, { recursive: true });
+  const ndjsonPath = `${outDir}/scores.ndjson`;
+  // Truncate rows left over from a previous run; rows are appended below.
+  if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, "");
+
+  const rows: ScoreRow[] = [];
+  for (const adapterName of adapterNames) {
+    const adapter = ADAPTERS[adapterName];
+    console.log(`\n== ${adapter.name} ==`);
+    const state = await adapter.init(sessions);
+    try {
+      for (const q of questions) {
+        // Latency covers the query call only; indexing happened once in init.
+        const t0 = performance.now();
+        const ranked = await adapter.query(q.question, state, k);
+        const latencyMs = performance.now() - t0;
+        const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs);
+        rows.push(row);
+        appendFileSync(ndjsonPath, JSON.stringify(row) + "\n");
+        const mark = row.hit ? "+" : "-";
+        console.log(
+          `  ${mark} ${q.id} [${q.type}] R@${k}=${row.recallAtK.toFixed(2)} (${Math.round(latencyMs)}ms)`,
+        );
+      }
+    } finally {
+      if (adapter.teardown) await adapter.teardown(state);
+    }
+  }
+
+  const agg = aggregate(rows);
+  writeFileSync(`${outDir}/summary.json`, JSON.stringify(agg, null, 2));
+  console.log("\n=== Summary ===");
+  for (const [adapter, stats] of Object.entries(agg.byAdapter)) {
+    console.log(
+      `  ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`,
+    );
+  }
+}
+
+// Entry point: log any rejection from main() and exit with status 1.
+main().catch((err) => {
+ console.error(err);
+ process.exit(1);
+});
diff --git a/eval/runner/load.ts b/eval/runner/load.ts
new file mode 100644
index 00000000..aece2452
--- /dev/null
+++ b/eval/runner/load.ts
@@ -0,0 +1,54 @@
+import { readFileSync } from "node:fs";
+import type { Question, Session } from "./types.js";
+
+/** One row of the LongMemEval JSON file, limited to the fields the loader reads. */
+interface LongMemEvalRaw {
+  question_id: string;
+  question_type: string;
+  question: string;
+  answer?: string;
+  /** Ids of the sessions that contain the answer; copied to `goldSessionIds`. */
+  answer_session_ids: string[];
+  /** Session ids, index-aligned with `haystack_sessions`. */
+  haystack_session_ids: string[];
+  /** One list of turns per session, index-aligned with `haystack_session_ids`. */
+  haystack_sessions: Array<Array<{ role: string; content: string }>>;
+}
+
+/** Renders a session as text: one `[role] content` entry per turn, separated by blank lines. */
+function flattenSession(turns: Array<{ role: string; content: string }>): string {
+  const entries: string[] = [];
+  for (const { role, content } of turns) {
+    entries.push(`[${role}] ${content}`);
+  }
+  return entries.join("\n\n");
+}
+
+/**
+ * Loads a LongMemEval JSON file and converts each row into a `Question`.
+ *
+ * @param path - Path of the JSON file, which holds an array of rows.
+ * @param limit - When a number, only the first `limit` rows are converted.
+ * @returns One question per row, each with its own haystack of flattened sessions.
+ * @throws Error when a row's `haystack_session_ids` and `haystack_sessions` differ in length.
+ */
+export function loadLongMemEval(path: string, limit?: number): Question[] {
+  const rows = JSON.parse(readFileSync(path, "utf8")) as LongMemEvalRaw[];
+  const selected = typeof limit === "number" ? rows.slice(0, limit) : rows;
+  return selected.map((r) => {
+    if (r.haystack_session_ids.length !== r.haystack_sessions.length) {
+      throw new Error(
+        `LongMemEval row ${r.question_id}: haystack_session_ids (${r.haystack_session_ids.length}) and haystack_sessions (${r.haystack_sessions.length}) length mismatch`,
+      );
+    }
+    // Ids and sessions are parallel arrays; pair them up by index.
+    const haystack: Session[] = r.haystack_session_ids.map((id, i) => {
+      const content = flattenSession(r.haystack_sessions[i]);
+      return { id, content };
+    });
+    const question: Question = {
+      id: r.question_id,
+      type: r.question_type,
+      question: r.question,
+      answer: r.answer,
+      goldSessionIds: r.answer_session_ids,
+      haystack,
+    };
+    return question;
+  });
+}
+
+/**
+ * Takes at most `perType` questions of each question type.
+ *
+ * @param questions - Questions to sample from; input order is kept within each type.
+ * @param perType - Maximum number of questions to keep per type.
+ * @returns The kept questions, grouped by type in sorted type order.
+ */
+export function stratifySample(questions: Question[], perType: number): Question[] {
+  // A Map (not a plain object) so a type named like an Object.prototype key cannot collide.
+  const buckets = new Map<string, Question[]>();
+  for (const q of questions) {
+    const bucket = buckets.get(q.type);
+    if (bucket) bucket.push(q);
+    else buckets.set(q.type, [q]);
+  }
+  const out: Question[] = [];
+  for (const type of [...buckets.keys()].sort()) {
+    out.push(...(buckets.get(type) ?? []).slice(0, perType));
+  }
+  return out;
+}
diff --git a/eval/runner/longmemeval.ts b/eval/runner/longmemeval.ts
new file mode 100644
index 00000000..a906fa21
--- /dev/null
+++ b/eval/runner/longmemeval.ts
@@ -0,0 +1,126 @@
+import { existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { parseArgs } from "node:util";
+import { agentmemoryAdapter } from "./adapters/agentmemory.js";
+import { grepAdapter } from "./adapters/grep.js";
+import { vectorAdapter } from "./adapters/vector.js";
+import { loadLongMemEval, stratifySample } from "./load.js";
+import { aggregate, scoreQuestion } from "./score.js";
+import type { Adapter, ScoreRow } from "./types.js";
+
+/**
+ * Adapters selectable through `--adapters`, keyed by CLI name.
+ * Each adapter is cast to the base `Adapter` type so they can share one map.
+ */
+const ADAPTERS: Record<string, Adapter> = {
+  grep: grepAdapter as unknown as Adapter,
+  vector: vectorAdapter as unknown as Adapter,
+  agentmemory: agentmemoryAdapter as unknown as Adapter,
+};
+
+/** Raw command-line flags; every value is still a string at this point. */
+interface CliOptions {
+ // Path of the LongMemEval JSON file; defaults to LONGMEMEVAL_PATH, else empty.
+ data: string;
+ // Comma-separated adapter names.
+ adapters: string;
+ // Cutoff K, converted to a number and validated in main().
+ k: string;
+ // Optional: load only the first N rows of the dataset.
+ limit?: string;
+ // Optional: keep at most N questions per question type.
+ stratify?: string;
+ // Directory that receives scores.ndjson and summary.json.
+ out: string;
+}
+
+/** Reads the command-line flags; `--data` defaults to `LONGMEMEVAL_PATH` (or an empty string). */
+function parse(): CliOptions {
+  const parsed = parseArgs({
+    options: {
+      data: { type: "string", default: process.env.LONGMEMEVAL_PATH ?? "" },
+      adapters: { type: "string", default: "grep,vector,agentmemory" },
+      k: { type: "string", default: "5" },
+      limit: { type: "string" },
+      stratify: { type: "string" },
+      out: { type: "string", default: "eval/reports/longmemeval" },
+    },
+  });
+  return parsed.values as unknown as CliOptions;
+}
+
+/**
+ * Runs the LongMemEval benchmark.
+ *
+ * Loads the dataset from `--data`, optionally narrows it with `--limit` (first N rows)
+ * and then `--stratify` (N per question type), and for every adapter and question
+ * builds a fresh index over that question's haystack, queries it at cutoff `--k`, and
+ * appends the score row to `scores.ndjson`. The aggregate goes to `summary.json`.
+ * Exits with status 2 on a missing `--data`, an invalid numeric flag, or an unknown adapter.
+ */
+async function main(): Promise<void> {
+  const opts = parse();
+  if (!opts.data) {
+    console.error("--data required (or LONGMEMEVAL_PATH env)");
+    process.exit(2);
+  }
+  const k = Number(opts.k);
+  if (!Number.isInteger(k) || k <= 0) {
+    console.error(`--k must be a positive integer, got: ${opts.k}`);
+    process.exit(2);
+  }
+  let limit: number | undefined;
+  if (opts.limit !== undefined) {
+    limit = Number(opts.limit);
+    if (!Number.isInteger(limit) || limit <= 0) {
+      console.error(`--limit must be a positive integer, got: ${opts.limit}`);
+      process.exit(2);
+    }
+  }
+  let perType: number | undefined;
+  if (opts.stratify !== undefined) {
+    perType = Number(opts.stratify);
+    if (!Number.isInteger(perType) || perType <= 0) {
+      console.error(`--stratify must be a positive integer, got: ${opts.stratify}`);
+      process.exit(2);
+    }
+  }
+  const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean);
+  for (const a of adapterNames) {
+    // Own-property check: a plain lookup would accept inherited keys such as "constructor".
+    if (!Object.prototype.hasOwnProperty.call(ADAPTERS, a)) {
+      console.error(`unknown adapter: ${a}. options: ${Object.keys(ADAPTERS).join(",")}`);
+      process.exit(2);
+    }
+  }
+  let questions = loadLongMemEval(resolve(opts.data), limit);
+  if (perType) questions = stratifySample(questions, perType);
+  console.log(
+    `loaded ${questions.length} questions, adapters: ${adapterNames.join(",")}, k=${k}`,
+  );
+
+  const outDir = resolve(opts.out);
+  mkdirSync(outDir, { recursive: true });
+  const ndjsonPath = `${outDir}/scores.ndjson`;
+  // Truncate rows left over from a previous run; rows are appended below.
+  if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, "");
+  mkdirSync(dirname(ndjsonPath), { recursive: true });
+
+  const rows: ScoreRow[] = [];
+  for (const adapterName of adapterNames) {
+    const adapter = ADAPTERS[adapterName];
+    console.log(`\n== ${adapter.name} ==`);
+    for (const q of questions) {
+      // The timer starts before init, so latency here includes indexing the haystack.
+      const t0 = performance.now();
+      const state = await adapter.init(q.haystack);
+      try {
+        const ranked = await adapter.query(q.question, state, k);
+        const latencyMs = performance.now() - t0;
+        const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs);
+        rows.push(row);
+        appendFileSync(ndjsonPath, JSON.stringify(row) + "\n");
+        const mark = row.hit ? "+" : "-";
+        console.log(
+          `  ${mark} ${q.id} [${q.type}] R@${k}=${row.recallAtK.toFixed(2)} (${Math.round(latencyMs)}ms)`,
+        );
+      } finally {
+        if (adapter.teardown) await adapter.teardown(state);
+      }
+    }
+  }
+
+  const agg = aggregate(rows);
+  const summaryPath = `${outDir}/summary.json`;
+  writeFileSync(summaryPath, JSON.stringify(agg, null, 2));
+
+  console.log("\n=== Summary ===");
+  for (const [adapter, stats] of Object.entries(agg.byAdapter)) {
+    console.log(
+      `  ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`,
+    );
+  }
+  console.log(`\nwrote ${ndjsonPath}`);
+  console.log(`wrote ${summaryPath}`);
+}
+
+// Entry point: log any rejection from main() and exit with status 1.
+main().catch((err) => {
+ console.error(err);
+ process.exit(1);
+});
diff --git a/eval/runner/score.ts b/eval/runner/score.ts
new file mode 100644
index 00000000..b21d30ca
--- /dev/null
+++ b/eval/runner/score.ts
@@ -0,0 +1,78 @@
+import type { Question, RankedDoc, ScoreRow } from "./types.js";
+
+/**
+ * Scores one adapter's ranking for one question.
+ *
+ * @param q - Question whose `goldSessionIds` are the relevant sessions.
+ * @param ranked - Adapter output, best match first.
+ * @param k - Cutoff; only the first `k` entries count towards precision, recall and `hit`.
+ * @param adapter - Adapter name recorded on the row.
+ * @param latencyMs - Measured latency recorded on the row.
+ * @returns A row where `precisionAtK` is hits / k, `recallAtK` is hits / gold count (0 when
+ *   there is no gold), and `topGoldRank` is the 1-based rank of the first gold session
+ *   anywhere in `ranked`, or `null` if none appears.
+ */
+export function scoreQuestion(
+  q: Question,
+  ranked: RankedDoc[],
+  k: number,
+  adapter: string,
+  latencyMs: number,
+): ScoreRow {
+  const gold = new Set(q.goldSessionIds);
+  // Count each gold session once, so a ranking that repeats an id cannot push recall above 1.
+  const matched = new Set<string>();
+  for (const doc of ranked.slice(0, k)) {
+    if (gold.has(doc.sessionId)) matched.add(doc.sessionId);
+  }
+  const hits = matched.size;
+  const precisionAtK = k > 0 ? hits / k : 0;
+  const recallAtK = gold.size === 0 ? 0 : hits / gold.size;
+  const firstGold = ranked.findIndex((doc) => gold.has(doc.sessionId));
+  const topGoldRank = firstGold === -1 ? null : firstGold + 1;
+  return {
+    questionId: q.id,
+    questionType: q.type,
+    adapter,
+    k,
+    precisionAtK,
+    recallAtK,
+    hit: hits > 0,
+    topGoldRank,
+    latencyMs,
+  };
+}
+
+/**
+ * Averages score rows per adapter and per (question type, adapter).
+ *
+ * @param rows - Rows produced by `scoreQuestion`.
+ * @returns `byAdapter[name]` with mean precision `p`, mean recall `r`, hit count `hit`,
+ *   row count `n` and `latencyP50`; `byType[type][name]` with the same fields except
+ *   latency. `latencyP50` is the element at index `floor(n / 2)` of the sorted
+ *   latencies, i.e. the upper of the two middle values when `n` is even.
+ */
+export function aggregate(rows: ScoreRow[]): {
+  byAdapter: Record<string, { p: number; r: number; hit: number; n: number; latencyP50: number }>;
+  byType: Record<string, Record<string, { p: number; r: number; hit: number; n: number }>>;
+} {
+  const byAdapter: Record<
+    string,
+    { p: number; r: number; hit: number; n: number; latencyP50: number }
+  > = {};
+  const latencies: Record<string, number[]> = {};
+  // First pass: accumulate sums; they are turned into means below.
+  for (const r of rows) {
+    const a = (byAdapter[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0, latencyP50: 0 });
+    a.p += r.precisionAtK;
+    a.r += r.recallAtK;
+    a.hit += r.hit ? 1 : 0;
+    a.n += 1;
+    (latencies[r.adapter] ??= []).push(r.latencyMs);
+  }
+  for (const adapter of Object.keys(byAdapter)) {
+    const a = byAdapter[adapter];
+    a.p = a.p / a.n;
+    a.r = a.r / a.n;
+    const sorted = latencies[adapter].slice().sort((x, y) => x - y);
+    a.latencyP50 = sorted[Math.floor(sorted.length / 2)] ?? 0;
+  }
+  const byType: Record<
+    string,
+    Record<string, { p: number; r: number; hit: number; n: number }>
+  > = {};
+  for (const r of rows) {
+    const t = (byType[r.questionType] ??= {});
+    const a = (t[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0 });
+    a.p += r.precisionAtK;
+    a.r += r.recallAtK;
+    a.hit += r.hit ? 1 : 0;
+    a.n += 1;
+  }
+  for (const t of Object.keys(byType)) {
+    for (const adapter of Object.keys(byType[t])) {
+      const a = byType[t][adapter];
+      a.p = a.p / a.n;
+      a.r = a.r / a.n;
+    }
+  }
+  return { byAdapter, byType };
+}
diff --git a/eval/runner/types.ts b/eval/runner/types.ts
new file mode 100644
index 00000000..e72a6408
--- /dev/null
+++ b/eval/runner/types.ts
@@ -0,0 +1,38 @@
+/** One unit of the search corpus: a session id plus its text. */
+export interface Session {
+ id: string;
+ // Optional; the coding-agent-life data sets it, the LongMemEval loader does not.
+ timestamp?: string;
+ content: string;
+}
+
+/** A benchmark query together with its gold answer sessions and the corpus to search. */
+export interface Question {
+ id: string;
+ type: string;
+ question: string;
+ answer?: string;
+ // Ids of the sessions counted as correct when scoring.
+ goldSessionIds: string[];
+ // Sessions the adapters are given to search for this question.
+ haystack: Session[];
+}
+
+/** One entry of an adapter's ranked result list. */
+export interface RankedDoc {
+ sessionId: string;
+ score: number;
+}
+
+/**
+ * A retrieval system under test.
+ *
+ * @typeParam State - Whatever `init` builds and `query` / `teardown` consume; defaults to
+ *   `unknown` so the bare name `Adapter` stays usable.
+ */
+export interface Adapter<State = unknown> {
+  /** Name recorded on score rows and printed in reports. */
+  name: string;
+  /** Indexes the given sessions and returns the state that `query` needs. */
+  init(sessions: Session[], config?: Record<string, unknown>): Promise<State>;
+  /** Returns the ranking for query `q`; callers pass `k` as the number of results wanted. */
+  query(q: string, state: State, k: number): Promise<RankedDoc[]>;
+  /** Optional cleanup, called by the runners once they are done with `state`. */
+  teardown?(state: State): Promise<void>;
+}
+
+/** Score of one adapter on one question; one row per line in scores.ndjson. */
+export interface ScoreRow {
+ questionId: string;
+ questionType: string;
+ adapter: string;
+ k: number;
+ precisionAtK: number;
+ recallAtK: number;
+ // True when at least one gold session is in the top k.
+ hit: boolean;
+ // 1-based rank of the first gold session in the ranking, or null if none was returned.
+ topGoldRank: number | null;
+ latencyMs: number;
+}
diff --git a/eval/scripts/sandbox.sh b/eval/scripts/sandbox.sh
new file mode 100755
index 00000000..5d402330
--- /dev/null
+++ b/eval/scripts/sandbox.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# Boot a sandboxed agentmemory + iii-engine on alt ports with a clean data dir,
+# so eval runs aren't polluted by (and don't pollute) your real ~/.agentmemory.
+# Source it: `source eval/scripts/sandbox.sh` then run eval scripts;
+# the sandbox is torn down on EXIT.
+
+# NOTE(review): the header says to `source` this file. When sourced, these shell
+# options and every `exit` below apply to the calling shell — confirm that is intended.
+set -euo pipefail
+
+# Sandbox location and ports; each can be overridden from the environment.
+SANDBOX_ROOT="${SANDBOX_ROOT:-/tmp/agentmemory-eval-sandbox}"
+SANDBOX_PORT="${SANDBOX_PORT:-3411}"
+SANDBOX_STREAM_PORT="${SANDBOX_STREAM_PORT:-3412}"
+
+# Repository root, two directories above this script.
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+# Require the iii binary. NOTE(review): these two messages go to stdout, while the
+# other failure messages below go to stderr.
+if ! command -v iii >/dev/null 2>&1; then
+ echo "iii binary not on PATH. Install pinned version:"
+ echo " curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin"
+ exit 1
+fi
+
+# Warn (without failing) when the first line of `iii --version` is not exactly "0.11.2".
+# NOTE(review): if that line has any prefix the warning always fires — confirm the format.
+iii_ver=$(iii --version 2>&1 | head -1)
+if [[ "$iii_ver" != "0.11.2" ]]; then
+ echo "warning: iii version on PATH is $iii_ver; agentmemory pins 0.11.2"
+fi
+
+# The built bundle must exist before the sandbox can start.
+if [[ ! -f "$REPO_ROOT/dist/index.mjs" ]]; then
+ echo "dist/ missing. Run: npm run build" >&2
+ exit 1
+fi
+
+# Safety guard before `rm -rf`: the root must be non-empty, not "/", and start with /tmp/.
+# NOTE(review): this is a prefix match only; a path such as /tmp/../x would pass.
+if [[ -z "${SANDBOX_ROOT:-}" || "$SANDBOX_ROOT" == "/" || "$SANDBOX_ROOT" != /tmp/* ]]; then
+ echo "refusing to wipe SANDBOX_ROOT='$SANDBOX_ROOT' — must be non-empty and under /tmp/" >&2
+ exit 1
+fi
+# Start from a clean data directory on every run.
+rm -rf "$SANDBOX_ROOT"
+mkdir -p "$SANDBOX_ROOT/data" "$SANDBOX_ROOT/.agentmemory"
+
+# NOTE(review): this line looks truncated — the config content and the command that
+# launches the engine are not visible here. Verify against the original script.
+cat > "$SANDBOX_ROOT/iii-config.yaml" < "$SANDBOX_ROOT/iii.log" 2>&1 &
+SANDBOX_PID=$!
+
+# Stop the background process: plain kill first, then kill -9 one second later.
+cleanup() {
+ echo "tearing down sandbox (pid $SANDBOX_PID)"
+ kill "$SANDBOX_PID" 2>/dev/null || true
+ sleep 1
+ kill -9 "$SANDBOX_PID" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# wait for livez
+# Polls once per second, up to 30 times, until the livez endpoint reports "status":"ok".
+for i in $(seq 1 30); do
+ if curl -sS --max-time 1 "http://localhost:$SANDBOX_PORT/agentmemory/livez" 2>/dev/null | grep -q '"status":"ok"'; then
+ export AGENTMEMORY_BASE_URL="http://localhost:$SANDBOX_PORT"
+ echo "sandbox ready: $AGENTMEMORY_BASE_URL"
+ echo " state: $SANDBOX_ROOT/data/"
+ echo " logs: $SANDBOX_ROOT/iii.log"
+ # `return` succeeds when the file is sourced; otherwise fall back to `exit`.
+ return 0 2>/dev/null || exit 0
+ fi
+ sleep 1
+done
+
+# Never became ready: show the tail of the log and fail.
+echo "sandbox failed to come up within 30s. last log lines:" >&2
+tail -10 "$SANDBOX_ROOT/iii.log" >&2
+exit 1
diff --git a/integrations/hermes/plugin.yaml b/integrations/hermes/plugin.yaml
index b4f32151..9ea5cb98 100644
--- a/integrations/hermes/plugin.yaml
+++ b/integrations/hermes/plugin.yaml
@@ -4,6 +4,9 @@ description: "Persistent cross-session memory for Hermes Agent via agentmemory.
author: "Rohit Ghumare"
homepage: "https://github.com/rohitg00/agentmemory"
hooks:
+ - prefetch
+ - sync_turn
- on_session_end
- on_pre_compress
- on_memory_write
+ - system_prompt_block
diff --git a/package.json b/package.json
index 820fc8f7..bc245a2f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@agentmemory/agentmemory",
- "version": "0.9.20",
+ "version": "0.9.21",
"description": "Persistent memory for AI coding agents, powered by iii-engine's three primitives",
"type": "module",
"main": "dist/index.mjs",
@@ -25,7 +25,9 @@
"test:watch": "vitest --exclude test/integration.test.ts",
"test:integration": "vitest run test/integration.test.ts",
"test:all": "vitest run",
- "bench:load": "node --import tsx benchmark/load-100k.ts"
+ "bench:load": "node --import tsx benchmark/load-100k.ts",
+ "eval:longmemeval": "tsx eval/runner/longmemeval.ts",
+ "eval:coding-life": "tsx eval/runner/coding-life.ts"
},
"keywords": [
"ai",
@@ -60,7 +62,7 @@
"@anthropic-ai/sdk": "^0.39.0",
"@clack/prompts": "^1.2.0",
"dotenv": "^17.4.2",
- "iii-sdk": "^0.11.2",
+ "iii-sdk": "0.11.2",
"zod": "^4.0.0"
},
"optionalDependencies": {
diff --git a/packages/mcp/package.json b/packages/mcp/package.json
index 403295dd..96da3ae4 100644
--- a/packages/mcp/package.json
+++ b/packages/mcp/package.json
@@ -1,6 +1,6 @@
{
"name": "@agentmemory/mcp",
- "version": "0.9.20",
+ "version": "0.9.21",
"description": "Standalone MCP server for agentmemory — thin shim that re-exposes @agentmemory/agentmemory's MCP entrypoint",
"type": "module",
"bin": {
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index a18860e4..e53f8088 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
{
"name": "agentmemory",
- "version": "0.9.20",
+ "version": "0.9.21",
"description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 12 hooks, 51 MCP tools, 4 skills, real-time viewer.",
"author": {
"name": "Rohit Ghumare",
diff --git a/plugin/.codex-plugin/plugin.json b/plugin/.codex-plugin/plugin.json
index f8d676f6..0a7cc173 100644
--- a/plugin/.codex-plugin/plugin.json
+++ b/plugin/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
{
"name": "agentmemory",
- "version": "0.9.20",
+ "version": "0.9.21",
"description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 6 hooks, 51 MCP tools, 4 skills, real-time viewer.",
"author": {
"name": "Rohit Ghumare",
diff --git a/plugin/.mcp.copilot.json b/plugin/.mcp.copilot.json
new file mode 100644
index 00000000..01d03f7d
--- /dev/null
+++ b/plugin/.mcp.copilot.json
@@ -0,0 +1,14 @@
+{
+ "mcpServers": {
+ "agentmemory": {
+ "type": "local",
+ "command": "npx",
+ "args": ["-y", "@agentmemory/mcp"],
+ "env": {
+ "AGENTMEMORY_URL": "${AGENTMEMORY_URL}",
+ "AGENTMEMORY_SECRET": "${AGENTMEMORY_SECRET}"
+ },
+ "tools": ["*"]
+ }
+ }
+}
diff --git a/plugin/hooks/hooks.codex.json b/plugin/hooks/hooks.codex.json
index 73e43c66..d2c3a3b6 100644
--- a/plugin/hooks/hooks.codex.json
+++ b/plugin/hooks/hooks.codex.json
@@ -5,7 +5,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs",
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\"",
"statusMessage": "agentmemory: loading session context"
}
]
@@ -16,7 +16,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs",
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\"",
"statusMessage": "agentmemory: recalling relevant memories"
}
]
@@ -28,7 +28,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
}
]
}
@@ -38,7 +38,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
}
]
}
@@ -48,7 +48,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
}
]
}
@@ -58,7 +58,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\""
}
]
}
diff --git a/plugin/hooks/hooks.copilot.json b/plugin/hooks/hooks.copilot.json
new file mode 100644
index 00000000..b7d09f8b
--- /dev/null
+++ b/plugin/hooks/hooks.copilot.json
@@ -0,0 +1,72 @@
+{
+ "version": 1,
+ "hooks": {
+ "sessionStart": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/session-start.mjs\""
+ }
+ ],
+ "userPromptSubmitted": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/prompt-submit.mjs\""
+ }
+ ],
+ "preToolUse": [
+ {
+ "type": "command",
+ "matcher": "edit|write|create|read|view|glob|grep",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
+ }
+ ],
+ "postToolUse": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
+ }
+ ],
+ "postToolUseFailure": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/post-tool-failure.mjs\""
+ }
+ ],
+ "preCompact": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
+ }
+ ],
+ "agentStop": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/stop.mjs\""
+ }
+ ],
+ "sessionEnd": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/session-end.mjs\""
+ }
+ ],
+ "subagentStart": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/subagent-start.mjs\""
+ }
+ ],
+ "subagentStop": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/subagent-stop.mjs\""
+ }
+ ],
+ "notification": [
+ {
+ "type": "command",
+ "command": "node \"${COPILOT_PLUGIN_ROOT}/scripts/notification.mjs\""
+ }
+ ]
+ }
+}
diff --git a/plugin/hooks/hooks.json b/plugin/hooks/hooks.json
index d60d664a..a13c9973 100644
--- a/plugin/hooks/hooks.json
+++ b/plugin/hooks/hooks.json
@@ -5,7 +5,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\""
}
]
}
@@ -15,7 +15,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\""
}
]
}
@@ -26,7 +26,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\""
}
]
}
@@ -36,7 +36,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\""
}
]
}
@@ -46,7 +46,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs\""
}
]
}
@@ -56,7 +56,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\""
}
]
}
@@ -66,7 +66,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs\""
}
]
}
@@ -76,7 +76,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs\""
}
]
}
@@ -86,7 +86,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs\""
}
]
}
@@ -96,7 +96,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs\""
}
]
}
@@ -106,7 +106,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\""
}
]
}
@@ -116,7 +116,7 @@
"hooks": [
{
"type": "command",
- "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs"
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs\""
}
]
}
diff --git a/plugin/plugin.json b/plugin/plugin.json
new file mode 100644
index 00000000..4dd30bb7
--- /dev/null
+++ b/plugin/plugin.json
@@ -0,0 +1,15 @@
+{
+ "name": "agentmemory",
+ "version": "0.9.21",
+ "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 12 hooks, 53 MCP tools, 4 skills, real-time viewer.",
+ "author": {
+ "name": "Rohit Ghumare",
+ "url": "https://github.com/rohitg00"
+ },
+ "license": "Apache-2.0",
+ "homepage": "https://github.com/rohitg00/agentmemory",
+ "repository": "https://github.com/rohitg00/agentmemory",
+ "skills": "skills/",
+ "mcpServers": ".mcp.copilot.json",
+ "hooks": "hooks/hooks.copilot.json"
+}
diff --git a/plugin/scripts/notification.mjs b/plugin/scripts/notification.mjs
index a318848d..8ba2c9b0 100755
--- a/plugin/scripts/notification.mjs
+++ b/plugin/scripts/notification.mjs
@@ -22,8 +22,10 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- if (data.notification_type !== "permission_prompt") return;
- const sessionId = data.session_id || "unknown";
+ const notificationType = data.notification_type ?? data.notificationType;
+ if (notificationType !== "permission_prompt") return;
+ const rawSessionId = data.session_id ?? data.sessionId;
+ const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown";
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
@@ -35,7 +37,7 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
data: {
- notification_type: data.notification_type,
+ notification_type: notificationType,
title: data.title,
message: data.message
}
diff --git a/plugin/scripts/post-tool-failure.mjs b/plugin/scripts/post-tool-failure.mjs
index 3a593f3a..902a0930 100755
--- a/plugin/scripts/post-tool-failure.mjs
+++ b/plugin/scripts/post-tool-failure.mjs
@@ -22,8 +22,11 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- if (data.is_interrupt) return;
- const sessionId = data.session_id || "unknown";
+ if (data.is_interrupt || data.isInterrupt) return;
+ const sessionId = data.session_id || data.sessionId || "unknown";
+ const toolName = data.tool_name ?? data.toolName;
+ const toolInput = data.tool_input ?? data.toolArgs;
+ const error = data.error ?? data.errorMessage;
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
@@ -35,9 +38,9 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
data: {
- tool_name: data.tool_name,
- tool_input: typeof data.tool_input === "string" ? data.tool_input.slice(0, 4e3) : JSON.stringify(data.tool_input ?? "").slice(0, 4e3),
- error: typeof data.error === "string" ? data.error.slice(0, 4e3) : JSON.stringify(data.error ?? "").slice(0, 4e3)
+ tool_name: toolName,
+ tool_input: typeof toolInput === "string" ? toolInput.slice(0, 4e3) : JSON.stringify(toolInput ?? "").slice(0, 4e3),
+ error: typeof error === "string" ? error.slice(0, 4e3) : JSON.stringify(error ?? "").slice(0, 4e3)
}
}),
signal: AbortSignal.timeout(3e3)
diff --git a/plugin/scripts/post-tool-use.mjs b/plugin/scripts/post-tool-use.mjs
index 5ebec645..68a78ef7 100755
--- a/plugin/scripts/post-tool-use.mjs
+++ b/plugin/scripts/post-tool-use.mjs
@@ -22,8 +22,10 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
- const { imageData, cleanOutput } = extractImageData(data.tool_output);
+ const sessionId = data.session_id || data.sessionId || "unknown";
+ const toolName = data.tool_name ?? data.toolName;
+ const toolInput = data.tool_input ?? data.toolArgs;
+ const { imageData, cleanOutput } = extractImageData(toolOutput(data));
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
@@ -35,8 +37,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
data: {
- tool_name: data.tool_name,
- tool_input: data.tool_input,
+ tool_name: toolName,
+ tool_input: toolInput,
tool_output: truncate(cleanOutput, 8e3),
...imageData ? { image_data: imageData } : {}
}
@@ -45,6 +47,16 @@ async function main() {
});
} catch {}
}
+function toolOutput(data) {
+ if (data.tool_response !== void 0) return data.tool_response;
+ if (data.tool_output !== void 0) return data.tool_output;
+ const result = data.tool_result ?? data.toolResult;
+ if (typeof result === "object" && result !== null) {
+ const obj = result;
+ return obj.text_result_for_llm ?? obj.textResultForLlm ?? result;
+ }
+ return result;
+}
function isBase64Image(val) {
return typeof val === "string" && (val.startsWith("data:image/") || val.startsWith("iVBORw0KGgo") || val.startsWith("/9j/"));
}
diff --git a/plugin/scripts/pre-compact.mjs b/plugin/scripts/pre-compact.mjs
index bff9e7fa..b68bf025 100755
--- a/plugin/scripts/pre-compact.mjs
+++ b/plugin/scripts/pre-compact.mjs
@@ -22,7 +22,7 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
const project = data.cwd || process.cwd();
if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") try {
await fetch(`${REST_URL}/agentmemory/claude-bridge/sync`, {
diff --git a/plugin/scripts/pre-tool-use.mjs b/plugin/scripts/pre-tool-use.mjs
index 561b6b0d..16892fcd 100755
--- a/plugin/scripts/pre-tool-use.mjs
+++ b/plugin/scripts/pre-tool-use.mjs
@@ -24,18 +24,22 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const toolName = data.tool_name;
+ const toolName = typeof data.tool_name === "string" ? data.tool_name : typeof data.toolName === "string" ? data.toolName : void 0;
if (!toolName) return;
+ const normalizedToolName = toolName.toLowerCase();
if (![
- "Edit",
- "Write",
- "Read",
- "Glob",
- "Grep"
- ].includes(toolName)) return;
- const toolInput = data.tool_input || {};
+ "edit",
+ "write",
+ "create",
+ "read",
+ "view",
+ "glob",
+ "grep"
+ ].includes(normalizedToolName)) return;
+ const rawToolInput = data.tool_input ?? data.toolArgs;
+ const toolInput = typeof rawToolInput === "object" && rawToolInput !== null && !Array.isArray(rawToolInput) ? rawToolInput : {};
const files = [];
- const fileKeys = toolName === "Grep" ? ["path", "file"] : [
+ const fileKeys = normalizedToolName === "grep" ? ["path", "file"] : [
"file_path",
"path",
"file",
@@ -47,11 +51,12 @@ async function main() {
}
if (files.length === 0) return;
const terms = [];
- if (toolName === "Grep" || toolName === "Glob") {
+ if (normalizedToolName === "grep" || normalizedToolName === "glob") {
const pattern = toolInput["pattern"];
if (typeof pattern === "string" && pattern.length > 0) terms.push(pattern);
}
- const sessionId = data.session_id || "unknown";
+ const rawSessionId = data.session_id || data.sessionId;
+ const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown";
try {
const res = await fetch(`${REST_URL}/agentmemory/enrich`, {
method: "POST",
diff --git a/plugin/scripts/prompt-submit.mjs b/plugin/scripts/prompt-submit.mjs
index 18aa040a..a8a61192 100755
--- a/plugin/scripts/prompt-submit.mjs
+++ b/plugin/scripts/prompt-submit.mjs
@@ -22,7 +22,7 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
@@ -33,7 +33,7 @@ async function main() {
project: data.cwd || process.cwd(),
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
- data: { prompt: data.prompt }
+ data: { prompt: data.prompt ?? data.userPrompt }
}),
signal: AbortSignal.timeout(3e3)
});
diff --git a/plugin/scripts/session-end.mjs b/plugin/scripts/session-end.mjs
index 8e1de092..7707e357 100755
--- a/plugin/scripts/session-end.mjs
+++ b/plugin/scripts/session-end.mjs
@@ -22,7 +22,7 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/session/end`, {
method: "POST",
diff --git a/plugin/scripts/session-start.mjs b/plugin/scripts/session-start.mjs
index 9e573e24..f1ec1be6 100755
--- a/plugin/scripts/session-start.mjs
+++ b/plugin/scripts/session-start.mjs
@@ -25,7 +25,7 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || `ses_${Date.now().toString(36)}`;
+ const sessionId = data.session_id || data.sessionId || `ses_${Date.now().toString(36)}`;
const project = data.cwd || process.cwd();
const url = `${REST_URL}/agentmemory/session/start`;
const init = {
diff --git a/plugin/scripts/stop.mjs b/plugin/scripts/stop.mjs
index e0ffa350..3fe5cb36 100755
--- a/plugin/scripts/stop.mjs
+++ b/plugin/scripts/stop.mjs
@@ -22,7 +22,7 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/summarize`, {
method: "POST",
diff --git a/plugin/scripts/subagent-start.mjs b/plugin/scripts/subagent-start.mjs
index db143459..c0d0b5eb 100755
--- a/plugin/scripts/subagent-start.mjs
+++ b/plugin/scripts/subagent-start.mjs
@@ -23,7 +23,9 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
+ const agentId = data.agent_id || data.agentName;
+ const agentType = data.agent_type || data.agentDisplayName || data.agentName;
fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
headers: authHeaders(),
@@ -34,8 +36,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
data: {
- agent_id: data.agent_id,
- agent_type: data.agent_type
+ agent_id: agentId,
+ agent_type: agentType
}
}),
signal: AbortSignal.timeout(TIMEOUT_MS)
diff --git a/plugin/scripts/subagent-stop.mjs b/plugin/scripts/subagent-stop.mjs
index 7ec66a7d..8765756d 100755
--- a/plugin/scripts/subagent-stop.mjs
+++ b/plugin/scripts/subagent-stop.mjs
@@ -22,7 +22,9 @@ async function main() {
return;
}
if (isSdkChildContext(data)) return;
- const sessionId = data.session_id || "unknown";
+ const sessionId = data.session_id || data.sessionId || "unknown";
+ const agentId = data.agent_id || data.agentName;
+ const agentType = data.agent_type || data.agentDisplayName || data.agentName;
const lastMsg = typeof data.last_assistant_message === "string" ? data.last_assistant_message.slice(0, 4e3) : "";
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -35,8 +37,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
data: {
- agent_id: data.agent_id,
- agent_type: data.agent_type,
+ agent_id: agentId,
+ agent_type: agentType,
last_message: lastMsg
}
}),
diff --git a/scripts/backfill-imported-sessions.sh b/scripts/backfill-imported-sessions.sh
new file mode 100755
index 00000000..a247a57e
--- /dev/null
+++ b/scripts/backfill-imported-sessions.sh
@@ -0,0 +1,259 @@
+#!/usr/bin/env bash
+# Backfill memory artifacts for sessions imported via `agentmemory import-jsonl`.
+#
+# The import path only persists Session + Observation rows (via synthetic,
+# zero-LLM compression) and the deterministic crystal/lesson derivation.
+# It does NOT call mem::summarize, so the semantic/procedural/reflect tiers
+# of the consolidation pipeline have nothing to roll up.
+#
+# This script walks every session tagged `jsonl-import` and:
+# 1. POSTs /agentmemory/summarize per session (LLM call)
+# 2. POSTs /agentmemory/consolidate-pipeline once at the end
+#
+# Graph extraction (/agentmemory/graph/extract) is intentionally skipped —
+# its API takes a per-observation payload, which is cost-prohibitive for
+# bulk imports. `reflect` falls back to a no-graph clustering mode.
+#
+# Usage:
+# scripts/backfill-imported-sessions.sh --dry-run
+# scripts/backfill-imported-sessions.sh --limit 5
+# scripts/backfill-imported-sessions.sh # process all
+
+set -euo pipefail
+
+URL="${AGENTMEMORY_URL:-http://localhost:3111}"
+DRY_RUN=0
+LIMIT=0 # 0 = no limit
+ONLY_TAG="jsonl-import"
+SKIP_CONSOLIDATE=0
+SKIP_AGENTS=0 # drop sessions whose project starts with "agent-"
+MAX_OBS=0 # 0 = no cap; skip sessions with more observations than this
+DEBUG_ON_ERROR=0 # on failure, dump session metadata + obs to DEBUG_DIR
+DEBUG_DIR="${AGENTMEMORY_DEBUG_DIR:-./agentmemory-debug}"
+PROJECT_PATTERN="" # jq test() regex against .project; "" means no filter
+
+# Cost-estimate knobs (defaults tuned for DeepSeek V4 Flash on DeepInfra:
+# $0.14 / 1M input, $0.28 / 1M output). Override via env if needed.
+COST_IN_PER_1M="${AGENTMEMORY_COST_IN_PER_1M:-0.14}"
+COST_OUT_PER_1M="${AGENTMEMORY_COST_OUT_PER_1M:-0.28}"
+# Rough token weight per compressed observation, derived from inspecting
+# real synthetic-compression payloads in the kv store (mostly 100-300 tok,
+# heavy-tailed). Override if your sessions are unusually verbose.
+TOKENS_PER_OBS="${AGENTMEMORY_TOKENS_PER_OBS:-200}"
+# Reserved per-call output budget (XML summary is small).
+TOKENS_OUT_PER_SESSION="${AGENTMEMORY_TOKENS_OUT_PER_SESSION:-500}"
+
+while [[ $# -gt 0 ]]; do # parse CLI flags; value-taking flags consume two args
+ case "$1" in
+ --dry-run) DRY_RUN=1; shift ;;
+ --limit) LIMIT="${2:?--limit needs a number}"; shift 2 ;;
+ --tag) ONLY_TAG="${2?--tag needs a value (use empty string for all)}"; shift 2 ;; # `?` (not `:?`) so an explicit "" is accepted
+ --skip-consolidate) SKIP_CONSOLIDATE=1; shift ;;
+ --skip-agents) SKIP_AGENTS=1; shift ;;
+ --max-obs) MAX_OBS="${2:?--max-obs needs a number}"; shift 2 ;;
+ --debug-on-error) DEBUG_ON_ERROR=1; shift ;;
+ --project-pattern) PROJECT_PATTERN="${2:?--project-pattern needs a regex}"; shift 2 ;;
+ -h|--help)
+ sed -n '2,20p' "$0" # print only the header comment block (script lines 2-20), not the code after it
+ exit 0 ;;
+ *) echo "unknown flag: $1" >&2; exit 2 ;;
+ esac
+done
+
+for bin in curl jq; do
+ command -v "$bin" >/dev/null || { echo "missing dependency: $bin" >&2; exit 1; }
+done
+
+# Curl timeout profiles. Metadata reads (livez, sessions list, observations
+# pull for debug dumps) should fail fast and retry transient blips. The LLM
+# work calls (summarize, consolidate) intentionally have no --retry and a
+# wide --max-time: each call can legitimately take minutes for chunked
+# summarize on large sessions, and retrying a half-finished LLM job is
+# expensive both in dollars and in duplicated server-side work.
+META_CURL_OPTS=(--connect-timeout 10 --max-time 30 --retry 2 --retry-delay 1)
+WORK_CURL_OPTS=(--connect-timeout 10 --max-time 1800)
+
+echo "agentmemory backfill — server: $URL"
+[[ "$DRY_RUN" == 1 ]] && echo "DRY RUN: no POSTs will be made."
+
+# --- liveness ---
+if ! curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/livez" >/dev/null; then
+ echo "server not reachable at $URL (try: npx @agentmemory/agentmemory)" >&2
+ exit 1
+fi
+
+# --- collect session ids ---
+sessions_json="$(curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/sessions")"
+filter='.sessions[] | select(.status=="completed")'
+if [[ -n "$ONLY_TAG" ]]; then
+ filter+=' | select((.tags // []) | index($tag))'
+fi
+if [[ "$SKIP_AGENTS" == 1 ]]; then
+ filter+=' | select((.project // "") | startswith("agent-") | not)'
+fi
+if [[ -n "$PROJECT_PATTERN" ]]; then
+ # jq's test() applies a regex against the project string; $tag/$pat are bound via --arg in the jq call below so quotes or backslashes (e.g. \d) in user input cannot break or inject into the filter.
+ filter+=' | select((.project // "") | test($pat))'
+fi
+if [[ "$MAX_OBS" -gt 0 ]]; then
+ filter+=" | select((.observationCount // 0) <= $MAX_OBS)"
+fi
+filter+=' | "\(.id)\t\(.observationCount // 0)\t\(.project // "")"'
+
+rows=()
+while IFS= read -r line; do
+ rows+=("$line")
+done < <(jq -r --arg tag "$ONLY_TAG" --arg pat "$PROJECT_PATTERN" "$filter" <<<"$sessions_json")
+total="${#rows[@]}"
+
+if [[ "$total" -eq 0 ]]; then
+ echo "no sessions matched (tag='$ONLY_TAG'); nothing to do."
+ exit 0
+fi
+
+if [[ "$LIMIT" -gt 0 && "$LIMIT" -lt "$total" ]]; then
+ rows=("${rows[@]:0:$LIMIT}")
+fi
+
+echo "matched $total session(s); will process ${#rows[@]}."
+total_obs=0
+for row in "${rows[@]}"; do
+ obs="$(cut -f2 <<<"$row")"
+ total_obs=$(( total_obs + obs ))
+done
+est_in=$(( total_obs * TOKENS_PER_OBS + ${#rows[@]} * 500 ))
+est_out=$(( ${#rows[@]} * TOKENS_OUT_PER_SESSION ))
+est_cost="$(awk -v i="$est_in" -v o="$est_out" -v ci="$COST_IN_PER_1M" -v co="$COST_OUT_PER_1M" \
+ 'BEGIN { printf "%.2f", (i*ci + o*co) / 1000000 }')"
+
+echo "≈ ${#rows[@]} summarize LLM calls (one per session, covering $total_obs observations)"
+printf '≈ %d input tok + %d output tok → $%s (rates: in=$%s/1M out=$%s/1M, %s tok/obs)\n' \
+ "$est_in" "$est_out" "$est_cost" "$COST_IN_PER_1M" "$COST_OUT_PER_1M" "$TOKENS_PER_OBS"
+echo
+
+if [[ "$DRY_RUN" == 1 ]]; then
+ printf '%-40s %10s %s\n' "session" "obs" "project"
+ for row in "${rows[@]}"; do
+ id="$(cut -f1 <<<"$row")"
+ obs="$(cut -f2 <<<"$row")"
+ proj="$(cut -f3 <<<"$row")"
+ printf '%-40s %10s %s\n' "$id" "$obs" "$proj"
+ done
+ echo
+ echo "(dry run) next steps if you re-run without --dry-run:"
+ echo " for each session above: POST $URL/agentmemory/summarize {sessionId}"
+ if [[ "$SKIP_CONSOLIDATE" == 0 ]]; then
+ echo " then: POST $URL/agentmemory/consolidate-pipeline {}"
+ fi
+ exit 0
+fi
+
+# --- summarize loop ---
+if [[ "$DEBUG_ON_ERROR" == 1 ]]; then
+ mkdir -p "$DEBUG_DIR"
+ echo "debug mode: failed calls will dump to $DEBUG_DIR/"
+ echo
+fi
+
+dump_failure() {
+ local id="$1" obs="$2" resp="$3"
+ # Replace anything outside [A-Za-z0-9._-] with `_` before joining with
+ # DEBUG_DIR. Session IDs from the API are UUIDs in practice, but the
+ # server doesn't enforce that — a hostile or buggy id containing `/` or
+ # `..` would otherwise escape the debug directory.
+ local safe_id
+ safe_id="$(printf '%s' "$id" | tr -c 'A-Za-z0-9._-' '_')"
+ local file="$DEBUG_DIR/${safe_id}.json"
+ # Pull the raw observations (what would have gone into the prompt) so the
+ # operator can reconstruct the upstream payload locally, plus narrative
+ # size stats so size-related rejections are immediately visible.
+ # Observations stream through stdin (avoids exec-arg overflow on
+ # multi-thousand-obs sessions — macOS argv ceiling is ~256k) and
+ # `--get --data-urlencode` percent-encodes the session id. The dump is
+ # best-effort: a failed fetch/jq must not abort the backfill under `set -e`.
+ curl -fsS "${META_CURL_OPTS[@]}" --get \
+ --data-urlencode "sessionId=$id" \
+ "$URL/agentmemory/observations" \
+ | jq \
+ --arg id "$id" \
+ --argjson obsCount "$obs" \
+ --arg url "$URL/agentmemory/summarize" \
+ --arg response "$resp" \
+ '# $response is the raw body: embed it parsed when it is JSON, verbatim otherwise.
+ .observations as $obs
+ | {
+ sessionId: $id,
+ observationCount: $obsCount,
+ request: { url: $url, method: "POST", body: { sessionId: $id } },
+ response: ($response | try fromjson catch $response),
+ observations: $obs,
+ stats: {
+ totalNarrativeBytes: ($obs | map(.narrative // "" | length) | add // 0),
+ maxNarrativeBytes: ($obs | map(.narrative // "" | length) | max // 0),
+ titleHistogram: ($obs | group_by(.title) | map({title: .[0].title, count: length}) | sort_by(-.count))
+ }
+ }' >"$file" || { echo " debug dump failed for $id; continuing" >&2; return 0; }
+ echo " → $file"
+}
+
+ok=0; skipped=0; failed=0
+i=0
+for row in "${rows[@]}"; do
+ i=$(( i + 1 ))
+ id="$(cut -f1 <<<"$row")"
+ obs="$(cut -f2 <<<"$row")"
+
+ body="$(jq -nc --arg id "$id" '{sessionId:$id}')"
+ resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/summarize" \
+ -H 'content-type: application/json' --data "$body" || echo '{"success":false,"error":"curl_failed"}')"
+ # iii's HTTP layer occasionally returns non-JSON (HTML 5xx, empty body
+ # on timeout, etc.). Validate before parsing so `set -e` doesn't abort
+ # the whole backfill loop on a single bad response.
+ if jq -e . >/dev/null 2>&1 <<<"$resp"; then
+ status="$(jq -r '.success // false' <<<"$resp")"
+ err="$(jq -r '.error // ""' <<<"$resp")"
+ title="$(jq -r '.summary.title // ""' <<<"$resp")"
+ else
+ status="false"
+ err="invalid_json_response"
+ title=""
+ fi
+
+ if [[ "$status" == "true" ]]; then
+ ok=$(( ok + 1 ))
+ printf '[%3d/%3d] OK %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$title"
+ elif [[ "$err" == "no_observations" || "$err" == "no_provider" ]]; then
+ skipped=$(( skipped + 1 ))
+ printf '[%3d/%3d] SKIP %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err"
+ else
+ failed=$(( failed + 1 ))
+ printf '[%3d/%3d] FAIL %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err"
+ [[ "$DEBUG_ON_ERROR" == 1 ]] && dump_failure "$id" "$obs" "$resp"
+ fi
+done
+
+echo
+echo "summarize: ok=$ok skipped=$skipped failed=$failed"
+
+# --- consolidate ---
+if [[ "$SKIP_CONSOLIDATE" == 1 ]]; then
+ echo "skipping consolidate-pipeline (--skip-consolidate)"
+ exit 0
+fi
+
+if [[ "$ok" -eq 0 ]]; then
+ echo "no summaries produced; skipping consolidate-pipeline."
+ exit 0
+fi
+
+echo
+echo "running consolidate-pipeline …"
+resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/consolidate-pipeline" \
+ -H 'content-type: application/json' --data '{}' || echo '{"success":false,"error":"curl_failed"}')"
+if jq -e . >/dev/null 2>&1 <<<"$resp"; then
+ echo "$resp" | jq .
+else
+ echo "consolidate-pipeline returned non-JSON (likely a timeout or upstream error):"
+ printf '%s\n' "$resp" | head -c 500
+ echo
+fi
diff --git a/src/cli.ts b/src/cli.ts
index 5eca18ce..d3d33855 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -117,8 +117,9 @@ Usage: agentmemory [command] [options]
Commands:
(default) Start agentmemory worker
init Copy bundled .env.example to ~/.agentmemory/.env if absent
- connect [agent] Wire agentmemory into an installed agent (claude-code, codex,
- cursor, gemini-cli, openclaw, hermes, pi, openhuman).
+ connect [agent] Wire agentmemory into an installed agent (claude-code,
+ copilot-cli, codex, cursor, gemini-cli, openclaw,
+ hermes, pi, openhuman).
No arg = interactive picker. --all wires every detected agent.
--dry-run shows what would change. --force re-installs.
status Show connection status, memory count, flags, and health
@@ -195,9 +196,36 @@ function getBaseUrl(): string {
return `http://localhost:${getRestPort()}`;
}
+let discoveredViewerPort: number | null = null;
+
+export async function discoverViewerPort(): Promise<void> { // best-effort: caches the viewerPort reported by /agentmemory/livez
+ if (discoveredViewerPort !== null) return; // already cached
+ try {
+ const res = await fetch(`${getBaseUrl()}/agentmemory/livez`, {
+ signal: AbortSignal.timeout(1000),
+ });
+ if (res.ok) {
+ const data = await res.json() as { viewerPort?: number | null };
+ if (typeof data.viewerPort === "number") {
+ discoveredViewerPort = data.viewerPort;
+ }
+ }
+ } catch {} // network / parse failures leave the cache unset
+}
+
function getViewerUrl(): string {
const envUrl = process.env["AGENTMEMORY_VIEWER_URL"];
if (envUrl) return envUrl.replace(/\/+$/, "");
+
+ if (discoveredViewerPort !== null) {
+ try {
+ const u = new URL(getBaseUrl());
+ return `${u.protocol}//${u.hostname}:${discoveredViewerPort}`;
+ } catch {
+ return `http://localhost:${discoveredViewerPort}`;
+ }
+ }
+
try {
const u = new URL(getBaseUrl());
const vPort =
@@ -257,7 +285,18 @@ async function isAgentmemoryReady(): Promise<boolean> {
const res = await fetch(`${getBaseUrl()}/agentmemory/livez`, {
signal: AbortSignal.timeout(2000),
});
- return res.ok;
+ if (!res.ok) return false;
+ try {
+ const data = await res.json() as { viewerPort?: number | null; viewerSkipped?: boolean };
+ if (typeof data.viewerPort === "number") {
+ discoveredViewerPort = data.viewerPort;
+ return true;
+ }
+ if (data.viewerSkipped) return true;
+ return false;
+ } catch {
+ return false;
+ }
} catch {
return false;
}
@@ -497,17 +536,8 @@ function detectIiiConsole(): IiiConsoleState {
return { kind: "missing" };
}
-// install.iii.dev/console/main/install.sh has a bug in its release-tag
-// filter that rejects every stable release for iii-hq/iii: the jq
-// predicate uses `startswith("v")` while the actual tags are
-// `iii/v0.12.0` (slash-prefixed). The `--next` path uses a regex
-// without the startswith constraint and therefore works today,
-// installing the most recent prerelease (e.g. iii/v0.14.0-next.1).
-//
-// Pass `--next` until the upstream fix lands (iii-hq/iii#1652).
-// Switch back to the bare invocation once the script is patched.
const III_CONSOLE_INSTALL_CMD =
- "curl -fsSL https://install.iii.dev/console/main/install.sh | bash -s -- --next";
+ "curl -fsSL https://install.iii.dev/console/main/install.sh | sh";
async function ensureIiiConsole(): Promise {
const state = detectIiiConsole();
@@ -1101,6 +1131,9 @@ async function runStatus() {
apiFetch(base, "config/flags"),
]);
+ if (typeof healthRes?.viewerPort === "number") {
+ discoveredViewerPort = healthRes.viewerPort;
+ }
const h = healthRes?.health;
const status = healthRes?.status || "unknown";
const version = healthRes?.version || "?";
@@ -1260,6 +1293,7 @@ function buildDoctorEffects(): DoctorEffects {
iiiBinaryVersion: (binPath: string) => iiiBinVersion(binPath),
viewerReachable: async (timeoutMs = 2000) => {
try {
+ await discoverViewerPort();
const res = await fetch(getViewerUrl(), {
signal: AbortSignal.timeout(timeoutMs),
});
@@ -1975,8 +2009,8 @@ async function runUpgrade() {
label: "Refreshing dependencies (pnpm install)",
});
requireSuccess(installOk, "pnpm install");
- runCommand(pnpmBin, ["up", "iii-sdk@latest"], {
- label: "Upgrading iii-sdk to latest",
+ runCommand(pnpmBin, ["up", "iii-sdk@0.11.2"], {
+ label: "Pinning iii-sdk@0.11.2",
optional: true,
});
} else if (npmBin) {
@@ -1984,8 +2018,8 @@ async function runUpgrade() {
label: "Refreshing dependencies (npm install)",
});
requireSuccess(installOk, "npm install");
- runCommand(npmBin, ["install", "iii-sdk@latest"], {
- label: "Upgrading iii-sdk to latest",
+ runCommand(npmBin, ["install", "iii-sdk@0.11.2"], {
+ label: "Pinning iii-sdk@0.11.2",
optional: true,
});
} else {
diff --git a/src/cli/connect/codex-hooks.ts b/src/cli/connect/codex-hooks.ts
new file mode 100644
index 00000000..14b8284a
--- /dev/null
+++ b/src/cli/connect/codex-hooks.ts
@@ -0,0 +1,107 @@
+import { existsSync, readFileSync } from "node:fs";
+import { dirname, join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+/**
+ * Workaround for openai/codex#16430 — Codex Desktop does not dispatch
+ * plugin-local `hooks.json` even though both `CodexHooks` and `PluginHooks`
+ * feature flags are stable + default-enabled in
+ * `codex-rs/features/src/lib.rs`. Until upstream fixes plugin-scope
+ * dispatch, the same hook commands can be mirrored into the global
+ * `~/.codex/hooks.json`, which is loaded reliably.
+ *
+ * This module builds that mirror, with `${CLAUDE_PLUGIN_ROOT}` resolved to
+ * the bundled `plugin/` directory so the user-scope file does not depend
+ * on env-var expansion (Codex only injects `CLAUDE_PLUGIN_ROOT` for
+ * plugin-scope hooks).
+ *
+ * Identification on re-install: every command we write contains the
+ * resolved `<pluginRoot>/scripts/` prefix, so subsequent installs can
+ * strip our entries and re-add cleanly without touching the user's other
+ * hook entries.
+ */
+
+/** One hook command as stored in a hooks.json entry. */
+type HookHandler = { type: string; command: string };
+/** A group of handlers, optionally restricted by a `matcher` value. */
+type HookEntry = { matcher?: string; hooks: HookHandler[] };
+/** Shape of a hooks.json file: event name → entries registered for it. */
+export type HookManifest = { hooks: Record<string, HookEntry[]> };
+
+/**
+ * Resolve the bundled `plugin/` directory at runtime.
+ *
+ * Starting from the directory that contains `startUrl` (this module by
+ * default), checks that directory and then each ancestor in turn — at most
+ * 12 directories in total — for one holding both `plugin/scripts` and
+ * `plugin/hooks` (both shipped via the npm `files` field). This covers the
+ * bundled `dist/cli.mjs` layout as well as the dev layout
+ * `src/cli/connect/codex-hooks.ts`.
+ *
+ * @param startUrl `file:` URL to start from; defaults to `import.meta.url`.
+ * @returns Absolute path of the first matching `plugin/` directory.
+ * @throws Error when no match is found within the limit or before the
+ *   filesystem root is reached.
+ */
+export function findPluginRoot(startUrl: string = import.meta.url): string {
+  const origin = dirname(fileURLToPath(startUrl));
+  let candidate = origin;
+  let remaining = 12;
+  while (remaining > 0) {
+    remaining -= 1;
+    const pluginDir = join(candidate, "plugin");
+    if (existsSync(join(pluginDir, "scripts")) && existsSync(join(pluginDir, "hooks"))) {
+      return resolve(pluginDir);
+    }
+    const up = dirname(candidate);
+    // dirname() of the filesystem root is the root itself: nothing left to try.
+    if (up === candidate) break;
+    candidate = up;
+  }
+  throw new Error(
+    `agentmemory: could not locate bundled plugin/ directory (searched up from ${origin})`,
+  );
+}
+
+/**
+ * Build the merged hooks.json content.
+ *
+ * 1. Drop every entry of `existing` in which any hook command points under
+ *    `<pluginRoot>/scripts/`. This lets us re-install idempotently without
+ *    leaving stale references. Events left with no entries are omitted.
+ * 2. Append fresh entries from the bundled Codex manifest
+ *    (`<pluginRoot>/hooks/hooks.codex.json`) with `${CLAUDE_PLUGIN_ROOT}`
+ *    rewritten to the absolute plugin path. Matcher values from the bundled
+ *    manifest are preserved so PreToolUse event routing keeps working.
+ *
+ * @param existing Parsed current hooks file, or `null` when there is none.
+ * @param pluginRoot Path of the bundled `plugin/` directory.
+ * @returns A newly built manifest; `existing` itself is not modified.
+ * @throws When the bundled manifest cannot be read or is not valid JSON.
+ */
+export function buildMergedHooks(
+  existing: HookManifest | null,
+  pluginRoot: string,
+): HookManifest {
+  const codexManifestPath = join(pluginRoot, "hooks", "hooks.codex.json");
+  const ours = JSON.parse(readFileSync(codexManifestPath, "utf-8")) as HookManifest;
+  const scriptsDir = join(pluginRoot, "scripts");
+
+  const out: HookManifest = { hooks: {} };
+
+  if (existing?.hooks) {
+    for (const [event, entries] of Object.entries(existing.hooks)) {
+      const kept = entries.filter((entry) => !isAgentmemoryEntry(entry, scriptsDir));
+      if (kept.length > 0) out.hooks[event] = kept;
+    }
+  }
+
+  for (const [event, entries] of Object.entries(ours.hooks)) {
+    const resolvedEntries: HookEntry[] = entries.map((entry) => {
+      const next: HookEntry = {
+        hooks: entry.hooks.map((handler) => ({
+          type: handler.type,
+          // Replacer function, not a replacement string: a string would have
+          // `$&`, `$1`, `$$`, … inside pluginRoot interpreted as replacement
+          // patterns and would corrupt install paths that contain `$`.
+          command: handler.command.replace(/\$\{CLAUDE_PLUGIN_ROOT\}/g, () => pluginRoot),
+        })),
+      };
+      if (entry.matcher !== undefined) next.matcher = entry.matcher;
+      return next;
+    });
+    out.hooks[event] = [...(out.hooks[event] ?? []), ...resolvedEntries];
+  }
+
+  return out;
+}
+
+/**
+ * Report whether `entry` was written by agentmemory, i.e. whether any of its
+ * hook commands contains the scripts directory path. Both sides are compared
+ * with backslashes normalised to forward slashes.
+ *
+ * Entries reach this function from the user's existing hooks.json, so their
+ * shape is not trusted: an entry without a `hooks` array, or a handler
+ * without a string `command`, counts as "not ours" (and is therefore kept)
+ * instead of throwing and aborting the install.
+ */
+function isAgentmemoryEntry(entry: HookEntry, scriptsDir: string): boolean {
+  const handlers: unknown = (entry as { hooks?: unknown } | null | undefined)?.hooks;
+  if (!Array.isArray(handlers)) return false;
+  const normalizedScriptsDir = normalizePathForCommandMatch(scriptsDir);
+  return handlers.some((handler: unknown) => {
+    const command = (handler as { command?: unknown } | null | undefined)?.command;
+    return (
+      typeof command === "string" &&
+      normalizePathForCommandMatch(command).includes(normalizedScriptsDir)
+    );
+  });
+}
+
+/** Convert every backslash in `value` to a forward slash. */
+function normalizePathForCommandMatch(value: string): string {
+  return value.split("\\").join("/");
+}
diff --git a/src/cli/connect/codex.ts b/src/cli/connect/codex.ts
index 003dc99a..a87b2858 100644
--- a/src/cli/connect/codex.ts
+++ b/src/cli/connect/codex.ts
@@ -8,10 +8,18 @@ import {
logAlreadyWired,
logBackup,
logInstalled,
+ readJsonSafe,
+ writeJsonAtomic,
} from "./util.js";
+import {
+ buildMergedHooks,
+ findPluginRoot,
+ type HookManifest,
+} from "./codex-hooks.js";
const CODEX_DIR = join(homedir(), ".codex");
const CODEX_TOML = join(CODEX_DIR, "config.toml");
+const CODEX_HOOKS = join(CODEX_DIR, "hooks.json");
const TOML_BLOCK = `[mcp_servers.agentmemory]
command = "npx"
@@ -57,7 +65,7 @@ export const adapter: ConnectAdapter = {
displayName: "Codex CLI",
docs: "https://github.com/rohitg00/agentmemory#codex-cli-codex-plugin-platform",
protocolNote:
- "→ Using MCP. Hooks are also available — see docs/codex.md.",
+ "→ Using MCP. Hooks ship via the Codex plugin; on Codex Desktop, also pass --with-hooks to install the global hooks.json workaround for openai/codex#16430.",
detect(): boolean {
return existsSync(CODEX_DIR);
@@ -77,6 +85,7 @@ export const adapter: ConnectAdapter = {
p.log.info(
`[dry-run] Would ${wired ? "rewrite" : "append"} [mcp_servers.agentmemory] in ${CODEX_TOML}`,
);
+ if (opts.withHooks) installCodexHooks(opts);
return { kind: "installed", mutatedPath: CODEX_TOML };
}
@@ -105,6 +114,16 @@ export const adapter: ConnectAdapter = {
p.log.info(
"Codex picks up MCP servers on next launch. For the deeper plugin install, run: codex plugin marketplace add rohitg00/agentmemory && codex plugin install agentmemory",
);
+
+ if (opts.withHooks) {
+ const hookResult = installCodexHooks(opts);
+ if (hookResult.kind === "skipped") {
+ p.log.warn(
+ `Codex hooks fallback skipped: ${hookResult.reason}. MCP wiring still applied.`,
+ );
+ }
+ }
+
return {
kind: "installed",
mutatedPath: CODEX_TOML,
@@ -112,3 +131,50 @@ export const adapter: ConnectAdapter = {
};
},
};
+
+/**
+ * Install the global `~/.codex/hooks.json` fallback. See
+ * `codex-hooks.ts` for context (openai/codex#16430).
+ *
+ * Anything that goes wrong while preparing the merge — the bundled plugin
+ * directory cannot be located, or its Codex hook manifest cannot be read or
+ * parsed — is returned as `{ kind: "skipped", reason }` instead of being
+ * thrown, so a hooks problem never surfaces as a crash after the MCP wiring
+ * has already been applied. Failures here do not roll back the MCP wiring.
+ *
+ * With `opts.dryRun` the merge is computed and logged but nothing is
+ * written. Otherwise an existing hooks file is backed up before the merged
+ * content replaces it.
+ */
+function installCodexHooks(opts: ConnectOptions): ConnectResult {
+  let existing: HookManifest | null;
+  let merged: HookManifest;
+  try {
+    const pluginRoot = findPluginRoot();
+    existing = readJsonSafe(CODEX_HOOKS);
+    merged = buildMergedHooks(existing, pluginRoot);
+  } catch (err) {
+    return {
+      kind: "skipped",
+      reason: err instanceof Error ? err.message : String(err),
+    };
+  }
+
+  if (opts.dryRun) {
+    p.log.info(
+      `[dry-run] Would ${existing ? "merge" : "create"} ${CODEX_HOOKS} with ${Object.keys(merged.hooks).length} event(s)`,
+    );
+    return { kind: "installed", mutatedPath: CODEX_HOOKS };
+  }
+
+  let backupPath: string | undefined;
+  if (existsSync(CODEX_HOOKS)) {
+    backupPath = backupFile(CODEX_HOOKS, "codex-hooks", "json");
+    logBackup(backupPath);
+  }
+
+  writeJsonAtomic(CODEX_HOOKS, merged);
+
+  logInstalled("Codex hooks (workaround for openai/codex#16430)", CODEX_HOOKS);
+  p.log.info(
+    "User-scope hooks reference absolute paths under the bundled plugin/ dir. Re-run `agentmemory connect codex --with-hooks` after upgrading agentmemory to refresh them.",
+  );
+
+  return {
+    kind: "installed",
+    mutatedPath: CODEX_HOOKS,
+    ...(backupPath !== undefined && { backupPath }),
+  };
+}
diff --git a/src/cli/connect/copilot-cli.ts b/src/cli/connect/copilot-cli.ts
new file mode 100644
index 00000000..8cce5a54
--- /dev/null
+++ b/src/cli/connect/copilot-cli.ts
@@ -0,0 +1,91 @@
+import { existsSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { dirname, join } from "node:path";
+import * as p from "@clack/prompts";
+import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js";
+import {
+ AGENTMEMORY_COPILOT_MCP_BLOCK,
+ backupFile,
+ logAlreadyWired,
+ logBackup,
+ logInstalled,
+ readJsonSafe,
+ writeJsonAtomic,
+} from "./util.js";
+
+const COPILOT_DIR = process.env["COPILOT_HOME"] || join(homedir(), ".copilot");
+const COPILOT_MCP_JSON = join(COPILOT_DIR, "mcp-config.json");
+
+type CopilotMcpEntry = typeof AGENTMEMORY_COPILOT_MCP_BLOCK;
+type CopilotConfig = {
+ mcpServers?: Record;
+ [key: string]: unknown;
+};
+
+/**
+ * True when `entry` is an object whose JSON serialisation is identical to
+ * that of `AGENTMEMORY_COPILOT_MCP_BLOCK`. The comparison is textual, so it
+ * is sensitive to property order.
+ */
+function entryMatches(entry: unknown): boolean {
+  const isObject = typeof entry === "object" && entry !== null;
+  if (!isObject) return false;
+  const actual = JSON.stringify(entry);
+  return actual === JSON.stringify(AGENTMEMORY_COPILOT_MCP_BLOCK);
+}
+
+// Connect adapter for GitHub Copilot CLI: registers agentmemory under
+// `mcpServers.agentmemory` in the Copilot MCP config file.
+export const adapter: ConnectAdapter = {
+ name: "copilot-cli",
+ displayName: "GitHub Copilot CLI",
+ docs: "https://github.com/rohitg00/agentmemory#github-copilot-cli",
+ protocolNote:
+ "→ Using MCP. Install the plugin too for full hooks/skills coverage.",
+
+ // Copilot CLI counts as installed when its config directory exists.
+ detect(): boolean {
+ return existsSync(COPILOT_DIR);
+ },
+
+ // Adds or overwrites the agentmemory MCP entry. Returns "already-wired"
+ // when an identical entry exists and --force was not given, "installed"
+ // on success (and for dry runs), and "skipped" when the read-back check
+ // after writing fails.
+ async install(opts: ConnectOptions): Promise {
+ const existing = readJsonSafe(COPILOT_MCP_JSON);
+ // Work on shallow copies so the parsed config is not mutated in place.
+ const next: CopilotConfig = existing ? { ...existing } : {};
+ const servers: Record = {
+ ...((next.mcpServers as Record) ?? {}),
+ };
+
+ const alreadyHas = entryMatches(servers["agentmemory"]);
+ if (alreadyHas && !opts.force) {
+ logAlreadyWired("GitHub Copilot CLI", COPILOT_MCP_JSON);
+ return { kind: "already-wired", mutatedPath: COPILOT_MCP_JSON };
+ }
+
+ // Dry run: report the intended change and stop before touching disk.
+ if (opts.dryRun) {
+ p.log.info(
+ `[dry-run] Would ${alreadyHas ? "overwrite" : "add"} mcpServers.agentmemory in ${COPILOT_MCP_JSON}`,
+ );
+ return { kind: "installed", mutatedPath: COPILOT_MCP_JSON };
+ }
+
+ // Back up an existing config file; otherwise make sure its directory
+ // exists before the first write.
+ let backupPath: string | undefined;
+ if (existsSync(COPILOT_MCP_JSON)) {
+ backupPath = backupFile(COPILOT_MCP_JSON, "copilot-cli");
+ logBackup(backupPath);
+ } else {
+ mkdirSync(dirname(COPILOT_MCP_JSON), { recursive: true });
+ }
+
+ servers["agentmemory"] = AGENTMEMORY_COPILOT_MCP_BLOCK;
+ next.mcpServers = servers;
+ writeJsonAtomic(COPILOT_MCP_JSON, next);
+
+ // Read the file back to confirm the entry actually landed on disk.
+ const verify = readJsonSafe(COPILOT_MCP_JSON);
+ if (!entryMatches(verify?.mcpServers?.["agentmemory"])) {
+ p.log.error(
+ `Verification failed: ${COPILOT_MCP_JSON} did not contain mcpServers.agentmemory after write.`,
+ );
+ return { kind: "skipped", reason: "verification-failed" };
+ }
+
+ logInstalled("GitHub Copilot CLI", COPILOT_MCP_JSON);
+ p.log.info(
+ "Copilot picks up MCP servers on next launch or after `/mcp`. Install the plugin too for full hooks/skills.",
+ );
+ return {
+ kind: "installed",
+ mutatedPath: COPILOT_MCP_JSON,
+ ...(backupPath !== undefined && { backupPath }),
+ };
+ },
+};
diff --git a/src/cli/connect/index.ts b/src/cli/connect/index.ts
index 17aedf8f..48f86817 100644
--- a/src/cli/connect/index.ts
+++ b/src/cli/connect/index.ts
@@ -2,6 +2,7 @@ import { platform } from "node:os";
import * as p from "@clack/prompts";
import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js";
import { adapter as claudeCode } from "./claude-code.js";
+import { adapter as copilotCli } from "./copilot-cli.js";
import { adapter as codex } from "./codex.js";
import { adapter as cursor } from "./cursor.js";
import { adapter as geminiCli } from "./gemini-cli.js";
@@ -12,6 +13,7 @@ import { adapter as pi } from "./pi.js";
export const ADAPTERS: readonly ConnectAdapter[] = [
claudeCode,
+ copilotCli,
codex,
cursor,
geminiCli,
@@ -34,19 +36,22 @@ function parseFlags(args: string[]): {
dryRun: boolean;
force: boolean;
all: boolean;
+ withHooks: boolean;
positional: string[];
} {
const positional: string[] = [];
let dryRun = false;
let force = false;
let all = false;
+ let withHooks = false;
for (const a of args) {
if (a === "--dry-run") dryRun = true;
else if (a === "--force") force = true;
else if (a === "--all") all = true;
+ else if (a === "--with-hooks") withHooks = true;
else if (!a.startsWith("-")) positional.push(a);
}
- return { dryRun, force, all, positional };
+ return { dryRun, force, all, withHooks, positional };
}
export async function runAdapter(
@@ -74,7 +79,10 @@ export async function runAdapter(
}
export async function runConnect(args: string[]): Promise {
- if (platform() === "win32") {
+ const { dryRun, force, all, withHooks, positional } = parseFlags(args);
+ const allowWindowsAdapter =
+ positional.length === 1 && positional[0]?.toLowerCase() === "copilot-cli";
+ if (platform() === "win32" && !allowWindowsAdapter) {
p.intro("agentmemory connect");
p.log.warn(
"Windows: automated `connect` is not supported yet. See https://github.com/rohitg00/agentmemory#other-agents for manual install steps.",
@@ -83,8 +91,7 @@ export async function runConnect(args: string[]): Promise {
return;
}
- const { dryRun, force, all, positional } = parseFlags(args);
- const opts: ConnectOptions = { dryRun, force };
+ const opts: ConnectOptions = { dryRun, force, withHooks };
p.intro("agentmemory connect");
diff --git a/src/cli/connect/types.ts b/src/cli/connect/types.ts
index 4f64c867..8abd2745 100644
--- a/src/cli/connect/types.ts
+++ b/src/cli/connect/types.ts
@@ -1,6 +1,13 @@
export type ConnectOptions = {
dryRun: boolean;
force: boolean;
+ /**
+ * When true, the Codex adapter additionally writes a global
+ * `~/.codex/hooks.json` block referencing absolute paths to bundled hook
+ * scripts. Workaround for openai/codex#16430, which prevents plugin-local
+ * hooks from dispatching on Codex Desktop. No-op for other adapters.
+ */
+ withHooks?: boolean;
};
export type ConnectAdapter = {
diff --git a/src/cli/connect/util.ts b/src/cli/connect/util.ts
index 6d5f61ac..8902e3ef 100644
--- a/src/cli/connect/util.ts
+++ b/src/cli/connect/util.ts
@@ -26,6 +26,27 @@ export const AGENTMEMORY_MCP_BLOCK = {
},
};
+// Command used to launch the agentmemory MCP server from Copilot CLI.
+// On Windows the `npx` invocation is wrapped in the command interpreter
+// (%ComSpec%, falling back to cmd.exe); elsewhere `npx` is run directly.
+// NOTE(review): presumably needed because npx is a .cmd shim on Windows
+// and cannot be spawned directly — confirm.
+const COPILOT_MCP_COMMAND =
+ process.platform === "win32"
+ ? {
+ command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe",
+ args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"],
+ }
+ : {
+ command: "npx",
+ args: ["-y", "@agentmemory/mcp"],
+ };
+
+// MCP server entry written under `mcpServers.agentmemory` in Copilot's
+// mcp-config.json. The `${...}` env values are plain strings written
+// verbatim, not template literals evaluated here.
+// NOTE(review): presumably expanded by Copilot CLI at launch — confirm.
+export const AGENTMEMORY_COPILOT_MCP_BLOCK = {
+ type: "local" as const,
+ ...COPILOT_MCP_COMMAND,
+ env: {
+ AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+ AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+ },
+ tools: ["*"],
+};
+
export function backupsDir(): string {
return join(homedir(), ".agentmemory", "backups");
}
diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts
index 92b23d62..2e148a1b 100644
--- a/src/cli/onboarding.ts
+++ b/src/cli/onboarding.ts
@@ -36,6 +36,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
// where they overlap; the rest fall back to the generic `◇`.
const NATIVE_AGENTS: { value: string; label: string; glyph: string }[] = [
{ value: "claude-code", label: "Claude Code", glyph: "⟁" },
+ { value: "copilot-cli", label: "GitHub Copilot CLI", glyph: "◈" },
{ value: "codex", label: "Codex", glyph: "◎" },
{ value: "openhuman", label: "OpenHuman", glyph: "◇" },
{ value: "openclaw", label: "OpenClaw", glyph: "◇" },
@@ -67,7 +68,7 @@ const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [
{ value: "skip", label: "Skip — BM25-only mode (no LLM key)", envKey: null },
];
-function buildAgentOptions(): { value: string; label: string; hint?: string }[] {
+export function buildAgentOptions(): { value: string; label: string; hint?: string }[] {
return [
...NATIVE_AGENTS.map((a) => ({
value: a.value,
@@ -82,6 +83,15 @@ function buildAgentOptions(): { value: string; label: string; hint?: string }[]
];
}
+/**
+ * Choose the agent that is pre-selected in the onboarding multiselect.
+ *
+ * @param env Environment to inspect; defaults to `process.env`.
+ * @returns `["copilot-cli"]` when `COPILOT_CLI` is exactly `"1"` or
+ *   `COPILOT_AGENT_SESSION_ID` is set to a non-empty value; otherwise
+ *   `["claude-code"]`.
+ */
+export function getInitialAgentValues(
+  env: Record<string, string | undefined> = process.env,
+): string[] {
+  if (env["COPILOT_CLI"] === "1" || env["COPILOT_AGENT_SESSION_ID"]) {
+    return ["copilot-cli"];
+  }
+  return ["claude-code"];
+}
+
// Mirror src/cli.ts findEnvExample so onboarding ships the same .env
// skeleton whether called directly or via `agentmemory init`. We
// duplicate (rather than import) so the onboarding module doesn't
@@ -137,7 +147,31 @@ export interface OnboardingResult {
provider: string | null;
}
+/**
+ * Decide whether the interactive onboarding prompts must be bypassed.
+ *
+ * True when stdin or stdout is not a TTY, or when the `CI` environment
+ * variable is set to anything other than an empty string, `"0"`, or
+ * `"false"` (the latter compared case-insensitively).
+ */
+function shouldSkipInteractiveOnboarding(): boolean {
+  if (process.stdin.isTTY !== true) return true;
+  if (process.stdout.isTTY !== true) return true;
+  const ci = process.env["CI"];
+  if (ci === undefined || ci === "" || ci === "0") return false;
+  return ci.toLowerCase() !== "false";
+}
+
+/**
+ * Persist the preferences used when onboarding is skipped — no agents, no
+ * provider, splash disabled, first-run timestamp set to now — and return
+ * the matching empty onboarding result.
+ */
+function writeDefaultOnboardingPrefs(): OnboardingResult {
+  const firstRunAt = new Date().toISOString();
+  writePrefs({
+    lastAgent: null,
+    lastAgents: [],
+    lastProvider: null,
+    skipSplash: true,
+    firstRunAt,
+  });
+  const result: OnboardingResult = { agents: [], provider: null };
+  return result;
+}
+
export async function runOnboarding(): Promise {
+ if (shouldSkipInteractiveOnboarding()) {
+ return writeDefaultOnboardingPrefs();
+ }
+
p.note(
[
"Welcome to agentmemory.",
@@ -153,7 +187,7 @@ export async function runOnboarding(): Promise {
message: "Which agents will use agentmemory? (space to toggle, enter to confirm)",
options: buildAgentOptions(),
required: false,
- initialValues: ["claude-code"],
+ initialValues: getInitialAgentValues(),
});
if (p.isCancel(agentsPicked)) {
p.cancel("Setup cancelled. Re-run any time with: agentmemory --reset");
@@ -166,7 +200,7 @@ export async function runOnboarding(): Promise {
[
"━ how this works ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
"All selected agents share the same memory at :3111.",
- "A memory saved by Claude Code is visible to Codex + Cursor instantly.",
+ "A memory saved by Claude Code is visible to Copilot + Codex + Cursor instantly.",
"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
].join("\n"),
);
diff --git a/src/config.ts b/src/config.ts
index 4a416ed1..eed5725e 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -159,6 +159,10 @@ export function getEnvVar(key: string): string | undefined {
return getMergedEnv()[key];
}
+/**
+ * True only when `AGENTMEMORY_DROP_STALE_INDEX` in the merged environment
+ * is exactly the string `"true"`.
+ */
+export function isDropStaleIndexEnabled(): boolean {
+  const flag = getMergedEnv()["AGENTMEMORY_DROP_STALE_INDEX"];
+  return flag === "true";
+}
+
export function detectLlmProviderKind(): "llm" | "noop" {
const env = getMergedEnv();
if (
diff --git a/src/functions/diagnostics.ts b/src/functions/diagnostics.ts
index 42f822cb..a63d7959 100644
--- a/src/functions/diagnostics.ts
+++ b/src/functions/diagnostics.ts
@@ -7,8 +7,14 @@ import type {
Action,
ActionEdge,
DiagnosticCheck,
+ Insight,
Lease,
+ Lesson,
Checkpoint,
+ Crystal,
+ ProceduralMemory,
+ SemanticMemory,
+ SessionSummary,
Signal,
Sentinel,
Sketch,
@@ -25,6 +31,12 @@ const ALL_CATEGORIES = [
"signals",
"sessions",
"memories",
+ "lessons",
+ "summaries",
+ "semantic",
+ "procedural",
+ "crystals",
+ "insights",
"mesh",
];
@@ -354,6 +366,186 @@ export function registerDiagnosticsFunction(sdk: ISdk, kv: StateKV): void {
}
}
+ if (categories.includes("lessons")) {
+ // Counts only live lessons (deleted=true rows are tombstoned).
+ // Catches bad confidence values that would silently break recall
+ // scoring (memory_lesson_recall multiplies by confidence).
+ const lessons = await kv.list(KV.lessons);
+ const live = lessons.filter((l) => !l.deleted);
+ let lessonIssues = 0;
+ for (const l of live) {
+ // Number.isFinite rejects NaN / Infinity / non-numbers; a
+ // corrupted row passing those would silently survive the < / >
+ // range check (e.g. NaN < 0 is false, NaN > 1 is false, so the
+ // bad row would be "healthy") and skew memory_lesson_recall's
+ // scoring downstream. Surface as warning.
+ if (
+ !Number.isFinite(l.confidence) ||
+ l.confidence < 0 ||
+ l.confidence > 1
+ ) {
+ checks.push({
+ name: `lesson-bad-confidence:${l.id}`,
+ category: "lessons",
+ status: "warn",
+ message: `Lesson ${l.id} has confidence ${l.confidence} (expected finite number in 0..1)`,
+ fixable: false,
+ });
+ lessonIssues++;
+ }
+ }
+ if (lessonIssues === 0) {
+ checks.push({
+ name: "lessons-ok",
+ category: "lessons",
+ status: "pass",
+ message: `All ${live.length} lessons are healthy (${lessons.length - live.length} tombstoned)`,
+ fixable: false,
+ });
+ }
+ }
+
+ if (categories.includes("summaries")) {
+ const summaries = await kv.list(KV.summaries);
+ let summaryIssues = 0;
+ for (const s of summaries) {
+ // typeof guard before .trim() — a corrupted row with title=null
+ // or title=42 would otherwise throw and abort the whole diagnose
+ // run before later categories get checked.
+ if (typeof s.title !== "string" || s.title.trim().length === 0) {
+ checks.push({
+ name: `summary-missing-title:${s.sessionId}`,
+ category: "summaries",
+ status: "warn",
+ message: `Summary for session ${s.sessionId} has no title`,
+ fixable: false,
+ });
+ summaryIssues++;
+ }
+ }
+ if (summaryIssues === 0) {
+ checks.push({
+ name: "summaries-ok",
+ category: "summaries",
+ status: "pass",
+ message: `All ${summaries.length} session summaries are consistent`,
+ fixable: false,
+ });
+ }
+ }
+
+ if (categories.includes("semantic")) {
+ const semantic = await kv.list(KV.semantic);
+ let semanticIssues = 0;
+ for (const s of semantic) {
+ if (
+ !Number.isFinite(s.confidence) ||
+ s.confidence < 0 ||
+ s.confidence > 1
+ ) {
+ checks.push({
+ name: `semantic-bad-confidence:${s.id}`,
+ category: "semantic",
+ status: "warn",
+ message: `Semantic fact ${s.id} has confidence ${s.confidence} (expected finite number in 0..1)`,
+ fixable: false,
+ });
+ semanticIssues++;
+ }
+ }
+ if (semanticIssues === 0) {
+ checks.push({
+ name: "semantic-ok",
+ category: "semantic",
+ status: "pass",
+ message: `All ${semantic.length} semantic memories are consistent`,
+ fixable: false,
+ });
+ }
+ }
+
+ if (categories.includes("procedural")) {
+ const procedural = await kv.list(KV.procedural);
+ let proceduralIssues = 0;
+ for (const p of procedural) {
+ if (!Array.isArray(p.steps) || p.steps.length === 0) {
+ checks.push({
+ name: `procedural-empty-steps:${p.id}`,
+ category: "procedural",
+ status: "warn",
+ message: `Procedural memory "${p.name}" (${p.id}) has no steps`,
+ fixable: false,
+ });
+ proceduralIssues++;
+ }
+ }
+ if (proceduralIssues === 0) {
+ checks.push({
+ name: "procedural-ok",
+ category: "procedural",
+ status: "pass",
+ message: `All ${procedural.length} procedural memories are consistent`,
+ fixable: false,
+ });
+ }
+ }
+
+ if (categories.includes("crystals")) {
+ const crystals = await kv.list(KV.crystals);
+ let crystalIssues = 0;
+ for (const c of crystals) {
+ if (typeof c.narrative !== "string" || c.narrative.trim().length === 0) {
+ checks.push({
+ name: `crystal-empty-narrative:${c.id}`,
+ category: "crystals",
+ status: "warn",
+ message: `Crystal ${c.id} has empty narrative`,
+ fixable: false,
+ });
+ crystalIssues++;
+ }
+ }
+ if (crystalIssues === 0) {
+ checks.push({
+ name: "crystals-ok",
+ category: "crystals",
+ status: "pass",
+ message: `All ${crystals.length} crystals are consistent`,
+ fixable: false,
+ });
+ }
+ }
+
+ if (categories.includes("insights")) {
+ const insights = await kv.list(KV.insights);
+ let insightIssues = 0;
+ for (const i of insights) {
+ if (
+ !Number.isFinite(i.confidence) ||
+ i.confidence < 0 ||
+ i.confidence > 1
+ ) {
+ checks.push({
+ name: `insight-bad-confidence:${i.id}`,
+ category: "insights",
+ status: "warn",
+ message: `Insight ${i.id} has confidence ${i.confidence} (expected finite number in 0..1)`,
+ fixable: false,
+ });
+ insightIssues++;
+ }
+ }
+ if (insightIssues === 0) {
+ checks.push({
+ name: "insights-ok",
+ category: "insights",
+ status: "pass",
+ message: `All ${insights.length} insights are consistent`,
+ fixable: false,
+ });
+ }
+ }
+
if (categories.includes("mesh")) {
const peers = await kv.list(KV.mesh);
let meshIssues = 0;
diff --git a/src/functions/export-import.ts b/src/functions/export-import.ts
index 674b14da..4c997630 100644
--- a/src/functions/export-import.ts
+++ b/src/functions/export-import.ts
@@ -176,7 +176,7 @@ export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void {
const strategy = data.strategy || "merge";
const importData = data.exportData;
- const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", "0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20"]);
+ const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", "0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20", "0.9.21"]);
if (!supportedVersions.has(importData.version)) {
return {
success: false,
diff --git a/src/functions/search.ts b/src/functions/search.ts
index 74af9ff1..b4444b48 100644
--- a/src/functions/search.ts
+++ b/src/functions/search.ts
@@ -86,6 +86,99 @@ export async function vectorIndexAddGuarded(
}
}
+// Batched variant: calls EmbeddingProvider.embedBatch ONCE for the whole
+// batch, then writes each resulting vector. Use this for bulk paths
+// (rebuildIndex, future bulk-add APIs) where per-item serial awaits
+// dominate wallclock. A batch of N has roughly the latency of a single
+// embed (network + GPU setup amortized), so backfilling a 500k-obs
+// corpus drops from days to hours on a per-batch endpoint like vLLM.
+//
+// Returns the number of items written (`ok`) and skipped (`fail`). Returns
+// { ok: 0, fail: 0 } without embedding when there is no vector index, no
+// embedding provider, or no items. Never throws: every failure is logged
+// as a warning and counted.
+//
+// Failure shape:
+//   - whole-batch network/provider error → all skipped, single warn line
+//   - provider result is not an array of items.length → all skipped
+//   - per-item missing vector or dimension mismatch → that item skipped,
+//     others continue
+//   - per-item index write error → that item skipped, others continue
+export async function vectorIndexAddBatchGuarded(
+  items: Array<{
+    id: string
+    sessionId: string
+    text: string
+    context: { kind: "memory" | "observation" | "synthetic"; logId: string }
+  }>,
+): Promise<{ ok: number; fail: number }> {
+  const vi = vectorIndex
+  const ep = currentEmbeddingProvider
+  if (!vi || !ep || items.length === 0) return { ok: 0, fail: 0 }
+
+  let embeddings: Float32Array[]
+  try {
+    embeddings = await ep.embedBatch(items.map((i) => clipEmbedInput(i.text)))
+  } catch (err) {
+    logger.warn("vector-index add batch: embed failed — skipping batch", {
+      batchSize: items.length,
+      provider: ep.name,
+      error: err instanceof Error ? err.message : String(err),
+    })
+    return { ok: 0, fail: items.length }
+  }
+
+  // Array.isArray first: a provider that resolves to undefined/null would
+  // otherwise throw on `.length` outside any try block and abort the caller.
+  if (!Array.isArray(embeddings) || embeddings.length !== items.length) {
+    logger.warn(
+      "vector-index add batch: provider returned wrong length — skipping batch",
+      {
+        batchSize: items.length,
+        returned: Array.isArray(embeddings) ? embeddings.length : null,
+        provider: ep.name,
+      },
+    )
+    return { ok: 0, fail: items.length }
+  }
+
+  let ok = 0
+  let fail = 0
+  for (const [i, item] of items.entries()) {
+    const embedding = embeddings[i]
+    // A hole or null in the provider's array is handled like a wrong-sized
+    // vector: skip the item instead of throwing on `.length`.
+    if (!embedding || embedding.length !== ep.dimensions) {
+      logger.warn("vector-index add batch: dimension mismatch — skipping item", {
+        kind: item.context.kind,
+        id: item.context.logId,
+        provider: ep.name,
+        expected: ep.dimensions,
+        received: embedding?.length ?? null,
+      })
+      fail++
+      continue
+    }
+    try {
+      vi.add(item.id, item.sessionId, embedding)
+      ok++
+    } catch (err) {
+      logger.warn("vector-index add batch: index write failed — skipping item", {
+        kind: item.context.kind,
+        id: item.context.logId,
+        error: err instanceof Error ? err.message : String(err),
+      })
+      fail++
+    }
+  }
+  return { ok, fail }
+}
+
+// Embed-batch size for rebuild. Each item is one /v1/embeddings call's
+// `input` array element; the provider sees the whole batch as one HTTP
+// round-trip. 32 fits comfortably under typical per-request token budgets
+// (32 × ~110 tok/item ≈ 3.5k tokens) and gets close to per-call
+// throughput for GPU-backed endpoints (vLLM, Triton, etc.). Override via
+// REBUILD_EMBED_BATCH_SIZE for endpoints that prefer smaller/larger
+// batches. Setting it to 1 embeds one item per request; this still goes
+// through embedBatch with a single-element batch.
+const DEFAULT_REBUILD_EMBED_BATCH = 32
+
+// Reads REBUILD_EMBED_BATCH_SIZE. The whole value must be a positive integer
+// (surrounding whitespace is ignored); anything else — unset, empty, zero,
+// negative, fractional, or trailing junk such as "16px" — falls back to the
+// default. Number() is used instead of parseInt so that malformed values are
+// rejected rather than silently truncated (parseInt("1e2") is 1).
+function getRebuildEmbedBatchSize(): number {
+  const raw = process.env.REBUILD_EMBED_BATCH_SIZE?.trim()
+  if (!raw) return DEFAULT_REBUILD_EMBED_BATCH
+  const n = Number(raw)
+  return Number.isSafeInteger(n) && n > 0 ? n : DEFAULT_REBUILD_EMBED_BATCH
+}
+
export async function rebuildIndex(kv: StateKV): Promise {
const idx = getSearchIndex()
idx.clear()
@@ -96,8 +189,28 @@ export async function rebuildIndex(kv: StateKV): Promise {
// repopulation loops run, so BM25 and vector stay in sync.
vectorIndex?.clear()
+ const batchSize = getRebuildEmbedBatchSize()
+ // Accumulator for the batched embed flush. BM25 add is synchronous and
+ // doesn't need batching — only the vector path benefits.
+ type EmbedJob = {
+ id: string
+ sessionId: string
+ text: string
+ context: { kind: "memory" | "observation" | "synthetic"; logId: string }
+ }
+ const pending: EmbedJob[] = []
let count = 0
+ const flush = async (): Promise => {
+ if (pending.length === 0) return
+ await vectorIndexAddBatchGuarded(pending)
+ pending.length = 0
+ }
+ const enqueue = async (job: EmbedJob): Promise => {
+ pending.push(job)
+ if (pending.length >= batchSize) await flush()
+ }
+
// Memories live in their own KV scope outside per-session observation
// scopes, so they need a separate walk. Without this, mem::remember
// entries vanish from BM25 on every restart even after the live-write
@@ -108,12 +221,12 @@ export async function rebuildIndex(kv: StateKV): Promise {
if (memory.isLatest === false) continue
if (!memory.title || !memory.content) continue
idx.add(memoryToObservation(memory))
- await vectorIndexAddGuarded(
- memory.id,
- memory.sessionIds[0] ?? 'memory',
- memory.title + ' ' + memory.content,
- { kind: "memory", logId: memory.id },
- )
+ await enqueue({
+ id: memory.id,
+ sessionId: memory.sessionIds[0] ?? 'memory',
+ text: memory.title + ' ' + memory.content,
+ context: { kind: "memory", logId: memory.id },
+ })
count++
}
} catch (err) {
@@ -123,7 +236,10 @@ export async function rebuildIndex(kv: StateKV): Promise {
}
const sessions = await kv.list(KV.sessions)
- if (!sessions.length) return count
+ if (!sessions.length) {
+ await flush()
+ return count
+ }
const obsPerSession: CompressedObservation[][] = []
const failedSessions: string[] = []
@@ -148,16 +264,19 @@ export async function rebuildIndex(kv: StateKV): Promise {
for (const obs of observations) {
if (obs.title && obs.narrative) {
idx.add(obs)
- await vectorIndexAddGuarded(
- obs.id,
- obs.sessionId,
- obs.title + ' ' + obs.narrative,
- { kind: "observation", logId: obs.id },
- )
+ await enqueue({
+ id: obs.id,
+ sessionId: obs.sessionId,
+ text: obs.title + ' ' + obs.narrative,
+ context: { kind: "observation", logId: obs.id },
+ })
count++
}
}
}
+
+ // Drain the last partial batch.
+ await flush()
return count
}
diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts
index fdeed273..c80b1f87 100644
--- a/src/functions/smart-search.ts
+++ b/src/functions/smart-search.ts
@@ -1,24 +1,32 @@
import type { ISdk } from "iii-sdk";
import type {
+ CompactLessonResult,
CompactSearchResult,
CompressedObservation,
HybridSearchResult,
+ Lesson,
} from "../types.js";
import { KV } from "../state/schema.js";
import { StateKV } from "../state/kv.js";
import { recordAccessBatch } from "./access-tracker.js";
import { logger } from "../logger.js";
+// Compact mode trims each lesson's content for at-a-glance display. The
+// full content is fetched via memory_lesson_recall when the caller needs it.
+const LESSON_CONTENT_PREVIEW_CHARS = 240;
+
export function registerSmartSearchFunction(
sdk: ISdk,
kv: StateKV,
searchFn: (query: string, limit: number) => Promise,
): void {
- sdk.registerFunction("mem::smart-search",
+ sdk.registerFunction("mem::smart-search",
async (data: {
query?: string;
expandIds?: Array;
limit?: number;
+ project?: string;
+ includeLessons?: boolean;
}) => {
if (data.expandIds && data.expandIds.length > 0) {
@@ -68,7 +76,21 @@ export function registerSmartSearchFunction(
}
const limit = Math.max(1, Math.min(data.limit ?? 20, 100));
- const hybridResults = await searchFn(data.query, limit);
+ // Cap lesson results at a smaller number than observations: lessons
+ // are denser (curated insights) so 10 is usually plenty for a recall.
+ const lessonLimit = Math.min(limit, 10);
+ const includeLessons = data.includeLessons !== false;
+
+ // Run observation hybrid-search and lesson recall in parallel so the
+ // extra lesson lookup adds no wallclock when the underlying calls
+ // can overlap. Lesson recall is best-effort: if mem::lesson-recall
+ // fails or returns unexpected shape, log + fall back to empty.
+ const [hybridResults, lessons] = await Promise.all([
+ searchFn(data.query, limit),
+ includeLessons
+ ? recallLessons(sdk, data.query, lessonLimit, data.project)
+ : Promise.resolve([]),
+ ]);
const compact: CompactSearchResult[] = hybridResults.map((r) => ({
obsId: r.observation.id,
@@ -87,12 +109,51 @@ export function registerSmartSearchFunction(
logger.info("Smart search compact", {
query: data.query,
results: compact.length,
+ lessons: lessons.length,
});
- return { mode: "compact", results: compact };
+ const response: {
+ mode: "compact";
+ results: CompactSearchResult[];
+ lessons?: CompactLessonResult[];
+ } = { mode: "compact", results: compact };
+ if (includeLessons) response.lessons = lessons;
+ return response;
},
);
}
+/**
+ * Best-effort lesson lookup for compact smart-search responses.
+ *
+ * Triggers `mem::lesson-recall` and maps every returned lesson to a compact
+ * preview, truncating `content` to LESSON_CONTENT_PREVIEW_CHARS characters
+ * (plus a trailing "…") when it is longer than that.
+ *
+ * Never throws: a non-success result or a result without a `lessons` array
+ * yields `[]`, and any error raised by the trigger or while mapping is
+ * logged at warn level and also yields `[]`.
+ *
+ * @param sdk - SDK handle used to trigger `mem::lesson-recall`.
+ * @param query - Search text forwarded to the recall function.
+ * @param limit - Maximum number of lessons requested.
+ * @param project - Optional project filter, forwarded unchanged.
+ * @returns Compact lesson previews, or an empty list when recall fails.
+ */
+async function recallLessons(
+  sdk: ISdk,
+  query: string,
+  limit: number,
+  project?: string,
+): Promise<CompactLessonResult[]> {
+  try {
+    // NOTE(review): the element type is reconstructed from the fields read
+    // below (`score` is treated as optional) — confirm against the Lesson type.
+    const result = (await sdk.trigger({
+      function_id: "mem::lesson-recall",
+      payload: { query, limit, project },
+    })) as { success?: boolean; lessons?: Array<Lesson & { score?: number }> };
+    if (!result?.success || !Array.isArray(result.lessons)) return [];
+    return result.lessons.map((l) => ({
+      lessonId: l.id,
+      content:
+        l.content.length > LESSON_CONTENT_PREVIEW_CHARS
+          ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…"
+          : l.content,
+      confidence: l.confidence,
+      // Fall back to the lesson's confidence when recall supplied no score.
+      score: l.score ?? l.confidence,
+      createdAt: l.createdAt,
+      project: l.project,
+      tags: l.tags ?? [],
+    }));
+  } catch (err) {
+    logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", {
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return [];
+  }
+}
+
async function findObservation(
kv: StateKV,
obsId: string,
diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts
index 140e0e12..80b29a09 100644
--- a/src/functions/summarize.ts
+++ b/src/functions/summarize.ts
@@ -7,7 +7,12 @@ import type {
} from "../types.js";
import { KV } from "../state/schema.js";
import { StateKV } from "../state/kv.js";
-import { SUMMARY_SYSTEM, buildSummaryPrompt } from "../prompts/summary.js";
+import {
+ SUMMARY_SYSTEM,
+ buildSummaryPrompt,
+ REDUCE_SYSTEM,
+ buildReducePrompt,
+} from "../prompts/summary.js";
import { getXmlTag, getXmlChildren } from "../prompts/xml.js";
import { SummaryOutputSchema } from "../eval/schemas.js";
import { validateOutput } from "../eval/validator.js";
@@ -16,6 +21,169 @@ import type { MetricsStore } from "../eval/metrics-store.js";
import { safeAudit } from "./audit.js";
import { logger } from "../logger.js";
+// Per-chunk observation budget when a session is too large to fit in one
+// LLM call. Default ≈ 50k input tokens per chunk at ~110 tok/obs — fits
+// comfortably in 128k-window models. Override via SUMMARIZE_CHUNK_SIZE.
+const CHUNK_SIZE_DEFAULT = 400;
+// Concurrent in-flight chunk calls. 6 keeps a 100-chunk session under
+// iii's 180s function-invocation timeout at ~8s/call while staying
+// inside generous-but-not-unlimited provider rate limits (well below
+// OpenAI free tier's 500 RPM). High-throughput providers
+// (Novita / DeepInfra / DeepSeek) typically allow 100+ concurrent — set
+// SUMMARIZE_CHUNK_CONCURRENCY higher to cover ~1000+ chunk sessions.
+const CHUNK_CONCURRENCY_DEFAULT = 6;
+// Bail on the merged summary if more than this fraction of chunks fail
+// to parse — a half-blind narrative is worse than a clean error.
+const MAX_SKIP_RATIO = 0.5;
+
+/**
+ * Observation count per chunk for chunked summarization.
+ *
+ * Reads SUMMARIZE_CHUNK_SIZE; an unset, empty, non-numeric, or non-positive
+ * value falls back to CHUNK_SIZE_DEFAULT.
+ */
+function getChunkSize(): number {
+  const override = process.env.SUMMARIZE_CHUNK_SIZE;
+  if (!override) return CHUNK_SIZE_DEFAULT;
+  const parsed = parseInt(override, 10);
+  const usable = Number.isFinite(parsed) && parsed > 0;
+  return usable ? parsed : CHUNK_SIZE_DEFAULT;
+}
+
+/**
+ * Number of chunk summarize calls allowed in flight at once.
+ *
+ * Reads SUMMARIZE_CHUNK_CONCURRENCY; an unset, empty, non-numeric, or
+ * non-positive value falls back to CHUNK_CONCURRENCY_DEFAULT.
+ */
+function getChunkConcurrency(): number {
+  const override = process.env.SUMMARIZE_CHUNK_CONCURRENCY;
+  if (override === undefined || override === "") return CHUNK_CONCURRENCY_DEFAULT;
+  const requested = parseInt(override, 10);
+  if (Number.isFinite(requested) && requested > 0) return requested;
+  return CHUNK_CONCURRENCY_DEFAULT;
+}
+
+/**
+ * Summarize one chunk of observations, retrying once on failure.
+ *
+ * All failure modes are equivalent at this layer: unparseable XML, a
+ * provider error (e.g. content rejected by upstream filters), or a transient
+ * network/5xx error that did not recover on the second attempt all leave the
+ * chunk unusable. Each failed attempt is logged at warn level; the caller
+ * decides via its skip-ratio bailout whether the overall summary is still
+ * trustworthy. Errors that affect every chunk (auth, model down) trip that
+ * bailout naturally.
+ *
+ * @param provider - LLM provider used for the summarize call.
+ * @param chunk - Observations belonging to this chunk.
+ * @param sessionId - Session being summarized (passed to the parser and logs).
+ * @param project - Project forwarded to parseSummaryXml.
+ * @param idx - Zero-based chunk index, used only for log labels.
+ * @param total - Total number of chunks, used only for log labels.
+ * @returns The parsed partial summary, or `null` when both attempts fail.
+ */
+async function summarizeChunkWithRetry(
+  provider: MemoryProvider,
+  chunk: CompressedObservation[],
+  sessionId: string,
+  project: string,
+  idx: number,
+  total: number,
+): Promise<SessionSummary | null> {
+  for (let attempt = 1; attempt <= 2; attempt++) {
+    try {
+      const xml = await provider.summarize(
+        SUMMARY_SYSTEM,
+        buildSummaryPrompt(chunk),
+      );
+      const parsed = parseSummaryXml(xml, sessionId, project, chunk.length);
+      if (parsed) return parsed;
+      logger.warn("Summarize chunk parse failed", {
+        sessionId,
+        chunk: `${idx + 1}/${total}`,
+        attempt,
+      });
+    } catch (err) {
+      // Deliberately swallowed: a thrown provider error is just another
+      // failed attempt, handled by the retry and then the `null` result.
+      logger.warn("Summarize chunk LLM call failed", {
+        sessionId,
+        chunk: `${idx + 1}/${total}`,
+        attempt,
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+  return null;
+}
+
+/**
+ * Produce the final summary XML string for a session.
+ *
+ * Sessions with at most `getChunkSize()` observations are summarized with a
+ * single LLM call (legacy behavior). Larger sessions are split into chunks
+ * that are summarized in parallel batches of `getChunkConcurrency()`; each
+ * chunk is retried once, persistently failing chunks are skipped, and the
+ * remaining partial summaries are merged by one reduce call.
+ *
+ * @param provider - LLM provider used for chunk and reduce calls.
+ * @param compressed - All compressed observations of the session, in order.
+ * @param sessionId - Session being summarized.
+ * @param project - Project forwarded to the chunk summarizer.
+ * @returns The raw provider response plus how it was produced: `mode`,
+ *   the number of `chunks`, and (chunked mode only) how many were `skipped`.
+ * @throws Error prefixed `too_many_chunks_skipped` when more than
+ *   MAX_SKIP_RATIO of the chunks produced no usable partial summary.
+ */
+async function produceSummaryXml(
+  provider: MemoryProvider,
+  compressed: CompressedObservation[],
+  sessionId: string,
+  project: string,
+): Promise<{
+  response: string;
+  mode: "single" | "chunked";
+  chunks: number;
+  skipped?: number;
+}> {
+  const chunkSize = getChunkSize();
+  if (compressed.length <= chunkSize) {
+    const response = await provider.summarize(
+      SUMMARY_SYSTEM,
+      buildSummaryPrompt(compressed),
+    );
+    return { response, mode: "single", chunks: 1 };
+  }
+
+  const chunks: CompressedObservation[][] = [];
+  for (let i = 0; i < compressed.length; i += chunkSize) {
+    chunks.push(compressed.slice(i, i + chunkSize));
+  }
+  const concurrency = getChunkConcurrency();
+  logger.info("Summarize chunking session", {
+    sessionId,
+    chunks: chunks.length,
+    chunkSize,
+    concurrency,
+    totalObservations: compressed.length,
+  });
+
+  // Slot i holds the partial summary of chunk i (or null when it was
+  // skipped), so chronological order survives out-of-order resolution.
+  const partialByIdx: Array<SessionSummary | null> =
+    new Array<SessionSummary | null>(chunks.length).fill(null);
+  for (let batchStart = 0; batchStart < chunks.length; batchStart += concurrency) {
+    const batch = chunks.slice(batchStart, batchStart + concurrency);
+    await Promise.all(
+      batch.map(async (chunk, j) => {
+        const idx = batchStart + j;
+        partialByIdx[idx] = await summarizeChunkWithRetry(
+          provider,
+          chunk,
+          sessionId,
+          project,
+          idx,
+          chunks.length,
+        );
+      }),
+    );
+  }
+
+  const skipped = partialByIdx.filter((p) => p === null).length;
+
+  if (skipped > Math.floor(chunks.length * MAX_SKIP_RATIO)) {
+    throw new Error(
+      `too_many_chunks_skipped: ${skipped}/${chunks.length} chunks failed to parse after retry`,
+    );
+  }
+  if (skipped > 0) {
+    logger.warn("Summarize chunks partially skipped", {
+      sessionId,
+      skipped,
+      total: chunks.length,
+    });
+  }
+
+  // Walk the slots once so each surviving partial keeps its original chunk
+  // index (needed for the observation range) without a per-item search.
+  const reduceInput: Parameters<typeof buildReducePrompt>[0] = [];
+  partialByIdx.forEach((partial, originalIdx) => {
+    if (partial === null) return;
+    reduceInput.push({
+      title: partial.title,
+      narrative: partial.narrative,
+      keyDecisions: partial.keyDecisions,
+      filesModified: partial.filesModified,
+      concepts: partial.concepts,
+      // 1-based, inclusive observation range covered by this chunk.
+      obsRangeStart: originalIdx * chunkSize + 1,
+      obsRangeEnd: Math.min((originalIdx + 1) * chunkSize, compressed.length),
+    });
+  });
+  const response = await provider.summarize(
+    REDUCE_SYSTEM,
+    buildReducePrompt(reduceInput),
+  );
+  return { response, mode: "chunked", chunks: chunks.length, skipped };
+}
+
function parseSummaryXml(
xml: string,
sessionId: string,
@@ -85,8 +253,12 @@ export function registerSummarizeFunction(
}
try {
- const prompt = buildSummaryPrompt(compressed);
- const response = await provider.summarize(SUMMARY_SYSTEM, prompt);
+ const { response, mode, chunks } = await produceSummaryXml(
+ provider,
+ compressed,
+ sessionId,
+ session.project,
+ );
if (!response || !response.trim()) {
const latencyMs = Date.now() - startMs;
if (metricsStore) {
@@ -95,8 +267,8 @@ export function registerSummarizeFunction(
logger.warn("Empty provider response on summarize", {
sessionId,
provider: provider.name,
- promptBytes: prompt.length,
- systemBytes: SUMMARY_SYSTEM.length,
+ mode,
+ chunks,
observationCount: compressed.length,
});
return { success: false, error: "empty_provider_response" };
diff --git a/src/hooks/notification.ts b/src/hooks/notification.ts
index 6c4b7b81..51347d50 100644
--- a/src/hooks/notification.ts
+++ b/src/hooks/notification.ts
@@ -29,9 +29,14 @@ async function main() {
}
if (isSdkChildContext(data)) return;
- if (data.notification_type !== "permission_prompt") return;
+ const notificationType = data.notification_type ?? data.notificationType;
+ if (notificationType !== "permission_prompt") return;
- const sessionId = (data.session_id as string) || "unknown";
+ const rawSessionId = data.session_id ?? data.sessionId;
+ const sessionId =
+ typeof rawSessionId === "string" && rawSessionId.length > 0
+ ? rawSessionId
+ : "unknown";
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -44,7 +49,7 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
data: {
- notification_type: data.notification_type,
+ notification_type: notificationType,
title: data.title,
message: data.message,
},
diff --git a/src/hooks/post-tool-failure.ts b/src/hooks/post-tool-failure.ts
index 337aebdd..7fa71d05 100644
--- a/src/hooks/post-tool-failure.ts
+++ b/src/hooks/post-tool-failure.ts
@@ -29,9 +29,12 @@ async function main() {
}
if (isSdkChildContext(data)) return;
- if (data.is_interrupt) return;
+ if (data.is_interrupt || data.isInterrupt) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+ const toolName = data.tool_name ?? data.toolName;
+ const toolInput = data.tool_input ?? data.toolArgs;
+ const error = data.error ?? data.errorMessage;
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -44,15 +47,15 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
data: {
- tool_name: data.tool_name,
+ tool_name: toolName,
tool_input:
- typeof data.tool_input === "string"
- ? data.tool_input.slice(0, 4000)
- : JSON.stringify(data.tool_input ?? "").slice(0, 4000),
+ typeof toolInput === "string"
+ ? toolInput.slice(0, 4000)
+ : JSON.stringify(toolInput ?? "").slice(0, 4000),
error:
- typeof data.error === "string"
- ? data.error.slice(0, 4000)
- : JSON.stringify(data.error ?? "").slice(0, 4000),
+ typeof error === "string"
+ ? error.slice(0, 4000)
+ : JSON.stringify(error ?? "").slice(0, 4000),
},
}),
signal: AbortSignal.timeout(3000),
diff --git a/src/hooks/post-tool-use.ts b/src/hooks/post-tool-use.ts
index 65afc8b1..c8319c48 100644
--- a/src/hooks/post-tool-use.ts
+++ b/src/hooks/post-tool-use.ts
@@ -30,9 +30,11 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+ const toolName = data.tool_name ?? data.toolName;
+ const toolInput = data.tool_input ?? data.toolArgs;
- const { imageData, cleanOutput } = extractImageData(data.tool_output);
+ const { imageData, cleanOutput } = extractImageData(toolOutput(data));
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -45,8 +47,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
data: {
- tool_name: data.tool_name,
- tool_input: data.tool_input,
+ tool_name: toolName,
+ tool_input: toolInput,
tool_output: truncate(cleanOutput, 8000),
...(imageData ? { image_data: imageData } : {}),
},
@@ -57,6 +59,17 @@ async function main() {
}
}
+/**
+ * Pick the tool output from a hook payload, tolerating several field
+ * spellings.
+ *
+ * Precedence: `tool_response`, then `tool_output`, then `tool_result` /
+ * `toolResult`. When that result is a non-null object, its
+ * `text_result_for_llm` / `textResultForLlm` field is preferred and the
+ * object itself is the fallback; any other result is returned unchanged.
+ *
+ * @param data - Parsed hook payload.
+ * @returns The first output value found, or `undefined` when none is present.
+ */
+function toolOutput(data: Record<string, unknown>): unknown {
+  if (data.tool_response !== undefined) return data.tool_response;
+  if (data.tool_output !== undefined) return data.tool_output;
+  const result = data.tool_result ?? data.toolResult;
+  if (typeof result === "object" && result !== null) {
+    const obj = result as Record<string, unknown>;
+    return obj.text_result_for_llm ?? obj.textResultForLlm ?? result;
+  }
+  return result;
+}
+
function isBase64Image(val: unknown): val is string {
return typeof val === "string" && (
val.startsWith("data:image/") ||
diff --git a/src/hooks/pre-compact.ts b/src/hooks/pre-compact.ts
index ea13ebec..77fb7a57 100644
--- a/src/hooks/pre-compact.ts
+++ b/src/hooks/pre-compact.ts
@@ -30,7 +30,7 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
const project = (data.cwd as string) || process.cwd();
if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") {
diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts
index 61f6c443..eea440c8 100644
--- a/src/hooks/pre-tool-use.ts
+++ b/src/hooks/pre-tool-use.ts
@@ -50,16 +50,28 @@ async function main() {
if (isSdkChildContext(data)) return;
- const toolName = data.tool_name as string;
+ const toolName =
+ typeof data.tool_name === "string"
+ ? data.tool_name
+ : typeof data.toolName === "string"
+ ? data.toolName
+ : undefined;
if (!toolName) return;
- const fileTools = ["Edit", "Write", "Read", "Glob", "Grep"];
- if (!fileTools.includes(toolName)) return;
-
- const toolInput = (data.tool_input || {}) as Record;
+ const normalizedToolName = toolName.toLowerCase();
+ const fileTools = ["edit", "write", "create", "read", "view", "glob", "grep"];
+ if (!fileTools.includes(normalizedToolName)) return;
+
+ const rawToolInput = data.tool_input ?? data.toolArgs;
+ const toolInput =
+ typeof rawToolInput === "object" &&
+ rawToolInput !== null &&
+ !Array.isArray(rawToolInput)
+ ? (rawToolInput as Record)
+ : {};
const files: string[] = [];
const fileKeys =
- toolName === "Grep"
+ normalizedToolName === "grep"
? ["path", "file"]
: ["file_path", "path", "file", "pattern"];
for (const key of fileKeys) {
@@ -69,14 +81,18 @@ async function main() {
if (files.length === 0) return;
const terms: string[] = [];
- if (toolName === "Grep" || toolName === "Glob") {
+ if (normalizedToolName === "grep" || normalizedToolName === "glob") {
const pattern = toolInput["pattern"];
if (typeof pattern === "string" && pattern.length > 0) {
terms.push(pattern);
}
}
- const sessionId = (data.session_id as string) || "unknown";
+ const rawSessionId = data.session_id || data.sessionId;
+ const sessionId =
+ typeof rawSessionId === "string" && rawSessionId.length > 0
+ ? rawSessionId
+ : "unknown";
try {
const res = await fetch(`${REST_URL}/agentmemory/enrich`, {
diff --git a/src/hooks/prompt-submit.ts b/src/hooks/prompt-submit.ts
index 971b11be..10265a77 100644
--- a/src/hooks/prompt-submit.ts
+++ b/src/hooks/prompt-submit.ts
@@ -30,7 +30,7 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/observe`, {
@@ -42,7 +42,7 @@ async function main() {
project: data.cwd || process.cwd(),
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
- data: { prompt: data.prompt },
+ data: { prompt: data.prompt ?? data.userPrompt },
}),
signal: AbortSignal.timeout(3000),
});
diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts
index 31bef22e..7efa550e 100644
--- a/src/hooks/session-end.ts
+++ b/src/hooks/session-end.ts
@@ -30,7 +30,7 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/session/end`, {
@@ -76,4 +76,4 @@ async function main() {
}
}
-main();
\ No newline at end of file
+main();
diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts
index a6cefe41..444edc32 100644
--- a/src/hooks/session-start.ts
+++ b/src/hooks/session-start.ts
@@ -49,7 +49,8 @@ async function main() {
if (isSdkChildContext(data)) return;
const sessionId =
- (data.session_id as string) || `ses_${Date.now().toString(36)}`;
+ ((data.session_id || data.sessionId) as string) ||
+ `ses_${Date.now().toString(36)}`;
const project = (data.cwd as string) || process.cwd();
const url = `${REST_URL}/agentmemory/session/start`;
diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts
index 1f2f5b8a..18ca371d 100644
--- a/src/hooks/stop.ts
+++ b/src/hooks/stop.ts
@@ -37,7 +37,7 @@ async function main() {
return;
}
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
try {
await fetch(`${REST_URL}/agentmemory/summarize`, {
@@ -51,4 +51,4 @@ async function main() {
}
}
-main();
\ No newline at end of file
+main();
diff --git a/src/hooks/subagent-start.ts b/src/hooks/subagent-start.ts
index 3f730adb..3463da0b 100644
--- a/src/hooks/subagent-start.ts
+++ b/src/hooks/subagent-start.ts
@@ -38,7 +38,9 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+ const agentId = data.agent_id || data.agentName;
+ const agentType = data.agent_type || data.agentDisplayName || data.agentName;
fetch(`${REST_URL}/agentmemory/observe`, {
method: "POST",
@@ -50,8 +52,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
data: {
- agent_id: data.agent_id,
- agent_type: data.agent_type,
+ agent_id: agentId,
+ agent_type: agentType,
},
}),
signal: AbortSignal.timeout(TIMEOUT_MS),
diff --git a/src/hooks/subagent-stop.ts b/src/hooks/subagent-stop.ts
index c555746e..90b99fd6 100644
--- a/src/hooks/subagent-stop.ts
+++ b/src/hooks/subagent-stop.ts
@@ -30,7 +30,9 @@ async function main() {
if (isSdkChildContext(data)) return;
- const sessionId = (data.session_id as string) || "unknown";
+ const sessionId = ((data.session_id || data.sessionId) as string) || "unknown";
+ const agentId = data.agent_id || data.agentName;
+ const agentType = data.agent_type || data.agentDisplayName || data.agentName;
const lastMsg =
typeof data.last_assistant_message === "string"
? data.last_assistant_message.slice(0, 4000)
@@ -47,8 +49,8 @@ async function main() {
cwd: data.cwd || process.cwd(),
timestamp: new Date().toISOString(),
data: {
- agent_id: data.agent_id,
- agent_type: data.agent_type,
+ agent_id: agentId,
+ agent_type: agentType,
last_message: lastMsg,
},
}),
diff --git a/src/index.ts b/src/index.ts
index b9b9e84d..704d4809 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -11,6 +11,7 @@ import {
isAutoCompressEnabled,
isConsolidationEnabled,
isContextInjectionEnabled,
+ isDropStaleIndexEnabled,
} from "./config.js";
import {
createProvider,
@@ -376,8 +377,7 @@ async function main() {
.map((m) => `${m.obsId} (dim=${m.dim})`)
.join(", ");
const distinct = Array.from(seenDimensions).sort((a, b) => a - b).join(", ");
- const dropStale =
- process.env["AGENTMEMORY_DROP_STALE_INDEX"] === "true";
+ const dropStale = isDropStaleIndexEnabled();
if (dropStale) {
console.warn(
`[agentmemory] Persisted vector index has ${mismatches.length} of ` +
@@ -412,16 +412,24 @@ async function main() {
const needsRebuild = bm25Index.size === 0;
if (needsRebuild) {
- const indexCount = await rebuildIndex(kv).catch((err) => {
- console.warn(`[agentmemory] Failed to rebuild search index:`, err);
- return 0;
- });
- if (indexCount > 0) {
- bootLog(
- `Search index rebuilt: ${indexCount} entries`,
- );
- indexPersistence.scheduleSave();
- }
+ // Fire-and-forget. rebuildIndex iterates every observation across
+ // every session and awaits an embedding-provider call per batch of
+ // records. On a large corpus + rate-limited embedding endpoint that
+ // can take HOURS; awaiting it here blocks every subsequent boot step
+ // (including startViewerServer below, leaving the viewer port
+ // unbound for the duration). The index lazily fills in over time
+ // and search degrades gracefully — partial coverage > no viewer
+ // for hours. Errors still surface via the trailing .catch below.
+ void rebuildIndex(kv)
+ .then((indexCount) => {
+ if (indexCount > 0) {
+ bootLog(`Search index rebuilt: ${indexCount} entries`);
+ indexPersistence.scheduleSave();
+ }
+ })
+ .catch((err) => {
+ console.warn(`[agentmemory] Failed to rebuild search index:`, err);
+ });
} else {
// Backfill memories into BM25 for users upgrading from <0.9.5: prior
// versions of mem::remember never indexed memories, so the persisted
diff --git a/src/mcp/standalone.ts b/src/mcp/standalone.ts
index 86678a76..1413cbf8 100644
--- a/src/mcp/standalone.ts
+++ b/src/mcp/standalone.ts
@@ -89,6 +89,8 @@ interface Validated {
files?: string[];
query?: string;
limit?: number;
+ format?: string;
+ tokenBudget?: number;
memoryIds?: string[];
reason?: string;
}
@@ -118,6 +120,17 @@ function validate(toolName: string, args: Record): Validated {
}
v.query = query.trim();
v.limit = parseLimit(args["limit"]);
+ const fmt = args["format"];
+ if (typeof fmt === "string" && fmt.trim()) {
+ v.format = fmt.trim().toLowerCase();
+ }
+ const budget = args["token_budget"];
+ if (typeof budget === "number" && Number.isFinite(budget) && budget > 0) {
+ v.tokenBudget = Math.floor(budget);
+ } else if (typeof budget === "string" && budget.trim()) {
+ const n = Number(budget);
+ if (Number.isFinite(n) && n > 0) v.tokenBudget = Math.floor(n);
+ }
return v;
}
case "memory_sessions": {
@@ -159,11 +172,26 @@ async function handleProxy(
});
return textResponse(result);
}
- case "memory_recall":
+ case "memory_recall": {
+ const body: Record = {
+ query: v.query,
+ limit: v.limit,
+ format: v.format ?? "full",
+ };
+ if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget;
+ const result = await handle.call("/agentmemory/search", {
+ method: "POST",
+ body: JSON.stringify(body),
+ });
+ return textResponse(result, true);
+ }
case "memory_smart_search": {
+ const body: Record = { query: v.query, limit: v.limit };
+ if (v.format != null) body["format"] = v.format;
+ if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget;
const result = await handle.call("/agentmemory/smart-search", {
method: "POST",
- body: JSON.stringify({ query: v.query, limit: v.limit }),
+ body: JSON.stringify(body),
});
return textResponse(result, true);
}
diff --git a/src/mcp/transport.ts b/src/mcp/transport.ts
index 766e6472..759ed019 100644
--- a/src/mcp/transport.ts
+++ b/src/mcp/transport.ts
@@ -1,5 +1,3 @@
-import { createInterface } from "node:readline";
-
export interface JsonRpcRequest {
jsonrpc: "2.0";
id?: string | number;
@@ -19,6 +17,11 @@ export type RequestHandler = (
params: Record,
) => Promise;
+export interface StdioMessageParser {
+ push: (chunk: Buffer | string) => void;
+ isFramed: () => boolean;
+}
+
// JSON-RPC 2.0 notifications are messages without an `id` field. The spec
// (and the MCP transport contract) requires the server to NOT send a
// response for notifications. Some clients tolerate spurious responses;
@@ -130,26 +133,131 @@ export async function processLine(
}
}
+/**
+ * Locate the blank line that terminates a header section.
+ *
+ * Both `\r\n\r\n` and `\n\n` separators are recognized; whichever occurs
+ * first in the buffer wins.
+ *
+ * @param buffer - Bytes buffered so far, starting at the header.
+ * @returns Offsets where the header text ends and where the body starts, or
+ *   `null` when no separator is present yet.
+ */
+function findHeaderEnd(buffer: Buffer): { headerEnd: number; bodyStart: number } | null {
+  const crlfAt = buffer.indexOf("\r\n\r\n");
+  const lfAt = buffer.indexOf("\n\n");
+  const hasCrlf = crlfAt !== -1;
+  const hasLf = lfAt !== -1;
+  if (!hasCrlf && !hasLf) return null;
+  const useCrlf = hasCrlf && (!hasLf || crlfAt <= lfAt);
+  return useCrlf
+    ? { headerEnd: crlfAt, bodyStart: crlfAt + 4 }
+    : { headerEnd: lfAt, bodyStart: lfAt + 2 };
+}
+
+/**
+ * Extract the numeric value of the `Content-Length` header.
+ *
+ * The header name is matched case-insensitively and the value must consist
+ * of digits only. The first matching line wins.
+ *
+ * @param header - Raw header text; lines separated by `\n` or `\r\n`.
+ * @returns The declared length, or `null` when no line matches.
+ */
+function parseContentLength(header: string): number | null {
+  const pattern = /^content-length:\s*(\d+)\s*$/i;
+  const hit = header
+    .split(/\r?\n/)
+    .map((line) => pattern.exec(line))
+    .find((candidate) => candidate !== null);
+  return hit ? Number(hit[1]) : null;
+}
+
+/**
+ * Serialize a JSON-RPC response for the stdio transport.
+ *
+ * @param response - Response object to serialize as JSON.
+ * @param framed - When true, emit `Content-Length` framing; otherwise emit
+ *   newline-delimited JSON.
+ * @returns A single newline-terminated string in unframed mode, or a
+ *   `[header, body]` pair of buffers in framed mode, where the header
+ *   carries the body's UTF-8 byte length.
+ */
+export function formatResponse(
+  response: JsonRpcResponse,
+  framed: boolean,
+): string | Buffer[] {
+  const json = JSON.stringify(response);
+  if (framed) {
+    const payload = Buffer.from(json, "utf8");
+    const header = Buffer.from(`Content-Length: ${payload.length}\r\n\r\n`, "ascii");
+    return [header, payload];
+  }
+  return `${json}\n`;
+}
+
+/**
+ * Create an incremental parser for stdio input that accepts both
+ * newline-delimited messages and `Content-Length`-framed messages.
+ *
+ * Bytes accumulate across `push` calls. Each complete message is handed to
+ * `onMessage` as a UTF-8 string; stray CR/LF bytes in front of a message
+ * are skipped. `isFramed()` turns true once a framed message has been decoded.
+ *
+ * @param onMessage - Invoked once per complete framed body or line.
+ * @param writeErr - Sink for diagnostics; defaults to writing to stderr.
+ * @returns Parser handle exposing `push` and `isFramed`.
+ */
+export function createMessageParser(
+  onMessage: (message: string) => void,
+  writeErr: (msg: string) => void = (msg) => process.stderr.write(msg),
+): StdioMessageParser {
+  const LF = 10;
+  const CR = 13;
+  let pending = Buffer.alloc(0);
+  let sawFramedMessage = false;
+
+  // True when the buffered bytes begin with a Content-Length header name.
+  const startsWithHeader = (): boolean =>
+    /^content-length:/i.test(
+      pending.toString("ascii", 0, Math.min(pending.length, 32)),
+    );
+
+  // Consume one framed message, or discard a header with no usable length.
+  // Returns false when more bytes are needed before progress is possible.
+  const consumeFramed = (): boolean => {
+    const bounds = findHeaderEnd(pending);
+    if (!bounds) return false;
+
+    const headerText = pending.subarray(0, bounds.headerEnd).toString("ascii");
+    const declared = parseContentLength(headerText);
+    if (declared === null) {
+      writeErr("[mcp-transport] missing Content-Length header\n");
+      pending = pending.subarray(bounds.bodyStart);
+      return true;
+    }
+
+    const end = bounds.bodyStart + declared;
+    if (pending.length < end) return false;
+
+    sawFramedMessage = true;
+    const body = pending.subarray(bounds.bodyStart, end).toString("utf8");
+    // Advance the buffer before the callback, as the line path does too.
+    pending = pending.subarray(end);
+    onMessage(body);
+    return true;
+  };
+
+  // Consume one newline-terminated line. Returns false when no full line
+  // has been buffered yet.
+  const consumeLine = (): boolean => {
+    const newlineAt = pending.indexOf(LF);
+    if (newlineAt === -1) return false;
+    const text = pending
+      .subarray(0, newlineAt)
+      .toString("utf8")
+      .replace(/\r$/, "");
+    pending = pending.subarray(newlineAt + 1);
+    onMessage(text);
+    return true;
+  };
+
+  const drain = (): void => {
+    while (pending.length > 0) {
+      const lead = pending[0];
+      if (lead === LF || lead === CR) {
+        pending = pending.subarray(1);
+        continue;
+      }
+      const progressed = startsWithHeader() ? consumeFramed() : consumeLine();
+      if (!progressed) return;
+    }
+  };
+
+  return {
+    push(chunk) {
+      const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, "utf8");
+      pending = Buffer.concat([pending, bytes]);
+      drain();
+    },
+    isFramed() {
+      return sawFramedMessage;
+    },
+  };
+}
+
+/**
+ * Build the stdio transport: bytes read from stdin are parsed into messages,
+ * each message is processed strictly after the previous one has finished,
+ * and responses are written to stdout using the framing the client used.
+ */
export function createStdioTransport(handler: RequestHandler): {
start: () => void;
stop: () => void;
} {
- let rl: ReturnType | null = null;
+ let parser: StdioMessageParser | null = null;
+ let queue = Promise.resolve();
const writeResponse = (response: JsonRpcResponse) => {
- process.stdout.write(JSON.stringify(response) + "\n");
+ const formatted = formatResponse(response, parser?.isFramed() ?? false);
+ if (typeof formatted === "string") {
+ process.stdout.write(formatted);
+ return;
+ }
+ for (const chunk of formatted) {
+ process.stdout.write(chunk);
+ }
};
- const onLine = (line: string) => processLine(line, handler, writeResponse);
+ const onData = (chunk: Buffer) => parser?.push(chunk);
return {
start() {
- rl = createInterface({ input: process.stdin });
- rl.on("line", onLine);
+ parser = createMessageParser((message) => {
+ // Chain the recovery handler INTO `queue`. If the rejection were only
+ // observed on a side branch, `queue` itself would stay rejected and
+ // every later `.then` callback would be skipped, silently dropping
+ // all requests that arrive after the first failure.
+ queue = queue
+ .then(() => processLine(message, handler, writeResponse))
+ .catch((err: unknown) => {
+ process.stderr.write(
+ `[mcp-transport] request processing failed: ${
+ err instanceof Error ? err.message : String(err)
+ }\n`,
+ );
+ });
+ });
+ process.stdin.on("data", onData);
},
stop() {
- rl?.close();
- rl = null;
+ process.stdin.off("data", onData);
+ parser = null;
},
};
}
diff --git a/src/prompts/summary.ts b/src/prompts/summary.ts
index f01b28b8..bd040212 100644
--- a/src/prompts/summary.ts
+++ b/src/prompts/summary.ts
@@ -36,3 +36,52 @@ export function buildSummaryPrompt(observations: Array<{
})
return `Session observations (${observations.length} total):\n\n${lines.join('\n\n---\n\n')}`
}
+
+export const REDUCE_SYSTEM = `You are merging multiple partial summaries of the SAME coding session into one final session summary. The partials are chronological chunks of one continuous session — not separate sessions.
+
+Output EXACTLY this XML format with no additional text:
+
+
+ Short session title (max 100 chars)
+ 3-5 sentence narrative covering the whole session
+
+ Key technical decision made
+
+
+ path/to/modified/file
+
+
+ key concept from session
+
+
+
+Rules:
+- Synthesize a single narrative that reflects the whole arc, not a chunk-by-chunk recap
+- Preserve every distinct decision across chunks
+- Union (deduplicate) all files and concepts
+- Title should capture the session's overall outcome`
+
+/**
+ * Render the user prompt for the reduce (merge) step of chunked
+ * summarization.
+ *
+ * Each partial summary becomes one labelled section listing its title,
+ * narrative, decisions, files, and concepts; sections are separated by a
+ * `---` rule and prefixed with a one-line header stating the chunk count.
+ *
+ * @param partials - Partial summaries in chronological order, each with the
+ *   observation range it covers.
+ * @returns The complete prompt text.
+ */
+export function buildReducePrompt(partials: Array<{
+  title: string
+  narrative: string
+  keyDecisions: string[]
+  filesModified: string[]
+  concepts: string[]
+  obsRangeStart: number
+  obsRangeEnd: number
+}>): string {
+  const total = partials.length
+  const rendered: string[] = []
+  partials.forEach((partial, position) => {
+    const decisionLines = partial.keyDecisions.map((d) => ` - ${d}`).join('\n')
+    const fileLines = partial.filesModified.map((f) => ` - ${f}`).join('\n')
+    rendered.push([
+      `[Chunk ${position + 1} of ${total} — obs ${partial.obsRangeStart}-${partial.obsRangeEnd}]`,
+      `Title: ${partial.title}`,
+      `Narrative: ${partial.narrative}`,
+      'Decisions:',
+      decisionLines,
+      'Files:',
+      fileLines,
+      `Concepts: ${partial.concepts.join(', ')}`,
+    ].join('\n'))
+  })
+  return `Partial summaries (${total} chunks of one session, chronological):\n\n${rendered.join('\n\n---\n\n')}`
+}
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index bca2370f..88e10829 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -80,6 +80,13 @@ export class OpenAIProvider implements MemoryProvider {
const body: Record = {
model: this.model,
max_tokens: this.maxTokens,
+ // OpenAI API spec defines `stream` as defaulting to false, so omitting
+ // it should yield a JSON response. Some OpenAI-compatible proxies
+ // (notably 9Router < 0.4.56 — see decolua/9router#1260) default to
+ // text/event-stream when `stream` is absent, which crashes the
+ // `response.json()` call below with `Unexpected token 'd', "data: {"id"...`.
+ // Send it explicitly so non-spec endpoints route to non-streaming too.
+ stream: false,
messages: [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
diff --git a/src/triggers/api.ts b/src/triggers/api.ts
index 083c2159..66eaadc2 100644
--- a/src/triggers/api.ts
+++ b/src/triggers/api.ts
@@ -9,6 +9,7 @@ import type { ResilientProvider } from "../providers/resilient.js";
import { VERSION } from "../version.js";
import { timingSafeCompare } from "../auth.js";
import { renderViewerDocument } from "../viewer/document.js";
+import { getBoundViewerPort, getViewerSkipped } from "../viewer/server.js";
import { MAX_FILES_UPPER_BOUND } from "../functions/replay.js";
import {
isGraphExtractionEnabled,
@@ -143,7 +144,7 @@ export function registerApiTriggers(
sdk.registerFunction("api::liveness",
async (): Promise => ({
status_code: 200,
- body: { status: "ok", service: "agentmemory" },
+ body: { status: "ok", service: "agentmemory", viewerPort: getBoundViewerPort(), viewerSkipped: getViewerSkipped() },
}),
);
sdk.registerTrigger({
@@ -244,6 +245,8 @@ export function registerApiTriggers(
health: health || null,
functionMetrics,
circuitBreaker,
+ viewerPort: getBoundViewerPort(),
+ viewerSkipped: getViewerSkipped(),
},
};
},
diff --git a/src/types.ts b/src/types.ts
index bc38a058..72e347b3 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -266,6 +266,16 @@ export interface CompactSearchResult {
timestamp: string;
}
+export interface CompactLessonResult { // Compact record describing one lesson. NOTE(review): presumably the lesson counterpart of CompactSearchResult — confirm against its producers.
+ lessonId: string;
+ content: string;
+ confidence: number; // NOTE(review): value range is not visible here — confirm.
+ score: number; // NOTE(review): presumably a ranking/relevance score — confirm.
+ createdAt: string; // NOTE(review): presumably a timestamp string — confirm the format.
+ project?: string;
+ tags: string[];
+}
+
export interface TimelineEntry {
observation: CompressedObservation;
sessionId: string;
@@ -293,7 +303,7 @@ export interface ExportPagination {
}
export interface ExportData {
- version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20";
+ version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20" | "0.9.21";
exportedAt: string;
sessions: Session[];
observations: Record;
diff --git a/src/version.ts b/src/version.ts
index 35bfcbb0..8a1b6acf 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = "0.9.20";
+export const VERSION = "0.9.21";
diff --git a/src/viewer/index.html b/src/viewer/index.html
index 4bd9293d..c2c200b8 100644
--- a/src/viewer/index.html
+++ b/src/viewer/index.html
@@ -1127,6 +1127,39 @@ agentmemory
};
}
+ // IME_SAFE_SEARCH_V2
+ /**
+  * Attach a debounced search handler to a text input without disturbing
+  * IME (composition-based) text entry.
+  *
+  * - Plain typing calls `onSearch` through `debounce` after `ms` milliseconds.
+  * - While a composition is in progress, `input` events are ignored.
+  * - When the composition ends, `onSearch` runs immediately with the final
+  *   value, and an `input` event dispatched in the same task is skipped so
+  *   the commit is not searched twice.
+  * - A debounced call scheduled before a composition started is dropped
+  *   rather than fired: it would call `onSearch` with a stale value during or
+  *   after the composition, and callers re-render the input from `onSearch`.
+  *
+  * @param input    The text input element to listen on.
+  * @param ms       Debounce delay in milliseconds for plain typing.
+  * @param onSearch Callback invoked with the current input value.
+  */
+ function bindImeSafeSearch(input, ms, onSearch) {
+   var composing = false;
+   var justCommitted = false;
+   // True from compositionstart until the next accepted plain `input` event;
+   // any debounce timer that fires while it is set carries a stale value.
+   var superseded = false;
+   var run = debounce(function(value) {
+     if (superseded) return;
+     onSearch(value);
+   }, ms);
+   input.addEventListener('compositionstart', function() {
+     composing = true;
+     superseded = true;
+   });
+   input.addEventListener('compositionend', function() {
+     composing = false;
+     justCommitted = true;
+     onSearch(input.value);
+     // Clear the flag on the next task so only an `input` event fired
+     // together with this commit is suppressed.
+     setTimeout(function() { justCommitted = false; }, 0);
+   });
+   input.addEventListener('input', function(e) {
+     if (composing || e.isComposing) return;
+     if (justCommitted) return;
+     superseded = false;
+     run(input.value);
+   });
+ }
+ /**
+  * Snapshot the focused element's id and selection range when that element
+  * is one of the given search inputs.
+  *
+  * @param ids Element ids that count as search inputs.
+  * @returns `{ id, start, end }` for the focused search input, or `null` when
+  *          nothing is focused or the focused element is not in `ids`.
+  */
+ function captureSearchFocus(ids) {
+   var active = document.activeElement;
+   var tracked = !!active && ids.indexOf(active.id) >= 0;
+   if (!tracked) return null;
+   return {
+     id: active.id,
+     start: active.selectionStart,
+     end: active.selectionEnd
+   };
+ }
+ /**
+  * Re-focus the element recorded by `captureSearchFocus` and put its
+  * selection range back. Does nothing when `focus` is falsy or the element
+  * no longer exists.
+  *
+  * @param focus Snapshot `{ id, start, end }`, or a falsy value.
+  */
+ function restoreSearchFocus(focus) {
+   var target = focus ? document.getElementById(focus.id) : null;
+   if (!target) return;
+   target.focus();
+   if (typeof target.setSelectionRange !== 'function') return;
+   try {
+     target.setSelectionRange(focus.start, focus.end);
+   } catch (ignored) {
+     // Best effort: restoring the caret is optional, focus is already set.
+   }
+ }
+
async function api(path, opts) {
try {
var url = REST + '/agentmemory/' + path;
@@ -1629,6 +1662,7 @@ agentmemory
html += '↻ Rebuild Graph ';
html += '
';
+ var __focus = captureSearchFocus(['graph-search']);
sb.innerHTML = html;
sb.querySelectorAll('input[type="checkbox"]').forEach(function(cb) {
@@ -1640,11 +1674,9 @@ agentmemory
var searchInput = document.getElementById('graph-search');
if (searchInput) {
- searchInput.addEventListener('input', debounce(function() {
- graphSearchTerm = this.value.toLowerCase();
- renderGraph();
- }, 150));
+ bindImeSafeSearch(searchInput, 200, function(v){ graphSearchTerm = v.toLowerCase(); renderGraph(); });
}
+ restoreSearchFocus(__focus);
}
function initGraph() {
@@ -2198,7 +2230,26 @@ agentmemory
var filtered = items.filter(function(m) {
if (typeFilter && m.type !== typeFilter) return false;
- if (search && !(m.title || '').toLowerCase().includes(search) && !(m.content || '').toLowerCase().includes(search)) return false;
+ const normalizedSearch = (search || '')
+ .normalize("NFKC")
+ .toLowerCase();
+
+ const normalizedTitle = (m.title || '')
+ .normalize("NFKC")
+ .toLowerCase();
+
+ const normalizedContent = (m.content || '')
+ .normalize("NFKC")
+ .toLowerCase();
+
+ if (
+ search &&
+ !normalizedTitle.includes(normalizedSearch) &&
+ !normalizedContent.includes(normalizedSearch)
+ ) {
+ return false;
+ }
+
return true;
});
@@ -2261,14 +2312,12 @@ agentmemory
html += '';
}
+ var __focus = captureSearchFocus(['mem-search']);
el.innerHTML = html;
var searchInput = document.getElementById('mem-search');
if (searchInput) {
- searchInput.addEventListener('input', debounce(function() {
- state.memories.search = this.value;
- renderMemories();
- }, 200));
+ bindImeSafeSearch(searchInput, 200, function(v){ state.memories.search = v; renderMemories(); });
}
var typeSelect = document.getElementById('mem-type-filter');
if (typeSelect) {
@@ -2277,6 +2326,7 @@ agentmemory
renderMemories();
});
}
+ restoreSearchFocus(__focus);
}
function deleteMemory(id, title) {
@@ -2853,7 +2903,7 @@ agentmemory
html += '';
html += '';
- html += ' ';
+ html += ' ';
html += '' + items.length + ' lessons ';
html += '
';
@@ -2882,7 +2932,11 @@ agentmemory
html += '';
}
+ var __focus = captureSearchFocus(['lessons-search']);
el.innerHTML = html;
+ var __ls = document.getElementById('lessons-search');
+ if (__ls) bindImeSafeSearch(__ls, 200, function(v){ state.lessons.search = v; renderLessons(); });
+ restoreSearchFocus(__focus);
}
async function loadActions() {
@@ -2912,8 +2966,8 @@ agentmemory
}
var html = '';
- html += ' ';
- html += '';
+ html += ' ';
+ html += '';
html += 'All statuses ';
['pending','active','done','blocked','cancelled'].forEach(function(s) {
html += '' + s + ' ';
@@ -2951,7 +3005,13 @@ agentmemory
html += '';
}
+ var __focus = captureSearchFocus(['actions-search']);
el.innerHTML = html;
+ var __as = document.getElementById('actions-search');
+ if (__as) bindImeSafeSearch(__as, 200, function(v){ state.actions.search = v; renderActions(); });
+ var __af = document.getElementById('actions-status-filter');
+ if (__af) __af.addEventListener('change', function(){ state.actions.statusFilter = this.value; renderActions(); });
+ restoreSearchFocus(__focus);
}
async function loadCrystals() {
@@ -2999,7 +3059,7 @@ agentmemory
html += '
';
html += '';
- html += ' ';
+ html += ' ';
html += '' + items.length + ' crystals ';
html += '
';
@@ -3060,7 +3120,11 @@ agentmemory
});
}
+ var __focus = captureSearchFocus(['crystals-search']);
el.innerHTML = html;
+ var __cs = document.getElementById('crystals-search');
+ if (__cs) bindImeSafeSearch(__cs, 200, function(v){ state.crystals.search = v; renderCrystals(); });
+ restoreSearchFocus(__focus);
}
async function loadAudit() {
diff --git a/src/viewer/server.ts b/src/viewer/server.ts
index bd8e3c63..71598690 100644
--- a/src/viewer/server.ts
+++ b/src/viewer/server.ts
@@ -131,6 +131,16 @@ function readBody(req: IncomingMessage): Promise {
const MAX_VIEWER_PORT_RETRIES = 10;
+let boundViewerPort: number | null = null; // Port the viewer server is listening on; set in its "listening" handler, null before that and after a viewer error.
+let viewerSkipped = false; // Set to true by the viewer server's error handler when the viewer is not started (ports exhausted or another server error).
+
+export function getBoundViewerPort(): number | null { // Accessor for the module-level viewer port state.
+ return boundViewerPort;
+}
+export function getViewerSkipped(): boolean { // Accessor for the module-level "viewer skipped" flag.
+ return viewerSkipped;
+}
+
export function startViewerServer(
port: number,
_kv: unknown,
@@ -138,6 +148,10 @@ export function startViewerServer(
secret?: string,
restPort?: number,
): Server {
+ // Reset exported runtime state for each start attempt.
+ boundViewerPort = null;
+ viewerSkipped = false;
+
const resolvedRestPort = restPort ?? port - 2;
const requestedPort = port;
// Computed lazily on first request — `port` may be 0 here (OS-assigned)
@@ -227,6 +241,12 @@ export function startViewerServer(
};
server.on("listening", () => {
+ const addr = server.address();
+ boundViewerPort =
+ addr && typeof addr === "object" && "port" in addr
+ ? addr.port
+ : currentPort;
+ viewerSkipped = false;
if (currentPort === requestedPort) {
console.log(`[agentmemory] Viewer: http://localhost:${currentPort}`);
} else {
@@ -244,10 +264,14 @@ export function startViewerServer(
return;
}
if (err.code === "EADDRINUSE") {
+ boundViewerPort = null;
+ viewerSkipped = true;
console.warn(
`[agentmemory] Viewer ports ${requestedPort}-${requestedPort + MAX_VIEWER_PORT_RETRIES} all in use, skipping viewer.`,
);
} else {
+ boundViewerPort = null;
+ viewerSkipped = true;
console.error(`[agentmemory] Viewer error:`, err.message);
}
});
diff --git a/test/cli-connect.test.ts b/test/cli-connect.test.ts
index 99174dac..fbb8c2b5 100644
--- a/test/cli-connect.test.ts
+++ b/test/cli-connect.test.ts
@@ -10,6 +10,17 @@ import {
} from "../src/cli/connect/index.js";
import type { ConnectAdapter } from "../src/cli/connect/types.js";
+const EXPECTED_COPILOT_MCP_COMMAND = // Platform-specific command/args the copilot-cli adapter tests expect to find in mcp-config.json.
+ process.platform === "win32"
+ ? { // Windows: npx is run through the command interpreter named by ComSpec/COMSPEC, falling back to cmd.exe.
+ command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe",
+ args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"],
+ }
+ : { // Other platforms: npx is invoked directly.
+ command: "npx",
+ args: ["-y", "@agentmemory/mcp"],
+ };
+
describe("agentmemory connect — dispatcher", () => {
it("resolves every known agent by lowercase name", () => {
for (const name of knownAgents()) {
@@ -29,10 +40,11 @@ describe("agentmemory connect — dispatcher", () => {
expect(resolveAdapter("")).toBeNull();
});
- it("ships exactly the 8 agents specified by the spec", () => {
+ it("ships exactly the 9 agents specified by the spec", () => {
expect(knownAgents().sort()).toEqual(
[
"claude-code",
+ "copilot-cli",
"codex",
"cursor",
"gemini-cli",
@@ -42,7 +54,7 @@ describe("agentmemory connect — dispatcher", () => {
"pi",
].sort(),
);
- expect(ADAPTERS.length).toBe(8);
+ expect(ADAPTERS.length).toBe(9);
});
it("every adapter exposes detect() and install()", () => {
@@ -175,7 +187,193 @@ describe("agentmemory connect — claude-code adapter (mock filesystem)", () =>
if (result.kind === "installed") {
expect(result.backupPath).toBeDefined();
expect(existsSync(result.backupPath!)).toBe(true);
- expect(result.backupPath!).toContain(".agentmemory/backups");
+ expect(result.backupPath!).toContain(join(".agentmemory", "backups"));
+ }
+ });
+});
+
+describe("agentmemory connect — copilot-cli adapter (mock filesystem)", () => {
+ let tmpHome: string;
+ let originalHome: string | undefined;
+ let originalUserprofile: string | undefined;
+ let originalCopilotHome: string | undefined;
+ let importCounter = 0;
+
+ beforeEach(() => {
+ tmpHome = mkdtempSync(join(tmpdir(), "am-connect-"));
+ originalHome = process.env["HOME"];
+ originalUserprofile = process.env["USERPROFILE"];
+ originalCopilotHome = process.env["COPILOT_HOME"];
+ process.env["HOME"] = tmpHome;
+ process.env["USERPROFILE"] = tmpHome;
+ delete process.env["COPILOT_HOME"];
+ vi.resetModules();
+ });
+
+ afterEach(() => {
+ if (originalHome !== undefined) process.env["HOME"] = originalHome;
+ else delete process.env["HOME"];
+ if (originalUserprofile !== undefined)
+ process.env["USERPROFILE"] = originalUserprofile;
+ else delete process.env["USERPROFILE"];
+ if (originalCopilotHome !== undefined)
+ process.env["COPILOT_HOME"] = originalCopilotHome;
+ else delete process.env["COPILOT_HOME"];
+ rmSync(tmpHome, { recursive: true, force: true });
+ vi.resetModules();
+ });
+
+ async function loadAdapter(): Promise { // Dynamically import the copilot-cli adapter module and return its `adapter` export.
+ const mod = await import(
+ "../src/cli/connect/copilot-cli.js?t=" + Date.now() + "-" + importCounter++ // unique specifier per call; presumably to defeat module caching so HOME/COPILOT_HOME changes are re-read — confirm
+ );
+ return (mod as { adapter: ConnectAdapter }).adapter;
+ }
+
+ it("detect() returns false when ~/.copilot doesn't exist", async () => {
+ const a = await loadAdapter();
+ expect(a.detect()).toBe(false);
+ });
+
+ it("install() writes mcpServers.agentmemory into ~/.copilot/mcp-config.json and is idempotent", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+
+ const a = await loadAdapter();
+ expect(a.detect()).toBe(true);
+
+ const first = await a.install({ dryRun: false, force: false });
+ expect(first.kind).toBe("installed");
+
+ const config = JSON.parse(
+ readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+ );
+ expect(config.mcpServers.agentmemory).toEqual({
+ type: "local",
+ ...EXPECTED_COPILOT_MCP_COMMAND,
+ env: {
+ AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+ AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+ },
+ tools: ["*"],
+ });
+
+ const second = await a.install({ dryRun: false, force: false });
+ expect(second.kind).toBe("already-wired");
+ });
+
+ it("honors COPILOT_HOME when locating mcp-config.json", async () => {
+ const customCopilotHome = join(tmpHome, "custom-copilot-home");
+ process.env["COPILOT_HOME"] = customCopilotHome;
+ require("node:fs").mkdirSync(customCopilotHome, { recursive: true });
+
+ const a = await loadAdapter();
+ expect(a.detect()).toBe(true);
+
+ const result = await a.install({ dryRun: false, force: false });
+ expect(result.kind).toBe("installed");
+ expect(result.mutatedPath).toBe(join(customCopilotHome, "mcp-config.json"));
+ expect(existsSync(join(customCopilotHome, "mcp-config.json"))).toBe(true);
+ expect(existsSync(join(tmpHome, ".copilot", "mcp-config.json"))).toBe(false);
+ });
+
+ it("install() preserves unrelated top-level keys and mcpServers entries", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+ writeFileSync(
+ join(tmpHome, ".copilot", "mcp-config.json"),
+ JSON.stringify({
+ otherTopLevel: { keep: true },
+ mcpServers: { other: { type: "local", command: "other" } },
+ }),
+ );
+
+ const a = await loadAdapter();
+ const result = await a.install({ dryRun: false, force: false });
+ expect(result.kind).toBe("installed");
+
+ const config = JSON.parse(
+ readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+ );
+ expect(config.otherTopLevel).toEqual({ keep: true });
+ expect(config.mcpServers.other).toEqual({ type: "local", command: "other" });
+ expect(config.mcpServers.agentmemory.command).toBe(
+ EXPECTED_COPILOT_MCP_COMMAND.command,
+ );
+ });
+
+ it("install() writes env passthrough block for AGENTMEMORY_URL + AGENTMEMORY_SECRET", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+
+ const a = await loadAdapter();
+ const result = await a.install({ dryRun: false, force: false });
+ expect(result.kind).toBe("installed");
+
+ const config = JSON.parse(
+ readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+ );
+ const entry = config.mcpServers.agentmemory;
+ expect(entry.env.AGENTMEMORY_URL).toBe("${AGENTMEMORY_URL}");
+ expect(entry.env.AGENTMEMORY_SECRET).toBe("${AGENTMEMORY_SECRET}");
+ });
+
+ it("install() with --force rewrites even when already wired", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+ writeFileSync(
+ join(tmpHome, ".copilot", "mcp-config.json"),
+ JSON.stringify({
+ mcpServers: {
+ agentmemory: {
+ type: "local",
+ ...EXPECTED_COPILOT_MCP_COMMAND,
+ env: {
+ AGENTMEMORY_URL: "${AGENTMEMORY_URL}",
+ AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}",
+ },
+ tools: ["memory_save"],
+ },
+ },
+ }),
+ );
+
+ const a = await loadAdapter();
+ const result = await a.install({ dryRun: false, force: true });
+ expect(result.kind).toBe("installed");
+
+ const config = JSON.parse(
+ readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"),
+ );
+ expect(config.mcpServers.agentmemory.tools).toEqual(["*"]);
+ });
+
+ it("install() with --dry-run does not mutate the file", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+ const before = JSON.stringify({ mcpServers: {} });
+ writeFileSync(join(tmpHome, ".copilot", "mcp-config.json"), before);
+
+ const a = await loadAdapter();
+ const result = await a.install({ dryRun: true, force: false });
+ expect(result.kind).toBe("installed");
+
+ const after = readFileSync(
+ join(tmpHome, ".copilot", "mcp-config.json"),
+ "utf-8",
+ );
+ expect(after).toBe(before);
+ });
+
+ it("install() creates a backup file when config pre-exists", async () => {
+ require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true });
+ writeFileSync(
+ join(tmpHome, ".copilot", "mcp-config.json"),
+ JSON.stringify({ mcpServers: {} }),
+ );
+
+ const a = await loadAdapter();
+ const result = await a.install({ dryRun: false, force: false });
+ expect(result.kind).toBe("installed");
+ if (result.kind === "installed") {
+ expect(result.backupPath).toBeDefined();
+ expect(existsSync(result.backupPath!)).toBe(true);
+ expect(result.backupPath!).toContain(join(".agentmemory", "backups"));
}
});
});
diff --git a/test/cli-onboarding.test.ts b/test/cli-onboarding.test.ts
new file mode 100644
index 00000000..9779a7e9
--- /dev/null
+++ b/test/cli-onboarding.test.ts
@@ -0,0 +1,94 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+const prompts = vi.hoisted(() => ({ // Stub of the @clack/prompts API; created via vi.hoisted so the vi.mock("@clack/prompts") factory can return it.
+ note: vi.fn(),
+ multiselect: vi.fn(async () => { // interactive prompts throw so that any call fails the non-TTY test loudly
+ throw new Error("interactive multiselect should not run in non-TTY onboarding");
+ }),
+ select: vi.fn(async () => {
+ throw new Error("interactive select should not run in non-TTY onboarding");
+ }),
+ confirm: vi.fn(async () => true),
+ isCancel: vi.fn(() => false),
+ cancel: vi.fn(),
+ log: {
+ warn: vi.fn(),
+ step: vi.fn(),
+ error: vi.fn(),
+ },
+}));
+
+vi.mock("@clack/prompts", () => prompts);
+vi.mock("../src/cli/connect/index.js", () => ({
+ resolveAdapter: vi.fn(),
+ runAdapter: vi.fn(),
+}));
+
+const ORIGINAL_HOME = process.env["HOME"];
+const ORIGINAL_USERPROFILE = process.env["USERPROFILE"];
+const stdinTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdin, "isTTY");
+const stdoutTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdout, "isTTY");
+
+let sandboxHome: string;
+
+/**
+ * Force `isTTY` on both `process.stdin` and `process.stdout` to `value`.
+ * The property is defined as configurable so it can be restored afterwards.
+ */
+function setTTY(value: boolean): void {
+  for (const stream of [process.stdin, process.stdout]) {
+    Object.defineProperty(stream, "isTTY", { configurable: true, value });
+  }
+}
+
+/**
+ * Undo `setTTY`: put back the `isTTY` property descriptors captured at module
+ * load for stdin and stdout, or delete the property when a stream had no own
+ * `isTTY` descriptor to begin with.
+ */
+function restoreTTY(): void {
+  const saved: Array<[{ isTTY?: boolean }, PropertyDescriptor | undefined]> = [
+    [process.stdin, stdinTtyDescriptor],
+    [process.stdout, stdoutTtyDescriptor],
+  ];
+  for (const [stream, descriptor] of saved) {
+    if (descriptor) {
+      Object.defineProperty(stream, "isTTY", descriptor);
+    } else {
+      delete stream.isTTY;
+    }
+  }
+}
+
+async function freshOnboarding() { // Reset vitest's module registry, then import the onboarding module so each test gets a fresh instance.
+ vi.resetModules();
+ return await import("../src/cli/onboarding.js");
+}
+
+describe("cli onboarding", () => {
+ beforeEach(() => {
+ sandboxHome = mkdtempSync(join(tmpdir(), "agentmemory-onboarding-"));
+ process.env["HOME"] = sandboxHome;
+ process.env["USERPROFILE"] = sandboxHome;
+ setTTY(false);
+ vi.clearAllMocks();
+ });
+
+ afterEach(() => {
+ restoreTTY();
+ if (ORIGINAL_HOME === undefined) delete process.env["HOME"];
+ else process.env["HOME"] = ORIGINAL_HOME;
+ if (ORIGINAL_USERPROFILE === undefined) delete process.env["USERPROFILE"];
+ else process.env["USERPROFILE"] = ORIGINAL_USERPROFILE;
+ rmSync(sandboxHome, { recursive: true, force: true });
+ });
+
+ it("does not prompt and records default preferences when onboarding runs without a TTY", async () => {
+ const { runOnboarding } = await freshOnboarding();
+
+ const result = await runOnboarding();
+
+ expect(result).toEqual({ agents: [], provider: null });
+ expect(prompts.multiselect).not.toHaveBeenCalled();
+ expect(prompts.select).not.toHaveBeenCalled();
+ expect(prompts.confirm).not.toHaveBeenCalled();
+
+ const preferencesPath = join(sandboxHome, ".agentmemory", "preferences.json");
+ expect(existsSync(preferencesPath)).toBe(true);
+ const preferences = JSON.parse(readFileSync(preferencesPath, "utf-8"));
+ expect(preferences).toMatchObject({
+ schemaVersion: 1,
+ lastAgent: null,
+ lastAgents: [],
+ lastProvider: null,
+ skipSplash: true,
+ });
+ expect(typeof preferences.firstRunAt).toBe("string");
+ });
+});
diff --git a/test/codex-connect-hooks.test.ts b/test/codex-connect-hooks.test.ts
new file mode 100644
index 00000000..75accbee
--- /dev/null
+++ b/test/codex-connect-hooks.test.ts
@@ -0,0 +1,137 @@
+import { describe, it, expect } from "vitest";
+import { writeFileSync, readFileSync, mkdirSync, rmSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { tmpdir } from "node:os";
+import {
+ buildMergedHooks,
+ findPluginRoot,
+ type HookManifest,
+} from "../src/cli/connect/codex-hooks.js";
+
+const PLUGIN_ROOT = resolve(__dirname, "..", "plugin");
+
+describe("findPluginRoot", () => {
+ it("locates the bundled plugin/ directory from src/cli/connect/", () => {
+ const root = findPluginRoot();
+ expect(root).toBe(PLUGIN_ROOT);
+ });
+});
+
+describe("buildMergedHooks", () => {
+ it("rewrites ${CLAUDE_PLUGIN_ROOT} to absolute pluginRoot in every command", () => {
+ const merged = buildMergedHooks(null, PLUGIN_ROOT);
+ for (const entries of Object.values(merged.hooks)) {
+ for (const entry of entries) {
+ for (const handler of entry.hooks) {
+ expect(handler.command).not.toContain("${CLAUDE_PLUGIN_ROOT}");
+ expect(handler.command).toContain(`${PLUGIN_ROOT}/scripts/`);
+ }
+ }
+ }
+ });
+
+ it("preserves matchers from the bundled manifest (e.g. PreToolUse)", () => {
+ const merged = buildMergedHooks(null, PLUGIN_ROOT);
+ const preToolUse = merged.hooks["PreToolUse"];
+ expect(preToolUse).toBeDefined();
+ expect(preToolUse!.length).toBeGreaterThan(0);
+ expect(preToolUse![0].matcher).toBe("Edit|Write|Read|Glob|Grep");
+ });
+
+ it("includes all six expected lifecycle events", () => {
+ const merged = buildMergedHooks(null, PLUGIN_ROOT);
+ for (const event of [
+ "SessionStart",
+ "UserPromptSubmit",
+ "PreToolUse",
+ "PostToolUse",
+ "PreCompact",
+ "Stop",
+ ]) {
+ expect(Object.keys(merged.hooks)).toContain(event);
+ }
+ });
+
+ it("appends to existing user hooks without dropping them", () => {
+ const existing: HookManifest = {
+ hooks: {
+ SessionStart: [
+ {
+ hooks: [{ type: "command", command: "echo user-custom" }],
+ },
+ ],
+ UserPromptSubmit: [
+ {
+ hooks: [{ type: "command", command: "echo another-user-hook" }],
+ },
+ ],
+ },
+ };
+ const merged = buildMergedHooks(existing, PLUGIN_ROOT);
+ const sessionStart = merged.hooks["SessionStart"]!;
+ const userHook = sessionStart.find((e) =>
+ e.hooks.some((h) => h.command === "echo user-custom"),
+ );
+ expect(userHook, "user's SessionStart hook should survive").toBeDefined();
+ const ours = sessionStart.find((e) =>
+ e.hooks.some((h) => h.command.includes(`${PLUGIN_ROOT}/scripts/session-start.mjs`)),
+ );
+ expect(ours, "agentmemory SessionStart hook should be appended").toBeDefined();
+ });
+
+ it("re-install strips previous agentmemory entries (idempotent by script path)", () => {
+ const first = buildMergedHooks(null, PLUGIN_ROOT);
+ const second = buildMergedHooks(first, PLUGIN_ROOT);
+ for (const event of Object.keys(first.hooks)) {
+ expect(
+ second.hooks[event]!.length,
+ `${event} should not double after second install`,
+ ).toBe(first.hooks[event]!.length);
+ }
+ });
+
+ it("re-install preserves unrelated user entries", () => {
+ const userEntry = {
+ hooks: [{ type: "command", command: "echo user-untouchable" }],
+ };
+ const withUser: HookManifest = {
+ hooks: {
+ SessionStart: [userEntry],
+ Stop: [{ hooks: [{ type: "command", command: "echo also-user" }] }],
+ },
+ };
+ const installed = buildMergedHooks(withUser, PLUGIN_ROOT);
+ const reinstalled = buildMergedHooks(installed, PLUGIN_ROOT);
+ expect(
+ reinstalled.hooks["SessionStart"]!.some((e) =>
+ e.hooks.some((h) => h.command === "echo user-untouchable"),
+ ),
+ ).toBe(true);
+ expect(
+ reinstalled.hooks["Stop"]!.some((e) =>
+ e.hooks.some((h) => h.command === "echo also-user"),
+ ),
+ ).toBe(true);
+ });
+
+ it("handles empty existing manifest object", () => {
+ const merged = buildMergedHooks({ hooks: {} }, PLUGIN_ROOT);
+ expect(Object.keys(merged.hooks).length).toBeGreaterThan(0);
+ });
+});
+
+describe("buildMergedHooks file round-trip", () => {
+ it("produces JSON that parses back to a structurally equivalent manifest", () => {
+ const dir = join(tmpdir(), `agentmemory-codex-hooks-${process.pid}-${Date.now()}`);
+ mkdirSync(dir, { recursive: true });
+ const path = join(dir, "hooks.json");
+ try {
+ const merged = buildMergedHooks(null, PLUGIN_ROOT);
+ writeFileSync(path, `${JSON.stringify(merged, null, 2)}\n`, "utf-8");
+ const reread = JSON.parse(readFileSync(path, "utf-8")) as HookManifest;
+ expect(Object.keys(reread.hooks).sort()).toEqual(Object.keys(merged.hooks).sort());
+ } finally {
+ rmSync(dir, { recursive: true, force: true });
+ }
+ });
+});
diff --git a/test/codex-plugin.test.ts b/test/codex-plugin.test.ts
index bb380876..bbbd88db 100644
--- a/test/codex-plugin.test.ts
+++ b/test/codex-plugin.test.ts
@@ -9,6 +9,29 @@ function readJson(path: string): T {
return JSON.parse(readFileSync(path, "utf-8")) as T;
}
+type HookHandler = { type: string; command: string };
+type HookEntry = { hooks: HookHandler[] };
+
+function hookCommands(path: string): string[] { // Collect every handler `command` string from the hook manifest JSON at `path`, flattened into one list.
+ const manifest = readJson<{ hooks: Record }>(path);
+ return Object.values(manifest.hooks).flatMap((entries) => // one entry list per key of `hooks`
+ entries.flatMap((entry) => entry.hooks.map((handler) => handler.command)),
+ );
+}
+
+describe("Plugin hook manifests", () => {
+ it("quote plugin script paths so roots with spaces stay intact", () => {
+ for (const manifest of ["hooks.json", "hooks.codex.json"]) {
+ const commands = hookCommands(join(pluginRoot, "hooks", manifest));
+ expect(commands.length, `${manifest} should contain hook commands`).toBeGreaterThan(0);
+
+ for (const command of commands) {
+ expect(command).toMatch(/^node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/[^\s"]+\.mjs"$/);
+ }
+ }
+ });
+});
+
describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
it("ships .codex-plugin/plugin.json with kebab-case name + version + references", () => {
const manifestPath = join(pluginRoot, ".codex-plugin/plugin.json");
@@ -72,8 +95,6 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
});
it("hook command scripts referenced in hooks.codex.json exist on disk", () => {
- type HookHandler = { type: string; command: string };
- type HookEntry = { hooks: HookHandler[] };
const hooks = readJson<{ hooks: Record }>(
join(pluginRoot, "hooks/hooks.codex.json"),
);
@@ -81,7 +102,7 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => {
for (const entries of Object.values(hooks.hooks)) {
for (const entry of entries) {
for (const handler of entry.hooks) {
- const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s]+)/);
+ const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s"]+)/);
if (match) scriptRefs.add(match[1]);
}
}
diff --git a/test/copilot-plugin.test.ts b/test/copilot-plugin.test.ts
new file mode 100644
index 00000000..e4121688
--- /dev/null
+++ b/test/copilot-plugin.test.ts
@@ -0,0 +1,377 @@
+import { describe, expect, it } from "vitest";
+import { readFileSync, existsSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { createServer } from "node:http";
+import { spawn } from "node:child_process";
+
+const repoRoot = resolve(__dirname, "..");
+const pluginRoot = join(repoRoot, "plugin");
+
+function readJson<T>(path: string): T { // Reads `path` as UTF-8 JSON; the cast to T is unchecked (no runtime validation).
+  return JSON.parse(readFileSync(path, "utf-8")) as T;
+}
+
+const SUPPORTED_COPILOT_EVENTS = new Set([
+ "sessionStart",
+ "userPromptSubmitted",
+ "preToolUse",
+ "postToolUse",
+ "postToolUseFailure",
+ "preCompact",
+ "agentStop",
+ "sessionEnd",
+ "subagentStart",
+ "subagentStop",
+ "notification",
+]);
+
+const REQUIRED_MINIMUM_EVENTS = [
+ "sessionStart",
+ "userPromptSubmitted",
+ "preToolUse",
+ "postToolUse",
+ "agentStop",
+];
+
+const KNOWN_SKILL_DIRS = [
+ "recall",
+ "remember",
+ "session-history",
+ "forget",
+ "handoff",
+ "recap",
+ "commit-context",
+ "commit-history",
+];
+
+describe("Copilot plugin manifest (plugin/plugin.json)", () => {
+ it("manifest exists with kebab-case name, version, and required fields", () => {
+ const manifestPath = join(pluginRoot, "plugin.json");
+ expect(existsSync(manifestPath)).toBe(true);
+ const manifest = readJson<{
+ name: string;
+ version: string;
+ description?: string;
+ skills?: string;
+ mcpServers?: string;
+ hooks?: string;
+ }>(manifestPath);
+ expect(manifest.name).toBe("agentmemory");
+ expect(manifest.name).toMatch(/^[a-z][a-z0-9-]*$/);
+ expect(manifest.version).toMatch(/^\d+\.\d+\.\d+/);
+ expect(manifest.skills).toBeDefined();
+ expect(manifest.mcpServers).toBeDefined();
+ expect(manifest.hooks).toBeDefined();
+ });
+
+ it("manifest version matches main package.json", () => {
+ const pkgVer = readJson<{ version: string }>(join(repoRoot, "package.json")).version;
+ const pluginVer = readJson<{ version: string }>(
+ join(pluginRoot, "plugin.json"),
+ ).version;
+ expect(pluginVer).toBe(pkgVer);
+ });
+
+ it("all referenced manifest paths resolve to existing files / directories", () => {
+ const manifest = readJson<{ skills: string; mcpServers: string; hooks: string }>(
+ join(pluginRoot, "plugin.json"),
+ );
+ const manifestDir = pluginRoot;
+ expect(existsSync(resolve(manifestDir, manifest.skills))).toBe(true);
+ expect(existsSync(resolve(manifestDir, manifest.mcpServers))).toBe(true);
+ expect(existsSync(resolve(manifestDir, manifest.hooks))).toBe(true);
+ });
+
+ it("skills path resolves and contains all known skill directories", () => {
+ const manifest = readJson<{ skills: string }>(join(pluginRoot, "plugin.json"));
+ const manifestDir = pluginRoot;
+ const skillsPath = resolve(manifestDir, manifest.skills);
+ for (const skill of KNOWN_SKILL_DIRS) {
+ expect(
+ existsSync(join(skillsPath, skill)),
+ `missing skill directory: ${skill}`,
+ ).toBe(true);
+ }
+ });
+});
+
+describe("Copilot MCP config (.mcp.copilot.json)", () => {
+  it("file exists with expected shape", () => { // pins the agentmemory server entry field by field
+    const mcpPath = join(pluginRoot, ".mcp.copilot.json");
+    expect(existsSync(mcpPath)).toBe(true);
+    const config = readJson<{
+      mcpServers: {
+        agentmemory: {
+          type: string;
+          command: string;
+          args: string[];
+          env: Record<string, string>;
+          tools: string[];
+        };
+      };
+    }>(mcpPath);
+    const server = config.mcpServers.agentmemory;
+    expect(server.type).toBe("local");
+    expect(server.command).toBe("npx");
+    expect(server.args).toEqual(["-y", "@agentmemory/mcp"]);
+    expect(server.env["AGENTMEMORY_URL"]).toBe("${AGENTMEMORY_URL}"); // the file must hold the literal placeholder text
+    expect(server.env["AGENTMEMORY_SECRET"]).toBe("${AGENTMEMORY_SECRET}");
+    expect(server.tools).toContain("*");
+  });
+});
+
+describe("Copilot hooks config (hooks/hooks.copilot.json)", () => {
+ type HookEntry = {
+ type: string;
+ command?: string;
+ bash?: string;
+ powershell?: string;
+ matcher?: string;
+ };
+
+ function loadHooks() { // Parses hooks/hooks.copilot.json; `hooks` maps an event name to its handler list.
+   return readJson<{ version: number; hooks: Record<string, HookEntry[]> }>(
+     join(pluginRoot, "hooks/hooks.copilot.json"),
+   );
+ }
+
+ it("has top-level version === 1 and hooks object", () => {
+ const config = loadHooks();
+ expect(config.version).toBe(1);
+ expect(config.hooks).toBeDefined();
+ expect(typeof config.hooks).toBe("object");
+ });
+
+ it("contains only supported Copilot event names", () => {
+ const config = loadHooks();
+ for (const event of Object.keys(config.hooks)) {
+ expect(
+ SUPPORTED_COPILOT_EVENTS.has(event),
+ `unsupported event "${event}" in hooks.copilot.json`,
+ ).toBe(true);
+ }
+ });
+
+ it("contains all required minimum events", () => {
+ const config = loadHooks();
+ const events = Object.keys(config.hooks);
+ for (const event of REQUIRED_MINIMUM_EVENTS) {
+ expect(events, `missing required event: ${event}`).toContain(event);
+ }
+ });
+
+ it("PreToolUse entry has the correct matcher", () => {
+ const config = loadHooks();
+ const preToolEntries = config.hooks["preToolUse"];
+ expect(preToolEntries).toBeDefined();
+ const withMatcher = preToolEntries.find(
+ (e) => e.matcher === "edit|write|create|read|view|glob|grep",
+ );
+ expect(
+ withMatcher,
+ "PreToolUse must have matcher edit|write|create|read|view|glob|grep",
+ ).toBeDefined();
+ });
+
+ it("every handler has type === 'command' and exactly one of command/bash/powershell", () => {
+ const config = loadHooks();
+ for (const [event, entries] of Object.entries(config.hooks)) {
+ for (const handler of entries) {
+ expect(handler.type, `${event} handler type`).toBe("command");
+ const commandFields = [handler.command, handler.bash, handler.powershell].filter(
+ (v): v is string => typeof v === "string" && v.trim().length > 0,
+ );
+ expect(
+ commandFields.length,
+ `${event} handler must have exactly one of command/bash/powershell`,
+ ).toBe(1);
+ }
+ }
+ });
+
+ it("every referenced script exists on disk", () => {
+   const config = loadHooks();
+   const scriptRefs = new Set<string>();
+   for (const entries of Object.values(config.hooks)) {
+     for (const handler of entries) {
+       const cmd = handler.command ?? handler.bash ?? handler.powershell ?? "";
+       const match = cmd.match(/\$\{(?:COPILOT_PLUGIN_ROOT|CLAUDE_PLUGIN_ROOT)\}\/(scripts\/[^\s"]+)/); // stop at `"` so a quoted path does not capture its closing quote
+       if (match) scriptRefs.add(match[1]);
+     }
+   }
+   expect(scriptRefs.size).toBeGreaterThan(0); // an empty set would let the loop below pass vacuously
+   for (const rel of scriptRefs) {
+     expect(existsSync(join(pluginRoot, rel)), `missing hook script: ${rel}`).toBe(true);
+   }
+ });
+});
+
+describe("Copilot hook scripts", () => {
+ type ObservedRequest = { path: string; body: Record<string, unknown> }; // one request seen by the stub server: URL path + parsed JSON body
+
+ async function runHook( // Runs a hook script with `payload` on stdin against a throwaway local HTTP server and records what it sends.
+   script: string,
+   payload: Record<string, unknown>,
+   env: Record<string, string> = {},
+ ): Promise<{ requests: ObservedRequest[]; stdout: string }> {
+   const requests: ObservedRequest[] = [];
+   const server = createServer((req, res) => {
+     let raw = "";
+     req.on("data", (chunk) => {
+       raw += chunk;
+     });
+     req.on("end", () => {
+       requests.push({
+         path: req.url ?? "",
+         body: raw ? (JSON.parse(raw) as Record<string, unknown>) : {},
+       });
+       res.writeHead(200, { "Content-Type": "application/json" });
+       res.end(JSON.stringify({ context: "remembered context" })); // same canned reply for every request
+     });
+   });
+
+   await new Promise<void>((resolveServer) => {
+     server.listen(0, "127.0.0.1", resolveServer); // port 0: the OS assigns a free port, read back below
+   });
+
+   const address = server.address();
+   if (!address || typeof address === "string") {
+     server.close();
+     throw new Error("test server did not bind to a TCP port");
+   }
+
+   try {
+     const child = spawn(process.execPath, [join(pluginRoot, script)], {
+       env: {
+         ...process.env,
+         AGENTMEMORY_URL: `http://127.0.0.1:${address.port}`,
+         AGENTMEMORY_SECRET: "",
+         ...env, // caller overrides win over the defaults above
+       },
+       stdio: ["pipe", "pipe", "pipe"],
+     });
+     let stdout = "";
+     let stderr = "";
+     child.stdout.on("data", (chunk) => {
+       stdout += chunk;
+     });
+     child.stderr.on("data", (chunk) => {
+       stderr += chunk;
+     });
+     child.stdin.end(JSON.stringify(payload));
+
+     const exitCode = await new Promise<number | null>((resolveExit, reject) => {
+       const timeout = setTimeout(() => {
+         child.kill();
+         reject(new Error(`hook ${script} timed out`));
+       }, 5000);
+       child.on("error", (err) => { clearTimeout(timeout); reject(err); }); // do not leave the 5s timer pending after a spawn failure
+       child.on("close", (code) => {
+         clearTimeout(timeout);
+         resolveExit(code);
+       });
+     });
+
+     expect(exitCode, stderr).toBe(0); // stderr doubles as the failure message
+     return { requests, stdout };
+   } finally {
+     await new Promise<void>((resolveClose) => {
+       server.close(() => resolveClose());
+     });
+   }
+ }
+
+ it("session-start accepts Copilot camelCase sessionId", async () => {
+ const result = await runHook(
+ "scripts/session-start.mjs",
+ { sessionId: "copilot-session", cwd: "C:\\repo" },
+ { AGENTMEMORY_INJECT_CONTEXT: "true" },
+ );
+
+ expect(result.stdout).toBe("remembered context");
+ expect(result.requests[0]?.path).toBe("/agentmemory/session/start");
+ expect(result.requests[0]?.body).toMatchObject({
+ sessionId: "copilot-session",
+ project: "C:\\repo",
+ cwd: "C:\\repo",
+ });
+ });
+
+ it("pre-tool-use narrows Copilot sessionId to strings", async () => {
+ const result = await runHook(
+ "scripts/pre-tool-use.mjs",
+ {
+ sessionId: 123,
+ toolName: "read",
+ toolArgs: { path: "src/index.ts" },
+ },
+ { AGENTMEMORY_INJECT_CONTEXT: "true" },
+ );
+
+ expect(result.stdout).toBe("remembered context");
+ expect(result.requests[0]?.path).toBe("/agentmemory/enrich");
+ expect(result.requests[0]?.body).toMatchObject({
+ sessionId: "unknown",
+ files: ["src/index.ts"],
+ terms: [],
+ toolName: "read",
+ });
+ });
+
+ it("prompt-submit accepts Copilot camelCase prompt payload", async () => {
+ const result = await runHook("scripts/prompt-submit.mjs", {
+ sessionId: "copilot-session",
+ cwd: "C:\\repo",
+ userPrompt: "remember this prompt",
+ });
+
+ expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+ expect(result.requests[0]?.body).toMatchObject({
+ hookType: "prompt_submit",
+ sessionId: "copilot-session",
+ data: { prompt: "remember this prompt" },
+ });
+ });
+
+ it("post-tool-failure accepts Copilot camelCase tool and error payloads", async () => {
+ const result = await runHook("scripts/post-tool-failure.mjs", {
+ sessionId: "copilot-session",
+ cwd: "C:\\repo",
+ toolName: "edit",
+ toolArgs: { filePath: "src/index.ts" },
+ errorMessage: "failed",
+ });
+
+ expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+ expect(result.requests[0]?.body).toMatchObject({
+ hookType: "post_tool_failure",
+ sessionId: "copilot-session",
+ data: {
+ tool_name: "edit",
+ tool_input: JSON.stringify({ filePath: "src/index.ts" }),
+ error: "failed",
+ },
+ });
+ });
+
+ it("notification accepts Copilot camelCase notificationType", async () => {
+ const result = await runHook("scripts/notification.mjs", {
+ sessionId: "copilot-session",
+ cwd: "C:\\repo",
+ notificationType: "permission_prompt",
+ title: "Tool approval",
+ message: "Approve edit",
+ });
+
+ expect(result.requests[0]?.path).toBe("/agentmemory/observe");
+ expect(result.requests[0]?.body).toMatchObject({
+ hookType: "notification",
+ sessionId: "copilot-session",
+ data: {
+ notification_type: "permission_prompt",
+ title: "Tool approval",
+ message: "Approve edit",
+ },
+ });
+ });
+});
diff --git a/test/diagnostics.test.ts b/test/diagnostics.test.ts
index d2dc706e..053e1c40 100644
--- a/test/diagnostics.test.ts
+++ b/test/diagnostics.test.ts
@@ -195,7 +195,10 @@ describe("Diagnostics Functions", () => {
};
expect(result.success).toBe(true);
- expect(result.summary.pass).toBe(8);
+ // 14 = 8 original (actions, leases, sentinels, sketches, signals,
+ // sessions, memories, mesh) + 6 added in #lesson-visibility
+ // (lessons, summaries, semantic, procedural, crystals, insights).
+ expect(result.summary.pass).toBe(14);
expect(result.summary.warn).toBe(0);
expect(result.summary.fail).toBe(0);
expect(result.summary.fixable).toBe(0);
@@ -636,4 +639,229 @@ describe("Diagnostics Functions", () => {
expect(unchanged!.status).toBe("blocked");
});
});
+
+ describe("per-store tally categories (#lesson-visibility)", () => {
+ it("lessons category: passes with valid live lessons + ignores tombstoned", async () => {
+ await kv.set(KV.lessons, "lsn_live", {
+ id: "lsn_live", content: "x", context: "", confidence: 0.8,
+ reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+ createdAt: "", updatedAt: "", decayRate: 0.05,
+ });
+ await kv.set(KV.lessons, "lsn_tomb", {
+ id: "lsn_tomb", content: "x", context: "", confidence: 0.5,
+ reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+ createdAt: "", updatedAt: "", decayRate: 0.05, deleted: true,
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["lessons"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const ok = result.checks.find((c) => c.name === "lessons-ok");
+ expect(ok?.status).toBe("pass");
+ expect(ok?.message).toMatch(/All 1 lessons.*1 tombstoned/);
+ });
+
+ it("lessons category: warns on out-of-range confidence", async () => {
+ await kv.set(KV.lessons, "lsn_bad", {
+ id: "lsn_bad", content: "x", context: "", confidence: 1.5,
+ reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+ createdAt: "", updatedAt: "", decayRate: 0.05,
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["lessons"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("summaries category: warns on missing title", async () => {
+ await kv.set(KV.summaries, "ses_1", {
+ sessionId: "ses_1", project: "p", createdAt: "", title: "",
+ narrative: "n", keyDecisions: [], filesModified: [], concepts: [],
+ observationCount: 1,
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["summaries"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("procedural category: warns on empty steps", async () => {
+ await kv.set(KV.procedural, "proc_1", {
+ id: "proc_1", name: "noop", steps: [], triggerCondition: "x",
+ frequency: 1, sourceSessionIds: [], strength: 0.5,
+ createdAt: "", updatedAt: "",
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["procedural"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("procedural-empty-steps:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("crystals category: warns on empty narrative", async () => {
+ await kv.set(KV.crystals, "cry_1", {
+ id: "cry_1", narrative: "", keyOutcomes: [], filesAffected: [],
+ lessons: [], sourceActionIds: [], createdAt: "",
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["crystals"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("insights category: warns on out-of-range confidence", async () => {
+ await kv.set(KV.insights, "ins_bad", {
+ id: "ins_bad", title: "t", content: "c", confidence: -0.1,
+ reinforcements: 0, sourceConceptCluster: [], sourceMemoryIds: [],
+ sourceLessonIds: [], sourceCrystalIds: [], tags: [],
+ createdAt: "", updatedAt: "", decayRate: 0.05,
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["insights"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("insight-bad-confidence:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("semantic category: warns on out-of-range confidence", async () => {
+ await kv.set(KV.semantic, "sem_bad", {
+ id: "sem_bad", fact: "f", confidence: 2.0, sourceSessionIds: [],
+ sourceMemoryIds: [], accessCount: 0, lastAccessedAt: "",
+ strength: 0, createdAt: "", updatedAt: "",
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["semantic"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("semantic-bad-confidence:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("categories filter accepts new categories and skips others", async () => {
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["lessons", "summaries"],
+ })) as { checks: DiagnosticCheck[] };
+
+ expect(result.checks.every((c) => c.category === "lessons" || c.category === "summaries")).toBe(true);
+ expect(result.checks.some((c) => c.category === "lessons")).toBe(true);
+ expect(result.checks.some((c) => c.category === "summaries")).toBe(true);
+ });
+
+ describe("defensive row-shape handling (CodeRabbit #473 review)", () => {
+ it("NaN/Infinity confidence on a lesson is flagged as warn, not silently passed", async () => {
+ await kv.set(KV.lessons, "lsn_nan", {
+ id: "lsn_nan", content: "x", context: "", confidence: NaN,
+ reinforcements: 0, source: "manual", sourceIds: [], tags: [],
+ createdAt: "", updatedAt: "", decayRate: 0.05,
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["lessons"],
+ })) as { checks: DiagnosticCheck[] };
+
+ const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("non-string summary title doesn't throw — surfaces as warn", async () => {
+ await kv.set(KV.summaries, "ses_bad_title", {
+ sessionId: "ses_bad_title",
+ project: "p",
+ createdAt: "",
+ title: null as unknown as string, // simulate corrupted row
+ narrative: "n",
+ keyDecisions: [],
+ filesModified: [],
+ concepts: [],
+ observationCount: 1,
+ });
+
+ // The bug to guard against: the old code called .trim() unconditionally,
+ // which throws on null/number, which aborts the whole diagnose run and
+ // any later category check never executes. Verify diagnose completes
+ // AND surfaces the bad row.
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["summaries", "lessons"],
+ })) as { checks: DiagnosticCheck[]; success?: boolean };
+
+ expect(result.success).toBe(true);
+ const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:"));
+ expect(warn?.status).toBe("warn");
+ // Later category still ran:
+ expect(result.checks.some((c) => c.category === "lessons")).toBe(true);
+ });
+
+ it("non-string crystal narrative doesn't throw — surfaces as warn", async () => {
+ await kv.set(KV.crystals, "cry_bad", {
+ id: "cry_bad",
+ narrative: undefined as unknown as string,
+ keyOutcomes: [],
+ filesAffected: [],
+ lessons: [],
+ sourceActionIds: [],
+ createdAt: "",
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["crystals"],
+ })) as { checks: DiagnosticCheck[]; success?: boolean };
+
+ expect(result.success).toBe(true);
+ const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:"));
+ expect(warn?.status).toBe("warn");
+ });
+
+ it("Infinity confidence on insight + semantic both flagged", async () => {
+ await kv.set(KV.insights, "ins_inf", {
+ id: "ins_inf",
+ title: "t",
+ content: "c",
+ confidence: Infinity,
+ reinforcements: 0,
+ sourceConceptCluster: [],
+ sourceMemoryIds: [],
+ sourceLessonIds: [],
+ sourceCrystalIds: [],
+ tags: [],
+ createdAt: "",
+ updatedAt: "",
+ decayRate: 0.05,
+ });
+ await kv.set(KV.semantic, "sem_nan", {
+ id: "sem_nan",
+ fact: "f",
+ confidence: NaN,
+ sourceSessionIds: [],
+ sourceMemoryIds: [],
+ accessCount: 0,
+ lastAccessedAt: "",
+ strength: 0,
+ createdAt: "",
+ updatedAt: "",
+ });
+
+ const result = (await sdk.trigger("mem::diagnose", {
+ categories: ["insights", "semantic"],
+ })) as { checks: DiagnosticCheck[] };
+
+ expect(result.checks.find((c) => c.name === "insight-bad-confidence:ins_inf")?.status).toBe("warn");
+ expect(result.checks.find((c) => c.name === "semantic-bad-confidence:sem_nan")?.status).toBe("warn");
+ });
+ });
+ });
});
diff --git a/test/env-loader.test.ts b/test/env-loader.test.ts
index 9c6f2955..17ff6a8e 100644
--- a/test/env-loader.test.ts
+++ b/test/env-loader.test.ts
@@ -25,6 +25,7 @@ describe("loadEnvFile", () => {
process.env["HOME"] = sandboxHome;
process.env["USERPROFILE"] = sandboxHome;
delete process.env["AGENTMEMORY_AUTO_COMPRESS"];
+ delete process.env["AGENTMEMORY_DROP_STALE_INDEX"];
delete process.env["CONSOLIDATION_ENABLED"];
delete process.env["GRAPH_EXTRACTION_ENABLED"];
delete process.env["TOKEN"];
@@ -82,4 +83,10 @@ describe("loadEnvFile", () => {
const cfg = await freshConfig();
expect(cfg.getEnvVar("TOKEN")).toBe("abc");
});
+
+ it("reads AGENTMEMORY_DROP_STALE_INDEX from the env file", async () => {
+ writeEnv("AGENTMEMORY_DROP_STALE_INDEX=true");
+ const cfg = await freshConfig();
+ expect(cfg.isDropStaleIndexEnabled()).toBe(true);
+ });
});
diff --git a/test/eval-adapters.test.ts b/test/eval-adapters.test.ts
new file mode 100644
index 00000000..90f914f5
--- /dev/null
+++ b/test/eval-adapters.test.ts
@@ -0,0 +1,92 @@
+import { describe, it, expect } from "vitest";
+import { readFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { grepAdapter } from "../eval/runner/adapters/grep.js";
+import { aggregate, scoreQuestion } from "../eval/runner/score.js";
+import type { Question, Session } from "../eval/runner/types.js";
+
+const DATA_DIR = resolve(__dirname, "..", "eval", "data", "coding-agent-life-v1");
+const sessions = JSON.parse(readFileSync(`${DATA_DIR}/sessions.json`, "utf8")) as Session[];
+const queries = JSON.parse(readFileSync(`${DATA_DIR}/queries.json`, "utf8")) as Array<
+ Omit
+>;
+
+describe("eval scaffold", () => {
+ it("coding-agent-life-v1 corpus is well-formed", () => {
+ expect(sessions.length).toBeGreaterThan(0);
+ expect(queries.length).toBeGreaterThan(0);
+ const sessionIds = new Set(sessions.map((s) => s.id));
+ for (const q of queries) {
+ expect(q.goldSessionIds.length).toBeGreaterThan(0);
+ for (const id of q.goldSessionIds) {
+ expect(sessionIds.has(id)).toBe(true);
+ }
+ }
+ });
+
+ it("grep adapter ranks gold session in top-5 for most queries", async () => {
+ const state = await grepAdapter.init(sessions);
+ let hits = 0;
+ for (const q of queries) {
+ const ranked = await grepAdapter.query(q.question, state, 5);
+ const topIds = new Set(ranked.map((r) => r.sessionId));
+ if (q.goldSessionIds.some((id) => topIds.has(id))) hits += 1;
+ }
+ expect(hits / queries.length).toBeGreaterThan(0.5);
+ });
+
+ it("scoreQuestion computes P@K, R@K, hit, topGoldRank", () => {
+ const q: Question = {
+ id: "test",
+ type: "single-session",
+ question: "?",
+ goldSessionIds: ["a", "b"],
+ haystack: [],
+ };
+ const ranked = [
+ { sessionId: "x", score: 0.9 },
+ { sessionId: "a", score: 0.7 },
+ { sessionId: "y", score: 0.5 },
+ { sessionId: "b", score: 0.3 },
+ ];
+ const row = scoreQuestion(q, ranked, 5, "test", 12);
+ expect(row.hit).toBe(true);
+ expect(row.recallAtK).toBe(1);
+ expect(row.precisionAtK).toBeCloseTo(2 / 5);
+ expect(row.topGoldRank).toBe(2);
+ });
+
+ it("scoreQuestion handles miss", () => {
+ const q: Question = {
+ id: "test",
+ type: "x",
+ question: "?",
+ goldSessionIds: ["a"],
+ haystack: [],
+ };
+ const ranked = [
+ { sessionId: "x", score: 1 },
+ { sessionId: "y", score: 0.5 },
+ ];
+ const row = scoreQuestion(q, ranked, 5, "test", 5);
+ expect(row.hit).toBe(false);
+ expect(row.recallAtK).toBe(0);
+ expect(row.topGoldRank).toBeNull();
+ });
+
+ it("aggregate computes per-adapter and per-type means", () => {
+ const q: Question = {
+ id: "1",
+ type: "t1",
+ question: "?",
+ goldSessionIds: ["a"],
+ haystack: [],
+ };
+ const row1 = scoreQuestion(q, [{ sessionId: "a", score: 1 }], 5, "grep", 10);
+ const row2 = scoreQuestion(q, [{ sessionId: "x", score: 1 }], 5, "grep", 20);
+ const agg = aggregate([row1, row2]);
+ expect(agg.byAdapter.grep.hit).toBe(1);
+ expect(agg.byAdapter.grep.n).toBe(2);
+ expect(agg.byType.t1.grep.n).toBe(2);
+ });
+});
diff --git a/test/export-import.test.ts b/test/export-import.test.ts
index 4426ce8e..373d2518 100644
--- a/test/export-import.test.ts
+++ b/test/export-import.test.ts
@@ -119,7 +119,7 @@ describe("Export/Import Functions", () => {
it("export produces valid ExportData structure", async () => {
const result = (await sdk.trigger("mem::export", {})) as ExportData;
- expect(result.version).toBe("0.9.20");
+ expect(result.version).toBe("0.9.21");
expect(result.exportedAt).toBeDefined();
expect(result.sessions.length).toBe(1);
expect(result.sessions[0].id).toBe("ses_1");
diff --git a/test/fs-watcher.test.ts b/test/fs-watcher.test.ts
index 76212b06..48c1b094 100644
--- a/test/fs-watcher.test.ts
+++ b/test/fs-watcher.test.ts
@@ -12,7 +12,7 @@ function wait(ms: number): Promise {
return new Promise((r) => setTimeout(r, ms));
}
-describe("FilesystemWatcher", () => {
+describe("FilesystemWatcher", { retry: 2 }, () => {
let root: string;
const originalFetch = globalThis.fetch;
let captured: Array<{ url: string; body: unknown; headers: Record }>;
@@ -49,7 +49,7 @@ describe("FilesystemWatcher", () => {
w.start();
try {
writeFileSync(join(root, "notes.md"), "hello world\n");
- await wait(800);
+ await wait(1500);
expect(captured.length).toBeGreaterThanOrEqual(1);
const obs = captured[captured.length - 1];
expect(obs.url).toBe("http://localhost:3111/agentmemory/observe");
@@ -87,7 +87,7 @@ describe("FilesystemWatcher", () => {
w.start();
try {
unlinkSync(join(root, "old.md"));
- await wait(800);
+ await wait(1500);
const deletes = captured.filter(
(c) => (c.body as { data: { changeKind: string } }).data?.changeKind === "file_delete",
);
@@ -116,7 +116,7 @@ describe("FilesystemWatcher", () => {
w.start();
try {
writeFileSync(join(root, "node_modules", "ignored.js"), "x");
- await wait(800);
+ await wait(1500);
const matches = captured.filter((c) =>
(c.body as { data: { files: string[] } }).data?.files?.some((f) => f.includes("ignored.js")),
);
@@ -136,7 +136,7 @@ describe("FilesystemWatcher", () => {
w.start();
try {
writeFileSync(join(root, "secret.md"), "bearer test\n");
- await wait(800);
+ await wait(1500);
expect(captured.length).toBeGreaterThanOrEqual(1);
const headers = captured[captured.length - 1].headers as Record;
expect(headers.authorization).toBe("Bearer shhh");
diff --git a/test/hermes-plugin.test.ts b/test/hermes-plugin.test.ts
new file mode 100644
index 00000000..f13f06f3
--- /dev/null
+++ b/test/hermes-plugin.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from "vitest";
+import { readFileSync } from "node:fs";
+
+const expectedHermesHooks = [
+ "prefetch",
+ "sync_turn",
+ "on_session_end",
+ "on_pre_compress",
+ "on_memory_write",
+ "system_prompt_block",
+];
+
+function readHermesPluginHooks(): string[] { // Extracts the `hooks:` list from plugin.yaml with a line scanner (no YAML parser involved).
+  const manifest = readFileSync("integrations/hermes/plugin.yaml", "utf8");
+  const hooks: string[] = [];
+  let inHooks = false;
+
+  for (const line of manifest.split(/\r?\n/)) {
+    if (line.trim() === "hooks:") {
+      inHooks = true;
+      continue;
+    }
+    if (!inHooks) continue;
+    if (line.trim() === "") continue;
+    if (!/^[ -]/.test(line)) break; // any other column-0 text ends the list; zero-indented `- item` entries are valid YAML and stay in
+
+    const match = line.match(/^\s*-\s*([A-Za-z_][A-Za-z0-9_]*)\s*$/);
+    if (match) hooks.push(match[1]);
+  }
+
+  return hooks;
+}
+
+function isHermesLifecycleHook(methodName: string): boolean {
+  // A lifecycle hook is one of three fixed method names, or any name prefixed with `on_`.
+  switch (methodName) {
+    case "prefetch": case "sync_turn": case "system_prompt_block":
+      return true;
+  }
+  return methodName.startsWith("on_");
+}
+
+function readAgentMemoryProviderHookMethods(): string[] {
+  // Scan the Python provider source for `def` lines and keep, in file order,
+  // only the method names that isHermesLifecycleHook accepts.
+  const pythonSource = readFileSync("integrations/hermes/__init__.py", "utf8");
+  const defLine = /^ def ([a-z_][a-z0-9_]*)\(/gm;
+
+  const hookNames = Array.from(pythonSource.matchAll(defLine), (hit) => hit[1]).filter(
+    (name) => isHermesLifecycleHook(name),
+  );
+
+  return hookNames;
+}
+
+describe("Hermes plugin manifest", () => {
+  it("declares every implemented lifecycle hook", () => {
+    const fromManifest = readHermesPluginHooks();
+    const fromProvider = readAgentMemoryProviderHookMethods();
+    // First an order-insensitive match against the provider, then the exact pinned manifest order.
+    expect(fromManifest.slice().sort()).toEqual(fromProvider.slice().sort());
+    expect(fromManifest).toEqual(expectedHermesHooks);
+  });
+});
diff --git a/test/mcp-standalone-proxy.test.ts b/test/mcp-standalone-proxy.test.ts
index 0d93b227..dc08a024 100644
--- a/test/mcp-standalone-proxy.test.ts
+++ b/test/mcp-standalone-proxy.test.ts
@@ -75,6 +75,61 @@ describe("@agentmemory/mcp standalone — server proxy (issue #159)", () => {
expect(body.results[0].id).toBe("m1");
});
+ it("proxies memory_recall to POST /agentmemory/search and forwards format/token_budget (#507)", async () => {
+ const calls: Array<{ url: string; body?: unknown }> = [];
+ installFetch((url, init) => {
+ if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 });
+ const body = init?.body ? JSON.parse(init.body as string) : undefined;
+ calls.push({ url, body });
+ if (url.endsWith("/agentmemory/search")) {
+ return new Response(
+ JSON.stringify({
+ mode: "full",
+ facts: [{ id: "m1" }],
+ narrative: "n",
+ concepts: ["c"],
+ files: ["f"],
+ }),
+ { status: 200, headers: { "content-type": "application/json" } },
+ );
+ }
+ return new Response("not found", { status: 404 });
+ });
+ const res = await handleToolCall("memory_recall", {
+ query: "auth bug",
+ limit: 5,
+ format: "full",
+ token_budget: 800,
+ });
+ const body = JSON.parse(res.content[0].text);
+ expect(body.mode).toBe("full");
+ expect(body.facts[0].id).toBe("m1");
+ const searchCall = calls.find((c) => c.url.endsWith("/agentmemory/search"));
+ expect(searchCall).toBeDefined();
+ expect(searchCall?.body).toEqual({
+ query: "auth bug",
+ limit: 5,
+ format: "full",
+ token_budget: 800,
+ });
+ expect(calls.find((c) => c.url.endsWith("/agentmemory/smart-search"))).toBeUndefined();
+ });
+
+ it("memory_recall defaults format to 'full' when omitted (#507)", async () => {
+   let recallBody: Record<string, unknown> | undefined; // JSON body captured from the call to /agentmemory/search
+   installFetch((url, init) => {
+     if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 });
+     if (url.endsWith("/agentmemory/search")) {
+       recallBody = init?.body ? JSON.parse(init.body as string) : undefined;
+       return new Response(JSON.stringify({ mode: "full", facts: [] }), { status: 200 });
+     }
+     return new Response("not found", { status: 404 });
+   });
+   await handleToolCall("memory_recall", { query: "x" }); // neither format nor token_budget supplied
+   expect(recallBody?.["format"]).toBe("full");
+   expect(recallBody).not.toHaveProperty("token_budget");
+ });
+
it("proxies memory_governance_delete to the DELETE REST endpoint", async () => {
const calls: Array<{ url: string; method: string; body?: unknown }> = [];
installFetch((url, init) => {
diff --git a/test/mcp-transport.test.ts b/test/mcp-transport.test.ts
index bb8627dc..006ecc9e 100644
--- a/test/mcp-transport.test.ts
+++ b/test/mcp-transport.test.ts
@@ -1,5 +1,7 @@
import { describe, it, expect, vi } from "vitest";
import {
+ createMessageParser,
+ formatResponse,
processLine,
type JsonRpcResponse,
type RequestHandler,
@@ -227,3 +229,47 @@ describe("processLine — id type validation (JSON-RPC §4)", () => {
expect(c.out[0].result).toEqual({ method: "ping" });
});
});
+
+describe("stdio framing", () => {
+ it("parses Content-Length framed MCP messages split across chunks", () => {
+ const messages: string[] = [];
+ const parser = createMessageParser((message) => messages.push(message));
+ const body = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "initialize" });
+ const framed = `Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n${body}`;
+
+ parser.push(framed.slice(0, 12));
+ parser.push(framed.slice(12));
+
+ expect(messages).toEqual([body]);
+ expect(parser.isFramed()).toBe(true);
+ });
+
+ it("parses newline-delimited JSON for existing clients", () => {
+ const messages: string[] = [];
+ const parser = createMessageParser((message) => messages.push(message));
+ const first = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/list" });
+ const second = JSON.stringify({ jsonrpc: "2.0", method: "notifications/initialized" });
+
+ parser.push(`${first}\n${second}\n`);
+
+ expect(messages).toEqual([first, second]);
+ expect(parser.isFramed()).toBe(false);
+ });
+
+ it("formats responses with Content-Length framing when requested", () => {
+ const response: JsonRpcResponse = {
+ jsonrpc: "2.0",
+ id: 1,
+ result: { ok: true },
+ };
+ const formatted = formatResponse(response, true);
+
+ expect(Array.isArray(formatted)).toBe(true);
+ if (!Array.isArray(formatted)) throw new Error("expected framed response");
+ const header = formatted[0].toString("ascii");
+ const body = formatted[1].toString("utf8");
+
+ expect(header).toBe(`Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n`);
+ expect(JSON.parse(body)).toEqual(response);
+ });
+});
diff --git a/test/onboarding.test.ts b/test/onboarding.test.ts
new file mode 100644
index 00000000..053085b8
--- /dev/null
+++ b/test/onboarding.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, it } from "vitest";
+
+import { buildAgentOptions, getInitialAgentValues } from "../src/cli/onboarding.js";
+
+describe("first-run onboarding", () => { // agent picker: offered options and env-driven defaults
+  it("offers GitHub Copilot CLI as a native setup target", () => {
+    const copilotEntry = expect.objectContaining({
+      value: "copilot-cli",
+      label: expect.stringContaining("GitHub Copilot CLI"),
+      hint: "native plugin",
+    });
+
+    const agentChoices = buildAgentOptions();
+    expect(agentChoices).toEqual(
+      expect.arrayContaining([copilotEntry]),
+    );
+  });
+
+  it("selects GitHub Copilot CLI by default when running inside Copilot CLI", () => {
+    expect(getInitialAgentValues({ COPILOT_CLI: "1" })).toEqual(["copilot-cli"]); // flag variable
+    expect(getInitialAgentValues({ COPILOT_AGENT_SESSION_ID: "session" })).toEqual(["copilot-cli"]); // session-id variable
+  });
+
+  it("keeps Claude Code as the default outside known agent environments", () => {
+    expect(getInitialAgentValues({})).toEqual(["claude-code"]); // empty environment
+  });
+});
diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts
index 4f22d1a9..9d0c94e0 100644
--- a/test/smart-search.test.ts
+++ b/test/smart-search.test.ts
@@ -193,4 +193,102 @@ describe("Smart Search Function", () => {
} | null;
expect(log?.count).toBe(1);
});
+
+ describe("lesson inclusion (#lesson-visibility)", () => { // smart-search returns recalled lessons next to observations
+   it("compact mode returns lessons array alongside observation results", async () => {
+     sdk.registerFunction("mem::lesson-recall", async (payload: any) => ({
+       success: true,
+       lessons: [
+         { id: "lsn_a", content: "always rebase before push", confidence: 0.9, createdAt: "2026-04-01T00:00:00Z", project: "p", tags: ["git"], score: 0.81 },
+         { id: "lsn_b", content: "never force-push to main", confidence: 0.95, createdAt: "2026-04-02T00:00:00Z", project: "p", tags: ["git"], score: 0.76 },
+       ],
+     }));
+
+     const response = (await sdk.trigger("mem::smart-search", {
+       query: "rebase",
+     })) as { mode: string; results: CompactSearchResult[]; lessons?: any[] };
+
+     expect(response.mode).toBe("compact");
+     expect(response.results).toHaveLength(2); // observations unchanged
+     expect(response.lessons).toBeDefined();
+     expect(response.lessons!).toHaveLength(2);
+     expect(response.lessons![0]).toMatchObject({
+       lessonId: "lsn_a",
+       confidence: 0.9,
+       score: 0.81,
+     });
+     expect(response.lessons![0].tags).toEqual(["git"]);
+   });
+
+   it("compact mode truncates long lesson content for preview", async () => {
+     const oversized = "x".repeat(500);
+     sdk.registerFunction("mem::lesson-recall", async () => ({
+       success: true,
+       lessons: [{ id: "lsn_long", content: oversized, confidence: 0.5, createdAt: "", tags: [], score: 0.4 }],
+     }));
+
+     const response = (await sdk.trigger("mem::smart-search", {
+       query: "x",
+     })) as { lessons: any[] };
+
+     expect(response.lessons[0].content.length).toBeLessThan(oversized.length);
+     expect(response.lessons[0].content).toMatch(/…$/); // preview must end with an ellipsis
+   });
+
+   it("includeLessons:false omits the lessons array entirely", async () => {
+     // No lesson-recall handler registered — would throw if invoked.
+     const response = (await sdk.trigger("mem::smart-search", {
+       query: "auth",
+       includeLessons: false,
+     })) as { mode: string; results: CompactSearchResult[]; lessons?: unknown };
+
+     expect(response.results).toHaveLength(2);
+     expect(response.lessons).toBeUndefined();
+   });
+
+   it("forwards project filter to mem::lesson-recall", async () => {
+     let seenPayload: any = null;
+     sdk.registerFunction("mem::lesson-recall", async (payload: any) => {
+       seenPayload = payload; // capture what smart-search passed along
+       return { success: true, lessons: [] };
+     });
+
+     await sdk.trigger("mem::smart-search", {
+       query: "rebase",
+       project: "gitops-assistant",
+     });
+
+     expect(seenPayload).toMatchObject({
+       query: "rebase",
+       project: "gitops-assistant",
+     });
+   });
+
+   it("tolerates mem::lesson-recall failure: returns empty lessons, observations unchanged", async () => {
+     sdk.registerFunction("mem::lesson-recall", async () => {
+       throw new Error("lessons store unavailable");
+     });
+
+     const response = (await sdk.trigger("mem::smart-search", {
+       query: "auth",
+     })) as { results: CompactSearchResult[]; lessons: any[] };
+
+     expect(response.results).toHaveLength(2);
+     expect(response.lessons).toEqual([]);
+   });
+
+   it("tolerates non-success lesson-recall response shape", async () => {
+     sdk.registerFunction("mem::lesson-recall", async () => ({
+       success: false,
+       error: "query is required",
+     }));
+
+     const response = (await sdk.trigger("mem::smart-search", {
+       query: "auth",
+     })) as { results: CompactSearchResult[]; lessons: any[] };
+
+     expect(response.results).toHaveLength(2);
+     expect(response.lessons).toEqual([]);
+   });
+ });
});
diff --git a/test/summarize.test.ts b/test/summarize.test.ts
new file mode 100644
index 00000000..03aa1926
--- /dev/null
+++ b/test/summarize.test.ts
@@ -0,0 +1,417 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+
+vi.mock("../src/logger.js", () => ({ // replace the logger with vi.fn() stubs
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+vi.mock("../src/state/schema.js", () => ({ // KV scope names used by this test file
+ KV: {
+ sessions: "sessions",
+ summaries: "summaries",
+ observations: (sessionId: string) => `obs:${sessionId}`, // same "obs:<id>" scope that setupHandler seeds
+ audit: "audit",
+ },
+}));
+
+vi.mock("../src/eval/schemas.js", () => ({ // schema stubbed as an empty object
+ SummaryOutputSchema: {},
+}));
+
+vi.mock("../src/eval/validator.js", () => ({ // validation stub: always reports valid with no errors
+ validateOutput: () => ({ valid: true, result: { errors: [] } }),
+}));
+
+vi.mock("../src/eval/quality.js", () => ({ // quality stub: constant score of 100
+ scoreSummary: () => 100,
+}));
+
+vi.mock("../src/functions/audit.js", () => ({ // audit stub: vi.fn() that records calls
+ safeAudit: vi.fn(),
+}));
+
+import { registerSummarizeFunction } from "../src/functions/summarize.js";
+import type {
+ CompressedObservation,
+ Session,
+ MemoryProvider,
+} from "../src/types.js";
+
+function mockKV() { // in-memory KV double: scope -> (key -> value); every method is async
+  const store = new Map<string, Map<string, unknown>>();
+  return {
+    store, // exposed so tests can inspect raw contents
+    get: async <T>(scope: string, key: string): Promise<T | null> =>
+      (store.get(scope)?.get(key) as T) ?? null, // unknown scope or key resolves to null
+    set: async <T>(scope: string, key: string, data: T): Promise<T> => {
+      if (!store.has(scope)) store.set(scope, new Map()); // create the scope on first write
+      store.get(scope)!.set(key, data);
+      return data;
+    },
+    delete: async (scope: string, key: string): Promise<void> => {
+      store.get(scope)?.delete(key); // no-op when the scope does not exist
+    },
+    list: async <T>(scope: string): Promise<T[]> => {
+      const entries = store.get(scope);
+      return entries ? (Array.from(entries.values()) as T[]) : []; // values in insertion order
+    },
+  };
+}
+
+function mockSdk() { // minimal SDK double: records registered handlers; triggers are no-ops
+  const functions = new Map<string, Function>(); // typed to match registerFunction's handler parameter
+  return {
+    functions, // exposed so tests can look a handler up by id and call it directly
+    registerFunction: (id: string, handler: Function) => {
+      functions.set(id, handler); // a later registration under the same id replaces the earlier one
+    },
+    registerTrigger: () => {},
+    trigger: async () => ({}), // always resolves to an empty object
+  };
+}
+
+function makeObs(i: number, sessionId: string): CompressedObservation { // synthetic observation number i for sessionId
+  const label = `obs ${i}`;
+  const observation: CompressedObservation = {
+    id: `obs_${i}`, sessionId,
+    timestamp: new Date().toISOString(), type: "conversation",
+    title: label,
+    facts: [`fact ${i}`],
+    narrative: `narrative for ${label}`,
+    concepts: [],
+    files: [`src/file_${i}.ts`],
+    importance: 5,
+  };
+  return observation;
+}
+
+function makeProvider(responses: string[]): MemoryProvider & {
+  calls: Array<{ system: string; user: string }>;
+} { // scripted fake: the n-th summarize() call returns responses[n]; the last entry repeats once exhausted
+  const recorded: Array<{ system: string; user: string }> = [];
+  let cursor = 0;
+  return {
+    name: "test",
+    calls: recorded, // every summarize() invocation is logged here in call order
+    compress: async () => "",
+    summarize: async (system: string, user: string) => {
+      recorded.push({ system, user });
+      const slot = cursor;
+      cursor += 1;
+      return responses[slot] ?? responses[responses.length - 1];
+    },
+  };
+}
+
+function summaryXml(opts: { // renders the XML summary text a scripted provider returns
+  title: string;
+  narrative?: string; // falls back to the literal "narrative"
+  decisions?: string[];
+  files?: string[];
+  concepts?: string[];
+}): string { // NOTE(review): element names are assumed — confirm against the summary parser in src/functions/summarize.ts
+  const d = (opts.decisions ?? []).map((x) => `<decision>${x}</decision>`).join("");
+  const f = (opts.files ?? []).map((x) => `<file>${x}</file>`).join("");
+  const c = (opts.concepts ?? []).map((x) => `<concept>${x}</concept>`).join("");
+  return `<summary>
+<title>${opts.title}</title>
+<narrative>${opts.narrative ?? "narrative"}</narrative>
+<decisions>${d}</decisions>
+<files>${f}</files>
+<concepts>${c}</concepts>
+</summary>`;
+}
+
+async function setupHandler(opts: { // seeds a session and its observations, registers mem::summarize, returns the handler
+  sessionId: string;
+  obsCount: number;
+  provider: MemoryProvider;
+}) {
+  const { sessionId, obsCount, provider } = opts;
+  const sdk = mockSdk();
+  const kv = mockKV();
+  const seededSession: Session = {
+    id: sessionId, project: "test-project", cwd: "/tmp",
+    startedAt: new Date().toISOString(),
+    status: "completed",
+    observationCount: obsCount,
+  };
+  await kv.set("sessions", sessionId, seededSession);
+  const obsScope = `obs:${sessionId}`;
+  for (let n = 0; n < obsCount; n += 1) {
+    const obs = makeObs(n, sessionId);
+    await kv.set(obsScope, obs.id, obs);
+  }
+  registerSummarizeFunction(sdk as any, kv as any, provider);
+  const handler = sdk.functions.get("mem::summarize")!; // the handler that registerSummarizeFunction just registered
+  return { handler, kv };
+}
+
+describe("mem::summarize chunking", () => {
+ const ORIGINAL_ENV = { ...process.env }; // shallow snapshot taken once, when the describe body runs
+
+ beforeEach(() => { // every test starts with both chunking variables unset
+ delete process.env.SUMMARIZE_CHUNK_SIZE;
+ delete process.env.SUMMARIZE_CHUNK_CONCURRENCY;
+ });
+
+ afterEach(() => { // discard per-test overrides by assigning a fresh copy of the snapshot
+ process.env = { ...ORIGINAL_ENV };
+ });
+
+ it("small session takes the single-call path (no chunking, no reduce)", async () => {
+ const provider = makeProvider([ // one scripted response: only one provider call is expected
+ summaryXml({
+ title: "Small session",
+ decisions: ["decision A"],
+ files: ["src/a.ts"],
+ concepts: ["concept-a"],
+ }),
+ ]);
+ const { handler, kv } = await setupHandler({
+ sessionId: "ses_small",
+ obsCount: 10, // SUMMARIZE_CHUNK_SIZE is unset here (cleared in beforeEach)
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_small" });
+
+ expect(result.success).toBe(true);
+ expect(provider.calls).toHaveLength(1); // exactly one call: no chunk calls, no reduce call
+ expect(provider.calls[0].user).toContain("Session observations (10 total)");
+ const stored: any = await kv.get("summaries", "ses_small"); // summary is persisted under the session id
+ expect(stored?.title).toBe("Small session");
+ });
+
+ it("large session map-reduces: N chunk calls + 1 reduce call", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial keeps call ordering deterministic
+ const provider = makeProvider([ // scripted in call order: three chunk summaries, then the merged summary
+ summaryXml({ title: "Chunk 1", decisions: ["dA"], files: ["src/a.ts"], concepts: ["ca"] }),
+ summaryXml({ title: "Chunk 2", decisions: ["dB"], files: ["src/b.ts"], concepts: ["cb"] }),
+ summaryXml({ title: "Chunk 3", decisions: ["dC"], files: ["src/c.ts"], concepts: ["cc"] }),
+ summaryXml({
+ title: "Merged",
+ decisions: ["dA", "dB", "dC"],
+ files: ["src/a.ts", "src/b.ts", "src/c.ts"],
+ concepts: ["ca", "cb", "cc"],
+ }),
+ ]);
+ const { handler, kv } = await setupHandler({
+ sessionId: "ses_large",
+ obsCount: 250, // 250 observations at chunk size 100 → 3 chunks
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_large" });
+
+ expect(result.success).toBe(true);
+ expect(provider.calls).toHaveLength(4); // 3 chunk calls + 1 reduce call
+ // First three are chunk calls (use the summary system prompt).
+ expect(provider.calls[0].system).toContain("session summarizer");
+ expect(provider.calls[2].system).toContain("session summarizer");
+ // Last is the reduce call (uses the merge system prompt).
+ expect(provider.calls[3].system).toContain("merging multiple partial summaries");
+ expect(provider.calls[3].user).toContain("Chunk 1 of 3");
+ expect(provider.calls[3].user).toContain("Chunk 3 of 3");
+
+ const stored: any = await kv.get("summaries", "ses_large");
+ expect(stored?.title).toBe("Merged"); // the persisted summary is the reduce output
+ // observationCount on the persisted summary should reflect the full session,
+ // not just the final chunk.
+ expect(stored?.observationCount).toBe(250);
+ expect(stored?.keyDecisions).toEqual(["dA", "dB", "dC"]);
+ });
+
+ it("SUMMARIZE_CHUNK_SIZE env override is respected", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "50";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so scripted responses line up with call order
+ const provider = makeProvider([ // four chunk responses followed by the reduce response
+ summaryXml({ title: "chunk" }),
+ summaryXml({ title: "chunk" }),
+ summaryXml({ title: "chunk" }),
+ summaryXml({ title: "chunk" }),
+ summaryXml({ title: "merged" }),
+ ]);
+ const { handler } = await setupHandler({
+ sessionId: "ses_env",
+ obsCount: 175,
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_env" });
+
+ expect(result.success).toBe(true);
+ // 175 obs ÷ 50 = 4 chunks (last chunk has 25) + 1 reduce = 5 calls.
+ expect(provider.calls).toHaveLength(5);
+ });
+
+ it("flaky chunk: parse fails once, retried, then succeeds — no skip", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so scripted responses line up with call order
+ const provider = makeProvider([
+ summaryXml({ title: "ok1" }), // chunk 1: succeeds on the first attempt
+ " ", // chunk 2 attempt 1: parse-fail
+ summaryXml({ title: "ok2" }), // chunk 2 attempt 2 (retry): success
+ summaryXml({ title: "ok3" }), // chunk 3: succeeds on the first attempt
+ summaryXml({ title: "merged" }), // reduce call
+ ]);
+ const { handler, kv } = await setupHandler({
+ sessionId: "ses_flaky",
+ obsCount: 250, // 3 chunks at chunk size 100
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_flaky" });
+
+ expect(result.success).toBe(true);
+ // 3 chunks × 1 attempt + 1 retry on chunk 2 + 1 reduce = 5 calls.
+ expect(provider.calls).toHaveLength(5);
+ const stored: any = await kv.get("summaries", "ses_flaky");
+ expect(stored?.title).toBe("merged");
+ });
+
+ it("persistently-broken chunk is skipped, reduce still runs on remaining partials", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so scripted responses line up with call order
+ const provider = makeProvider([
+ summaryXml({ title: "ok1" }), // chunk 1: success
+ " ", " ", // chunk 2: both attempts parse-fail
+ summaryXml({ title: "ok3" }), // chunk 3: success
+ summaryXml({ title: "merged-with-skip" }), // reduce call
+ ]);
+ const { handler, kv } = await setupHandler({
+ sessionId: "ses_skip",
+ obsCount: 250, // 3 chunks at chunk size 100
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_skip" });
+
+ expect(result.success).toBe(true);
+ // 1 ok + (1 + 1 retry skip) + 1 ok + 1 reduce = 5 calls.
+ expect(provider.calls).toHaveLength(5);
+ // Reduce input should cover only the 2 surviving chunks (original chunk 2 skipped).
+ // Labels are renumbered "Chunk 1 of 2" / "Chunk 2 of 2", while each label's obs range
+ // still reflects the original chunk boundaries (1-100 and 201-250).
+ const reduceCall = provider.calls[4];
+ expect(reduceCall.user).toContain("Chunk 1 of 2");
+ expect(reduceCall.user).toContain("Chunk 2 of 2");
+ expect(reduceCall.user).toContain("obs 1-100"); // first surviving chunk
+ expect(reduceCall.user).toContain("obs 201-250"); // second surviving chunk (originally chunk 3, range 201-250)
+ const stored: any = await kv.get("summaries", "ses_skip");
+ expect(stored?.title).toBe("merged-with-skip");
+ });
+
+ it("too many skipped chunks bails out with a clear error", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so scripted responses line up with call order
+ // 3 chunks, 2 fully broken → >50% skipped → bail.
+ const provider = makeProvider([
+ summaryXml({ title: "ok1" }), // chunk 1: success
+ " ", " ", // chunk 2: both attempts parse-fail
+ " ", " ", // chunk 3: both attempts parse-fail
+ ]);
+ const { handler } = await setupHandler({
+ sessionId: "ses_too_broken",
+ obsCount: 250,
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_too_broken" });
+
+ expect(result.success).toBe(false); // failure is reported in the result, not thrown
+ expect(result.error).toMatch(/too_many_chunks_skipped: 2\/3/);
+ });
+
+ it("provider error on one chunk after retry is skipped, not propagated", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial, so the call counter below maps onto chunk order
+ let i = 0; // 1-based count of summarize() calls made so far
+ const provider: MemoryProvider & { calls: any[] } = {
+ name: "test",
+ calls: [],
+ compress: async () => "",
+ summarize: async (system: string, user: string) => {
+ (provider as any).calls.push({ system, user }); // record the call before deciding how to respond
+ i += 1;
+ if (i === 1) return summaryXml({ title: "ok1" });
+ // chunk 2: both attempts throw (e.g. provider 400)
+ if (i === 2 || i === 3) throw new Error("OpenAI API error (400): content rejected");
+ if (i === 4) return summaryXml({ title: "ok3" });
+ return summaryXml({ title: "merged-with-skip" }); // call 5: the reduce call
+ },
+ };
+ const { handler, kv } = await setupHandler({
+ sessionId: "ses_net",
+ obsCount: 250, // 3 chunks at chunk size 100
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_net" });
+
+ expect(result.success).toBe(true);
+ // 1 ok + 2 fail + 1 ok + 1 reduce = 5 calls.
+ expect((provider as any).calls.length).toBe(5);
+ const stored: any = await kv.get("summaries", "ses_net");
+ expect(stored?.title).toBe("merged-with-skip");
+ });
+
+ it("every chunk failing on provider error trips too_many_chunks_skipped", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1";
+ // 3 chunks, all chunk calls throw → 3/3 skipped → bail.
+ const provider: MemoryProvider & { calls: any[] } = {
+ name: "test",
+ calls: [],
+ compress: async () => "",
+ summarize: async (system: string, user: string) => {
+ (provider as any).calls.push({ system, user }); // record the call, then fail unconditionally
+ throw new Error("OpenAI API error (400): invalid request");
+ },
+ };
+ const { handler } = await setupHandler({
+ sessionId: "ses_all_400",
+ obsCount: 250,
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_all_400" });
+
+ expect(result.success).toBe(false); // the handler resolves with a failure result instead of rejecting
+ expect(result.error).toMatch(/too_many_chunks_skipped: 3\/3/);
+ });
+
+ it("chunks run in parallel batches according to SUMMARIZE_CHUNK_CONCURRENCY", async () => {
+ process.env.SUMMARIZE_CHUNK_SIZE = "100";
+ process.env.SUMMARIZE_CHUNK_CONCURRENCY = "2";
+ let inflight = 0; // summarize() calls currently awaiting their timer
+ let maxInflight = 0; // high-water mark of inflight across the whole run
+ const provider: MemoryProvider & { calls: any[] } = {
+ name: "test",
+ calls: [],
+ compress: async () => "",
+ summarize: async (system: string, user: string) => {
+ (provider as any).calls.push({ system, user });
+ inflight += 1;
+ maxInflight = Math.max(maxInflight, inflight);
+ // Yield to event loop so siblings can also enter before we resolve.
+ await new Promise((r) => setTimeout(r, 5));
+ inflight -= 1;
+ if (system.includes("merging")) return summaryXml({ title: "merged" }); // reduce call, identified by its system prompt
+ return summaryXml({ title: "ok" });
+ },
+ };
+ const { handler } = await setupHandler({
+ sessionId: "ses_par",
+ obsCount: 400, // 4 chunks at chunkSize=100
+ provider,
+ });
+
+ const result: any = await handler({ sessionId: "ses_par" });
+
+ expect(result.success).toBe(true);
+ // 4 chunks at concurrency 2 → max 2 in flight at once during the chunk phase.
+ // Reduce is a single call so doesn't bump it.
+ expect(maxInflight).toBe(2);
+ });
+});