diff --git a/.env.example b/.env.example index f1c207c1..77ca0f3a 100644 --- a/.env.example +++ b/.env.example @@ -98,6 +98,8 @@ # AGENTMEMORY_GRAPH_WEIGHT=0.2 # Graph traversal bonus on smart-search ranking # TOKEN_BUDGET=2000 # Max tokens injected via mem::context per session # MAX_OBS_PER_SESSION=500 # Per-session observation cap before consolidation kicks in +# SUMMARIZE_CHUNK_SIZE=400 # When mem::summarize sees a session larger than this, it chunks observations and map-reduces (chunk-summarize → reduce-merge) to stay within the LLM's context window. Default 400 ≈ 50k tokens per chunk at ~110 tok/obs. Native sessions are capped by MAX_OBS_PER_SESSION; chunking primarily matters for bulk-imported jsonl sessions, which bypass that cap. +# SUMMARIZE_CHUNK_CONCURRENCY=6 # Parallel chunk LLM calls during chunked summarize. Default 6 fits ~100-chunk sessions under iii's 180s function-invocation timeout at typical ~8s/call. High-throughput providers (Novita, DeepInfra, DeepSeek) commonly allow 100+ concurrent — bump this for very large imported sessions. # ----------------------------------------------------------------------------- # 5. Behaviour flags diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..a2f5e0c5 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [rohitg00] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41c99434..b9671280 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,19 +1,62 @@ name: CI +# `paths-ignore` keeps doc-only / website / README / CHANGELOG churn from +# burning runner minutes. Source / config / workflow changes always run. +# `workflow_dispatch` gives a manual re-run button for flake debugging. 
on: push: branches: [main] + paths-ignore: + - "README.md" + - "CHANGELOG.md" + - "AGENTS.md" + - "ROADMAP.md" + - "website/**" + - "docs/**" + - "assets/**" + - "deploy/**/README.md" + - "**/*.md" + - "**/*.mdx" pull_request: branches: [main] + paths-ignore: + - "README.md" + - "CHANGELOG.md" + - "AGENTS.md" + - "ROADMAP.md" + - "website/**" + - "docs/**" + - "assets/**" + - "deploy/**/README.md" + - "**/*.md" + - "**/*.mdx" + workflow_dispatch: + +# Cancel in-flight PR runs when a force-push lands. Keep push runs to +# protect against partial state on main. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: + # Don't bail the whole matrix on one cell's failure — we want to + # see whether the same failure reproduces across OSes (e.g. + # whether a flake is platform-specific or universal). + fail-fast: false matrix: + # Windows held back: test/obsidian-export.test.ts has hardcoded + # POSIX paths (`/tmp/...`) that fail on D:\ drive runners. + # src/functions/obsidian-export.ts needs os.tmpdir() + path.join + # rework before Windows can be added back. Tracked as follow-up. + os: [ubuntu-latest, macos-latest] node-version: [20, 22] steps: - uses: actions/checkout@v6 + with: + persist-credentials: false - uses: actions/setup-node@v6 with: node-version: ${{ matrix.node-version }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 62dc8925..00003399 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,15 +10,25 @@ on: required: false default: "agentmemory,mcp,fs-watcher" +# Workflow-level permissions stay minimal — only `contents: read` +# is required to check out the repo. `id-token: write` is granted on +# the publish job for npm's --provenance Sigstore OIDC mint. 
permissions: contents: read - id-token: write jobs: publish: runs-on: ubuntu-latest + permissions: + contents: read + id-token: write steps: - uses: actions/checkout@v6 + with: + # Don't persist the GITHUB_TOKEN to .git/config — the + # publish steps don't push back to the repo, so the token + # only needs to live in memory for this checkout. + persist-credentials: false - uses: actions/setup-node@v6 with: diff --git a/.gitignore b/.gitignore index 9a9260b8..ba6af995 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,10 @@ dist/ plugin/scripts/*.map plugin/scripts/*.d.mts data/ +!eval/data/ +!eval/data/** +data-*/ +agentmemory-debug/ .gstack/ # Lock files — never commit (see feedback_no_lockfiles memory) @@ -20,3 +24,8 @@ package-lock.json pnpm-lock.yaml yarn.lock integrations/hermes/__pycache__/ + +# Eval reports (transient; published scorecards live in docs/benchmarks/) +eval/reports/ +# LongMemEval download is 278MB; fetched on demand +eval/data/longmemeval/ diff --git a/AGENTS.md b/AGENTS.md index ebcf3584..24e74245 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,6 +19,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng 5. `test/mcp-standalone.test.ts` — tool count assertion 6. `README.md` — tool counts (search for "MCP tools") 7. `plugin/.claude-plugin/plugin.json` — tool count in description +8. `plugin/plugin.json` and `plugin/.mcp.copilot.json` (when present) — tool count or MCP exposure **When adding REST endpoints, you MUST update:** 1. `src/triggers/api.ts` — endpoint registration @@ -32,6 +33,7 @@ agentmemory is a persistent memory system for AI coding agents, built on iii-eng 4. `src/functions/export-import.ts` — supportedVersions set 5. `test/export-import.test.ts` — version assertion 6. `plugin/.claude-plugin/plugin.json` — version field +7. `plugin/plugin.json` (when present) — version field **When adding new KV scopes:** 1. 
`src/state/schema.ts` — add to the KV object diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c73c185..0188e05a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,48 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +## [0.9.21] — 2026-05-19 + +Quality + integration wave. Headline: native OpenCode plugin with full Claude Code hook parity ([#237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer)). Eleven more PRs alongside, covering: `memory_recall` returning the wrong shape, env-file `AGENTMEMORY_DROP_STALE_INDEX` silently ignored, hook scripts crashing on Windows usernames with spaces, viewer search inputs interrupting CJK IME composition, large sessions silently failing at the LLM context limit, lessons invisible to smart-search, Hermes plugin manifest missing hooks, CLI onboarding crashing in non-TTY contexts, rebuildIndex blocking boot on large corpora, 25h embed-loop bottleneck during rebuild, and removal of the v0.9.19 iii-console installer workaround now that upstream is fixed. + +### Added + +- **OpenCode plugin with 22 auto-capture hooks** ([PR #237](https://github.com/rohitg00/agentmemory/pull/237) by [@cl0ckt0wer](https://github.com/cl0ckt0wer), closes [#236](https://github.com/rohitg00/agentmemory/issues/236) + [#244](https://github.com/rohitg00/agentmemory/issues/244)). Complete OpenCode plugin in `plugin/opencode/` matching Claude Code hook parity. Covers session lifecycle (8 hooks), messages (3), tool lifecycle (2), part tracking, permissions, task tracking, plus a two-layer enrichment pipeline (memory context on first turn, file enrichment on subsequent turns) and two slash commands (`/recall`, `/remember`). Full gap analysis in `plugin/opencode/README.md`. 
+ +### Fixed + +- **`memory_recall` endpoint + format/token_budget forwarding** ([PR #516](https://github.com/rohitg00/agentmemory/pull/516) by [@serhiizghama](https://github.com/serhiizghama), closes [#507](https://github.com/rohitg00/agentmemory/issues/507) + [#440](https://github.com/rohitg00/agentmemory/issues/440)). MCP `memory_recall` always returned compact mode and dropped `format` + `token_budget` params. Two root causes fixed: standalone shim routed through `/agentmemory/smart-search` instead of `/agentmemory/search`, and the local-fallback path didn't read either param. Now routes correctly, forwards both params end-to-end, defaults `format` to `"full"` matching the MCP schema. + +- **env-file `AGENTMEMORY_DROP_STALE_INDEX` flag now honored** ([PR #461](https://github.com/rohitg00/agentmemory/pull/461) by [@honor2030](https://github.com/honor2030), closes [#456](https://github.com/rohitg00/agentmemory/issues/456)). Setting the flag in `~/.agentmemory/.env` was silently ignored because the boot path read `process.env` directly. New `isDropStaleIndexEnabled()` helper reads merged env. Combined with [#455](https://github.com/rohitg00/agentmemory/issues/455) + [#469](https://github.com/rohitg00/agentmemory/issues/469) reports, this is the unblock path for the stale-index server-crash recovery loop. + +- **Windows hook scripts quote plugin paths correctly** ([PR #487](https://github.com/rohitg00/agentmemory/pull/487) by [@honor2030](https://github.com/honor2030), closes [#477](https://github.com/rohitg00/agentmemory/issues/477)). Hook command strings referenced `${CLAUDE_PLUGIN_ROOT}/scripts/*.mjs` without quotes — Windows users with spaces in their username had every hook crash. Quotes added + regression test. + +- **Viewer search inputs honor IME composition** ([PR #517](https://github.com/rohitg00/agentmemory/pull/517) by [@jonathanzhan1975](https://github.com/jonathanzhan1975)). 
CJK users typing in the viewer's search inputs hit mid-character interruption — every keystroke fired the `oninput=` re-render handler, breaking IME composition mid-syllable. New `bindImeSafeSearch` helper defers re-render until `compositionend`. + +- **Chunk large sessions to fit LLM context window** ([PR #472](https://github.com/rohitg00/agentmemory/pull/472) by [@efenex](https://github.com/efenex)). Sessions with >7000 observations silently failed at the LLM provider's context limit — the consolidation pipeline silently skipped the session. New chunking splits oversized sessions across multiple compress calls + restitches the narrative via a `REDUCE_SYSTEM` prompt. Legacy single-call path preserved when obs count is under the chunk size. Backfill script under `scripts/` for users hitting the pre-fix bug. + +- **Surface lessons in smart-search + diagnose tally** ([PR #473](https://github.com/rohitg00/agentmemory/pull/473) by [@efenex](https://github.com/efenex)). Closes the lesson round-trip with [#458](https://github.com/rohitg00/agentmemory/pull/458) (lessons auto-injected into `mem::context`): lessons are now also returned alongside hybrid search results in a separate `lessons` field on `smart-search`, and the `diagnose` health surface tallies per-store counts so the trust-shock pattern (save succeeds, recall empty, diagnose says 0) goes away. + +- **Declare all Hermes plugin hooks** ([PR #486](https://github.com/rohitg00/agentmemory/pull/486) by [@honor2030](https://github.com/honor2030)). The Hermes `plugin.yaml` manifest only declared 3 of the 6 implemented hooks. All 6 now declared (`prefetch`, `sync_turn`, `on_session_end`, `on_pre_compress`, `on_memory_write`, `system_prompt_block`). + +- **`rebuildIndex` non-blocking on boot** ([PR #500](https://github.com/rohitg00/agentmemory/pull/500) by [@efenex](https://github.com/efenex)). 
Boot path previously `await`-ed `rebuildIndex(kv)`, so the viewer + later boot steps stalled — on large corpora this was 25h+ of blocked startup. Replaced with `void rebuildIndex(kv).then(...).catch(...)` so the rebuild runs in the background. + +- **Batched embed calls in `rebuildIndex` (25h → 3h on large corpora)** ([PR #504](https://github.com/rohitg00/agentmemory/pull/504) by [@efenex](https://github.com/efenex)). The rebuild loop made one embed call per observation, paying full HTTP RTT per item. New `vectorIndexAddBatchGuarded` helper batches embeds (default 32, configurable via `REBUILD_EMBED_BATCH_SIZE`) and try/catches per-item failures. Measured 25h → 3h on a 250k-observation corpus. + +- **CLI skips onboarding prompts without a tty** ([PR #491](https://github.com/rohitg00/agentmemory/pull/491) by [@honor2030](https://github.com/honor2030)). Onboarding prompts crashed in non-interactive contexts (CI, `docker run -d`, piped input). New guard short-circuits with sensible defaults when stdin/stdout aren't TTYs or `CI=1`. + +### Changed + +- **Drop iii-console installer `--next` workaround** ([PR #546](https://github.com/rohitg00/agentmemory/pull/546)). v0.9.19 routed first-run iii-console install through `bash -s -- --next` to dodge an upstream tag-prefix bug at [iii-hq/iii#1652](https://github.com/iii-hq/iii/issues/1652). Upstream [iii-hq/iii#1660](https://github.com/iii-hq/iii/pull/1660) shipped 2026-05-19; `install.iii.dev/console/main/install.sh` is a CDN proxy serving upstream main HEAD so the fix is live without an iii release tag. Reverted to canonical bare `curl ... | sh`. + +### Infrastructure + +- 95 test files (was 92), **1067 tests pass** (was 1038) on `chore(release): v0.9.21`. +- Bundles 12 PRs: 1 contributor feature + 10 bug fixes across MCP / hooks / viewer / summarize / lessons / Hermes / rebuildIndex / CLI + 1 upstream-installer revert. 
+- New contributors landing first PRs this release: [@cl0ckt0wer](https://github.com/cl0ckt0wer), [@serhiizghama](https://github.com/serhiizghama), [@jonathanzhan1975](https://github.com/jonathanzhan1975). + +[0.9.21]: https://github.com/rohitg00/agentmemory/compare/v0.9.20...v0.9.21 + ## [0.9.20] — 2026-05-18 Hotfix: revert the Codex Stop → session-end chain shipped in v0.9.19. diff --git a/README.md b/README.md index ef840011..fc6300fb 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Your coding agent remembers everything. No more re-explaining. Built on iii engine
- Persistent memory for Claude Code, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client. + Persistent memory for Claude Code, GitHub Copilot CLI, Cursor, Gemini CLI, Codex CLI, Hermes, OpenClaw, pi, OpenCode, and any MCP client.

@@ -34,6 +34,7 @@

npm version + npm downloads CI License Stars @@ -72,10 +73,12 @@ ## Install ```bash -npm install -g @agentmemory/agentmemory # once — bare `agentmemory` on PATH -agentmemory # start the memory server on :3111 -agentmemory demo # seed sample sessions + prove recall -agentmemory connect claude-code # wire your agent (also: codex, cursor, gemini-cli, ...) +npm install -g @agentmemory/agentmemory # once — bare `agentmemory` on PATH +# If you hit EACCES on macOS/Linux system Node installs, retry with: +# sudo npm install -g @agentmemory/agentmemory +agentmemory # start the memory server on :3111 +agentmemory demo # seed sample sessions + prove recall +agentmemory connect claude-code # wire your agent (also: copilot-cli, codex, cursor, gemini-cli, ...) ``` Or via `npx` (no install): @@ -107,6 +110,11 @@ agentmemory works with any agent that supports hooks, MCP, or REST API. All agen native plugin + 6 hooks + MCP +GitHub Copilot CLI
+GitHub Copilot CLI
+MCP + plugin hooks/skills + + OpenClaw
OpenClaw
native plugin + MCP @@ -207,6 +215,15 @@ npx @agentmemory/agentmemory ### Retrieval Accuracy +**coding-agent-life-v1** (in-house corpus, sandbox-reproducible) + +| Adapter | P@5 | R@5 | Top-5 hit rate | p50 latency | +|---|---|---|---|---| +| **agentmemory hybrid** | **0.578** | **0.967** | **15 / 15** | 14 ms | +| grep baseline | 0.267 | 0.967 | 15 / 15 | 0 ms | + +100% top-5 hit rate. **2.2×** better precision than the grep baseline on identical input. Full per-type breakdown: [`docs/benchmarks/2026-05-20-coding-agent-life-v1.md`](docs/benchmarks/2026-05-20-coding-agent-life-v1.md). + **LongMemEval-S** (ICLR 2025, 500 questions) | System | R@5 | R@10 | MRR | @@ -232,6 +249,8 @@ npx @agentmemory/agentmemory > Embedding model: `all-MiniLM-L6-v2` (local, free, no API key). Full reports: [`benchmark/LONGMEMEVAL.md`](benchmark/LONGMEMEVAL.md), [`benchmark/QUALITY.md`](benchmark/QUALITY.md), [`benchmark/SCALE.md`](benchmark/SCALE.md). Competitor comparison: [`benchmark/COMPARISON.md`](benchmark/COMPARISON.md) — agentmemory vs mem0, Letta, Khoj, claude-mem, Hippo. +**Reproduce locally:** [`eval/README.md`](eval/README.md) — adapter-pluggable harness for LongMemEval `_s` (public 500-Q) + `coding-agent-life-v1` (in-house 15-session corpus). Grep / vector / agentmemory adapters score side-by-side, NDJSON output, published scorecards land in [`docs/benchmarks/`](docs/benchmarks/). + ---

vs Competitors

@@ -349,6 +368,8 @@ Open `http://localhost:3113` to watch the memory build live. ```bash npm install -g @agentmemory/agentmemory +# If you hit EACCES on macOS/Linux system Node installs, retry with: +# sudo npm install -g @agentmemory/agentmemory agentmemory # start the server (same as the npx form) agentmemory stop # tear it down agentmemory remove # uninstall everything we created @@ -418,6 +439,30 @@ The Codex plugin ships from the same `plugin/` directory as the Claude Code plug Codex's hook engine injects `CLAUDE_PLUGIN_ROOT` into hook subprocesses (per [`codex-rs/hooks/src/engine/discovery.rs`](https://github.com/openai/codex/blob/main/codex-rs/hooks/src/engine/discovery.rs)), so the same hook scripts work across both hosts without duplication. Subagent / SessionEnd / Notification / TaskCompleted / PostToolUseFailure events are Claude-Code-only and are not registered for Codex. +#### Codex Desktop: plugin hooks currently silent (workaround available) + +`CodexHooks` and `PluginHooks` are both stable + default-enabled in [`codex-rs/features/src/lib.rs`](https://github.com/openai/codex/blob/main/codex-rs/features/src/lib.rs), but Codex Desktop builds currently do not dispatch plugin-local `hooks.json` ([openai/codex#16430](https://github.com/openai/codex/issues/16430)). MCP tools still work; only the lifecycle observations are missing. + +Until upstream lands the fix, mirror the same hook commands into the global `~/.codex/hooks.json`: + +```bash +agentmemory connect codex --with-hooks +``` + +This adds an idempotent block to `~/.codex/hooks.json` referencing absolute paths to the bundled scripts (no `${CLAUDE_PLUGIN_ROOT}` expansion needed at user-scope). Re-run the same command after upgrading agentmemory to refresh paths. User entries in the same file are preserved; only previous agentmemory entries are replaced. 
+ +### GitHub Copilot CLI + +```bash +# MCP-only wiring +agentmemory connect copilot-cli + +# Full hooks/skills plugin from the GitHub subdir +copilot plugin install rohitg00/agentmemory:plugin +``` + +`agentmemory connect copilot-cli` merges `mcpServers.agentmemory` into `~/.copilot/mcp-config.json` (or `$COPILOT_HOME/mcp-config.json` when `COPILOT_HOME` is set) and preserves existing servers. This adapter is Windows-safe even though other `connect` adapters still require manual Windows setup. Copilot picks up the MCP server on next launch or after `/mcp`. Install the plugin as well when you want the full hook/skill experience. +
OpenClaw (paste this prompt) @@ -490,9 +535,11 @@ The agentmemory entry is the **same MCP server block** across every host that us | **Cline / Roo Code / Kilo Code** | Cline MCP settings (Settings UI → MCP Servers → Edit) | Same `mcpServers` block. | | **Windsurf** | `~/.codeium/windsurf/mcp_config.json` | Same `mcpServers` block. | | **Gemini CLI** | `~/.gemini/settings.json` | `gemini mcp add agentmemory npx -y @agentmemory/mcp --scope user` (auto-merges). | +| **GitHub Copilot CLI (MCP only)** | `~/.copilot/mcp-config.json` | `agentmemory connect copilot-cli` merges `mcpServers.agentmemory`; Copilot picks it up on next launch or `/mcp`. | +| **GitHub Copilot CLI (full plugin)** | Copilot plugin install | `copilot plugin install rohitg00/agentmemory:plugin` for the plugin from the GitHub subdir. | | **OpenClaw** | OpenClaw MCP config | Same `mcpServers` block, or use the deeper [memory plugin](integrations/openclaw/). | | **Codex CLI (MCP only)** | `.codex/config.toml` | TOML shape: `codex mcp add agentmemory -- npx -y @agentmemory/mcp`, or add `[mcp_servers.agentmemory]` manually. | -| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. | +| **Codex CLI (full plugin)** | Codex plugin marketplace | `codex plugin marketplace add rohitg00/agentmemory` then `codex plugin install agentmemory`. Registers MCP + 6 lifecycle hooks (SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, Stop) + 4 skills. On Codex Desktop, also run `agentmemory connect codex --with-hooks` until [openai/codex#16430](https://github.com/openai/codex/issues/16430) lands — plugin hooks are currently silent there. 
| | **OpenCode (MCP only)** | `opencode.json` | Different shape — top-level `mcp` key, command as array: `{"mcp": {"agentmemory": {"type": "local", "command": ["npx", "-y", "@agentmemory/mcp"], "enabled": true}}}`. | | **OpenCode (full plugin)** | `plugin/opencode/` | 22 auto-capture hooks covering session lifecycle, messages, tools, errors. Two slash commands (`/recall`, `/remember`). Copy `plugin/opencode/` into your OpenCode workspace and add the plugin entry to `opencode.json`. See [`plugin/opencode/README.md`](plugin/opencode/README.md) for the full hook table + gap analysis. | | **pi** | `~/.pi/agent/extensions/agentmemory` | Copy [`integrations/pi`](integrations/pi/) and restart pi. | @@ -1035,7 +1082,7 @@ Full registry: [workers.iii.dev](https://workers.iii.dev). Every worker there co ### LLM Providers -agentmemory auto-detects from your environment. No API key needed if you have a Claude subscription. +agentmemory auto-detects from your environment. By default, no LLM calls are made unless you configure a provider or explicitly opt in to the Claude subscription fallback. | Provider | Config | Notes | |----------|--------|-------| @@ -1046,6 +1093,33 @@ agentmemory auto-detects from your environment. No API key needed if you have a | OpenRouter | `OPENROUTER_API_KEY` | Any model | | Claude subscription fallback | `AGENTMEMORY_ALLOW_AGENT_SDK=true` | Opt-in only. Spawns `@anthropic-ai/claude-agent-sdk` sessions — used to cause unbounded Stop-hook recursion (#149 follow-up) so it is no longer the default. | +### Config File + +Put agentmemory runtime configuration in `~/.agentmemory/.env` instead of exporting variables in every shell. If the viewer shows a setup hint like `export ANTHROPIC_API_KEY=...`, copy it into this file as `ANTHROPIC_API_KEY=...` without the `export` prefix, then restart agentmemory. + +Process environment variables still work and take precedence over values in the file. 
+ +On Windows, the same file lives at `%USERPROFILE%\.agentmemory\.env`: + +```powershell +New-Item -ItemType Directory -Force $HOME\.agentmemory +notepad $HOME\.agentmemory\.env +``` + +To test with a Claude Code Pro/Max subscription instead of an API key, opt in explicitly: + +```env +AGENTMEMORY_ALLOW_AGENT_SDK=true +AGENTMEMORY_AUTO_COMPRESS=true +``` + +Turn on graph or consolidation features in the same file if you want them: + +```env +GRAPH_EXTRACTION_ENABLED=true +CONSOLIDATION_ENABLED=true +``` + ### Environment Variables Create `~/.agentmemory/.env`: diff --git a/docs/benchmarks/2026-05-20-coding-agent-life-v1.md b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md new file mode 100644 index 00000000..f280b27d --- /dev/null +++ b/docs/benchmarks/2026-05-20-coding-agent-life-v1.md @@ -0,0 +1,76 @@ +# 2026-05-20 — coding-agent-life-v1 (v0.9.21) + +**Commit:** `e9dc710` +**Bench:** coding-agent-life-v1 (15 sessions, 15 queries) +**N:** 15 +**K:** 5 +**Hardware:** macOS 15 (Apple Silicon) +**agentmemory:** v0.9.21 +**iii-engine:** v0.11.2 +**Embedding provider:** local default +**Sandbox:** isolated data dir at `/tmp/agentmemory-eval-sandbox/`, ports 3411/3412 + +## Headline + +`agentmemory-hybrid` hits **100% top-5 hit rate**, R@5 = **0.967**, P@5 = **0.578**. + +Same corpus, grep baseline: R@5 = 0.967, P@5 = 0.267 — same recall, but **2.2× worse precision**. Hybrid's top-5 is mostly gold; grep's top-5 is half noise. + +## Per-adapter + +| Adapter | P@5 | R@5 | Hit rate | p50 latency | +|---|---|---|---|---| +| grep (tokenized substring) | 0.267 | 0.967 | 15 / 15 | 0 ms | +| `agentmemory-hybrid` | **0.578** | **0.967** | **15 / 15** | 14 ms | + +`agentmemory-hybrid` runs through the production smart-search endpoint (`POST /agentmemory/smart-search`) so it exercises the full BM25 + embedding + reranker stack. 
+ +## Per-question-type + +P@5, grep vs `agentmemory-hybrid`: + +| Type | grep | hybrid | hybrid lift | +|---|---|---|---| +| single-session-bug | 0.20 | 0.33 | 1.7× | +| single-session-infra (n=2) | 0.20 | 0.50 | 2.5× | +| single-session-refactor | 0.20 | 0.50 | 2.5× | +| single-session-feature | 0.50 | 0.50 | tie | +| single-session-test | 0.20 | 0.33 | 1.7× | +| single-session-perf | 0.20 | 0.50 | 2.5× | +| single-session-api | 0.20 | 0.50 | 2.5× | +| single-session-db | 0.20 | 0.50 | 2.5× | +| single-session-release | 0.20 | 0.33 | 1.7× | +| multi-session-causal | 0.40 | 0.40 | tie | +| preference (n=2) | 0.20 | 0.42 | 2.1× | +| multi-session-review | 0.40 | 0.67 | 1.7× | +| temporal (R@5 = 0.50 grep / 1.00 hybrid) | 0.50 | 0.67 | 1.3× | + +Temporal queries (`What was shipped on April 8th 2026?`) need both gold sessions to score full recall. grep finds 1/2; hybrid finds 2/2. + +## Methodology + +- 15 fictional Claude Code sessions across a 10-day stretch of a Rust CLI project (`shipctl`) — bug fixes, refactors, infra, perf, schema migrations, preferences, post-mortem +- 15 hand-graded queries with `goldSessionIds[]` covering single-session, multi-session causal, multi-session review, preference, temporal +- Each session ingested via `POST /agentmemory/remember` with `type=eval-session` and `concepts=[session_id]` +- Each query hits `POST /agentmemory/smart-search` with `limit=50`; dedupe by session ID; truncate to K=5 +- No LLM in the retrieval loop +- Sandbox: clean `~/.agentmemory` via `HOME` override + alt ports (3411/3412) so no cross-contamination from a user's real store + +## Reproduce + +```sh +git checkout e9dc710 +npm install --legacy-peer-deps +npm run build + +source eval/scripts/sandbox.sh +npm run eval:coding-life -- --adapters grep,agentmemory +``` + +Outputs land in `eval/reports/coding-life/`: `scores.ndjson` (per-query rows) and `summary.json` (per-adapter and per-type aggregates). 
+ +## Notes + +- The single-session-feature tie (`Which PR introduced helm chart support?`) is interesting: the query says `PR introduced helm chart` and the gold session has `helm chart` literally — grep benefits from lexical exactness here, so hybrid matches it but doesn't outperform. +- The corpus is intentionally small for fast iteration. Hardening targets: paraphrased queries, synonym substitution, in-corpus distractors with shared keywords, longer multi-session chains. +- Vector adapter not measured here — requires `OPENAI_API_KEY`; will be added in a follow-up scorecard alongside LongMemEval `_s`. diff --git a/docs/benchmarks/TEMPLATE.md b/docs/benchmarks/TEMPLATE.md new file mode 100644 index 00000000..b830e24e --- /dev/null +++ b/docs/benchmarks/TEMPLATE.md @@ -0,0 +1,54 @@ +# + +**Commit:** `` +**Bench:** LongMemEval `_s` / coding-agent-life-v1 / ... +**N:** 500 / 15 / ... +**K:** 5 +**Hardware:** macos-15 / ubuntu-22.04 / ... +**OpenAI model:** text-embedding-3-small +**Anthropic model:** N/A (no LLM in retrieval loop) + +## Headline + +agentmemory-hybrid: **R@5 = XX.XX%**, P@5 = XX.XX%, p50 latency = XXms + +Beats grep baseline by +X.Xpt R@5, vector by +X.Xpt R@5. + +## Per-adapter + +| Adapter | P@5 | R@5 | Hit rate | p50 latency | +|---|---|---|---|---| +| grep | | | | | +| vector | | | | | +| agentmemory-hybrid | | | | | + +## Per-question-type + +| Type | grep R@5 | vector R@5 | agentmemory R@5 | +|---|---|---|---| +| single-session-bug | | | | +| single-session-refactor | | | | +| preference | | | | +| multi-session-causal | | | | +| temporal | | | | + +## Methodology + +- Sessions ingested via `POST /agentmemory/remember` with `type=eval-session` +- Queries hit `POST /agentmemory/smart-search` with `limit=k*4` +- No LLM in retrieval loop. Direct rank from hybrid scoring. 
+- Ranks dedup by sessionId before truncating to K +- Latency measured as init+query for LongMemEval (per-question fresh state), query-only for coding-life (shared state) + +## Reproduce + +```sh +git checkout +npm install --legacy-peer-deps +OPENAI_API_KEY=sk-... AGENTMEMORY_BASE_URL=http://localhost:3111 \ + npm run eval:longmemeval -- --stratify 10 +``` + +## Notes + + diff --git a/eval/README.md b/eval/README.md new file mode 100644 index 00000000..7f295367 --- /dev/null +++ b/eval/README.md @@ -0,0 +1,111 @@ +# agentmemory-evals + +Public benchmarks for agentmemory's hybrid memory stack (BM25 + embeddings + consolidation + graph). + +Two families, both reproducible: + +- **LongMemEval** — public 500-question retrieval benchmark over multi-session chat +- **coding-agent-life-v1** — in-house corpus of 15 fictional Claude Code sessions for a Rust CLI project (`shipctl`), with 15 hand-graded queries covering bug fixes, refactors, preferences, and multi-session causal reasoning + +## Adapters + +| Adapter | Backend | API key needed | +|---|---|---| +| `grep` | Tokenized substring match | none | +| `vector` | OpenAI `text-embedding-3-small` + cosine | `OPENAI_API_KEY` | +| `agentmemory` | Running agentmemory server, smart-search endpoint | none (auth optional via `AGENTMEMORY_SECRET`) | + +## Sandbox first + +Running the `agentmemory` adapter against your real `~/.agentmemory` directory pollutes the eval with pre-existing memories AND pollutes your real store with eval test data. Always sandbox. + +`eval/scripts/sandbox.sh` spins up a clean agentmemory + iii-engine on ports 3411/3412 with state in `/tmp/agentmemory-eval-sandbox/`, exports `AGENTMEMORY_BASE_URL`, and tears down on exit. + +```sh +source eval/scripts/sandbox.sh +npm run eval:coding-life -- --adapters grep,agentmemory +``` + +Requires iii v0.11.2 on PATH (agentmemory pin). 
If you already have a different version installed, install the pinned build into `~/.local/bin` and make sure that directory comes first on `PATH`: + +```sh +mkdir -p ~/.local/bin +curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin +export PATH="$HOME/.local/bin:$PATH" # add to ~/.zshrc or ~/.bashrc for persistence +``` + +## Quickstart + +### coding-agent-life-v1 (in-house, no download) + +```sh +# grep baseline, no sandbox needed +npm run eval:coding-life -- --adapters grep + +# add agentmemory + vector (sandbox + OpenAI key) +source eval/scripts/sandbox.sh +OPENAI_API_KEY=sk-... npm run eval:coding-life -- --adapters grep,vector,agentmemory +``` + +### LongMemEval `_s` (public, 278MB download) + +```sh +mkdir -p ~/datasets/longmemeval +curl -Lo ~/datasets/longmemeval/longmemeval_s.json \ + https://huggingface.co/datasets/xiaowu0162/longmemeval/resolve/main/longmemeval_s + +source eval/scripts/sandbox.sh + +# Stratified sample of 10 per type (fast iteration, ~$0.20 OpenAI cost) +OPENAI_API_KEY=sk-... LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \ + npm run eval:longmemeval -- --stratify 10 + +# Full 500 questions × 3 adapters (~$2 OpenAI cost) +OPENAI_API_KEY=sk-... 
LONGMEMEVAL_PATH=~/datasets/longmemeval/longmemeval_s.json \ + npm run eval:longmemeval +``` + +## Repo layout + +```text +eval/ +├── README.md +├── runner/ +│ ├── types.ts Adapter, Question, RankedDoc, ScoreRow +│ ├── score.ts P@K, R@K, aggregation +│ ├── load.ts LongMemEval JSON → Question[] +│ ├── adapters/ +│ │ ├── grep.ts tokenized substring baseline +│ │ ├── vector.ts OpenAI embeddings + cosine +│ │ └── agentmemory.ts POST /agentmemory/{remember,smart-search} +│ ├── longmemeval.ts public benchmark runner +│ └── coding-life.ts in-house benchmark runner +└── data/ + └── coding-agent-life-v1/ + ├── sessions.json 15 fictional sessions (~6KB) + └── queries.json 15 queries with gold session IDs +``` + +Reports land in `eval/reports/<bench>/` (gitignored): `scores.ndjson` + `summary.json`. + +Published scorecards land in `docs/benchmarks/YYYY-MM-DD-<bench>.md`. + +## Writing a new adapter + +1. Implement `Adapter` from `eval/runner/types.ts`: + ```ts + import type { Adapter } from "../types.js"; + export const myAdapter: Adapter = { + name: "my-adapter", + async init(sessions, config) { /* index */ return state; }, + async query(q, state, k) { /* search */ return ranked; }, + }; + ``` +2. Register in `eval/runner/{longmemeval,coding-life}.ts` `ADAPTERS` map. +3. Run against `coding-agent-life-v1` to sanity-check before committing OpenAI spend on LongMemEval. + +## Why a benchmark for agentmemory + +agentmemory ships BM25 + embeddings + consolidation + graph retrieval. Numbers from those layers should be measured against grep/vector baselines so the value of each layer is provable. + +The in-house corpus is small on purpose (15 sessions) — covers single-session, multi-session, preference, and temporal question types without taking 15 minutes to run. LongMemEval gives the public-comparison axis. 
diff --git a/eval/data/coding-agent-life-v1/queries.json b/eval/data/coding-agent-life-v1/queries.json new file mode 100644 index 00000000..5603e8a0 --- /dev/null +++ b/eval/data/coding-agent-life-v1/queries.json @@ -0,0 +1,107 @@ +[ + { + "id": "q-001", + "type": "single-session-bug", + "question": "Where did we land the auth env var precedence fix?", + "answer": "PR #11 with SHIPCTL_TOKEN > SHIP_TOKEN > SC_TOKEN precedence", + "goldSessionIds": ["sess-001"] + }, + { + "id": "q-002", + "type": "single-session-infra", + "question": "What was the multi-arch Docker fix?", + "answer": "Added --platform=$BUILDPLATFORM and BUILDX_PLATFORMS for amd64+arm64", + "goldSessionIds": ["sess-002"] + }, + { + "id": "q-003", + "type": "single-session-refactor", + "question": "Where did we consolidate the retry logic?", + "answer": "src/retry.rs with exponential backoff base=200ms cap=30s full jitter", + "goldSessionIds": ["sess-003"] + }, + { + "id": "q-004", + "type": "single-session-feature", + "question": "Which PR introduced helm chart support?", + "answer": "PR #14", + "goldSessionIds": ["sess-004"] + }, + { + "id": "q-005", + "type": "single-session-test", + "question": "Which test was flaky on macos and how was it fixed?", + "answer": "fs-watcher emits_changekind_file_delete; bumped wait to 1500ms + retry: 2", + "goldSessionIds": ["sess-005"] + }, + { + "id": "q-006", + "type": "single-session-perf", + "question": "How did we fix the memory leak?", + "answer": "Replaced unbounded HashMap with LruCache cap=10k in src/cache.rs (PR #16)", + "goldSessionIds": ["sess-006"] + }, + { + "id": "q-007", + "type": "single-session-api", + "question": "How did we handle the github API rate limit?", + "answer": "Conditional requests with If-None-Match etag and 304 caching via http-cache", + "goldSessionIds": ["sess-007"] + }, + { + "id": "q-008", + "type": "single-session-db", + "question": "What was the schema migration approach for run_history?", + "answer": "Three-phase: nullable 
column + dual-write, backfill + flip reads, drop old column", + "goldSessionIds": ["sess-008"] + }, + { + "id": "q-009", + "type": "single-session-infra", + "question": "How is the docs site deployed?", + "answer": "GitHub Actions docs.yml workflow + mdbook build + Cloudflare Pages on shipctl.dev", + "goldSessionIds": ["sess-009"] + }, + { + "id": "q-010", + "type": "single-session-release", + "question": "Which PR set up the cross-platform release pipeline?", + "answer": "PR #19 with cross-rs for linux and native macos/windows builds", + "goldSessionIds": ["sess-010"] + }, + { + "id": "q-011", + "type": "multi-session-causal", + "question": "What was the root cause of the staging incident, and where was it fixed?", + "answer": "SHIPCTL_TOKEN unset caused fallback to bad SC_TOKEN; fixed in PR #11 (sess-001) with precedence test; documented in post-mortem (sess-014)", + "goldSessionIds": ["sess-001", "sess-014"] + }, + { + "id": "q-012", + "type": "preference", + "question": "Which async runtime does the team prefer for new code?", + "answer": "async-std (decided in arch review, not tokio)", + "goldSessionIds": ["sess-015"] + }, + { + "id": "q-013", + "type": "preference", + "question": "What are the user's formatting preferences?", + "answer": "2-space indent in yaml, tab in rust, rust-analyzer over rls, cargo clippy before commit", + "goldSessionIds": ["sess-012"] + }, + { + "id": "q-014", + "type": "multi-session-review", + "question": "What was the race condition Aria fixed in the schema migration?", + "answer": "Dual-write step had race when two writers hit same row; Aria added per-row advisory lock", + "goldSessionIds": ["sess-008", "sess-013"] + }, + { + "id": "q-015", + "type": "temporal", + "question": "What was shipped on April 8th 2026?", + "answer": "Auth env-var precedence fix in PR #11, then started docker arm64 work", + "goldSessionIds": ["sess-001", "sess-011"] + } +] diff --git a/eval/data/coding-agent-life-v1/sessions.json 
b/eval/data/coding-agent-life-v1/sessions.json new file mode 100644 index 00000000..71134134 --- /dev/null +++ b/eval/data/coding-agent-life-v1/sessions.json @@ -0,0 +1,77 @@ +[ + { + "id": "sess-001", + "timestamp": "2026-04-08T09:14:00Z", + "content": "[user] auth check broken on staging when SHIPCTL_TOKEN unset, falling back to wrong env var\n[assistant] looked at src/auth.rs:42. precedence is SHIPCTL_TOKEN > SHIP_TOKEN > legacy SC_TOKEN. legacy path didn't validate. fixing precedence + adding length check.\n[user] yeah ship it\n[assistant] PR #11 opened with fix + regression test. covers all three env names." + }, + { + "id": "sess-002", + "timestamp": "2026-04-08T14:30:00Z", + "content": "[user] docker build failing for arm64 on M2 runner\n[assistant] missing target in Dockerfile FROM line. need --platform=$BUILDPLATFORM on builder stage. updated Dockerfile + Makefile to set BUILDX_PLATFORMS=linux/amd64,linux/arm64. tested locally on apple silicon, builds clean." + }, + { + "id": "sess-003", + "timestamp": "2026-04-09T10:02:00Z", + "content": "[user] retry logic is duplicated across handlers. consolidate.\n[assistant] extracted retry_with_backoff into src/retry.rs. exponential backoff base=200ms cap=30s jitter=full. all 4 callsites now use it. cuts ~80 lines." + }, + { + "id": "sess-004", + "timestamp": "2026-04-10T11:45:00Z", + "content": "[user] add helm chart support\n[assistant] new subcommand `shipctl helm install`. wraps helm CLI, threads through --namespace and --values. PR #14 has the changes + 12 integration tests. Aria reviewed and approved." + }, + { + "id": "sess-005", + "timestamp": "2026-04-11T16:20:00Z", + "content": "[user] test flake on macos-22 runner. emits_changekind_file_delete keeps failing\n[assistant] fsevents propagation slower on apple silicon, the 800ms wait isn't enough. bumped to 1500ms + describe-level retry: 2. all 15 fs-watcher tests pass now." 
+ }, + { + "id": "sess-006", + "timestamp": "2026-04-12T09:50:00Z", + "content": "[user] memory leak in long-running daemon. RSS grows 5MB/min.\n[assistant] traced to unbounded HashMap in src/cache.rs. swapped for LruCache with 10k cap. RSS stable at 180MB after 6h soak test. PR #16." + }, + { + "id": "sess-007", + "timestamp": "2026-04-13T13:15:00Z", + "content": "[user] github API rate-limiting us when listing PRs. 5000/hr isn't enough during ci\n[assistant] added conditional requests with If-None-Match etag. cache 304 responses. drops actual API calls by ~70%. used http-cache crate." + }, + { + "id": "sess-008", + "timestamp": "2026-04-15T10:30:00Z", + "content": "[user] schema migration for run_history table needs zero-downtime\n[assistant] three-phase migration. phase 1: add nullable new column + dual-write. phase 2: backfill + flip reads. phase 3: drop old column. avoids ALTER TABLE locks. ran on staging, took 4s for 2M rows." + }, + { + "id": "sess-009", + "timestamp": "2026-04-16T15:40:00Z", + "content": "[user] docs site is stale. set up auto-deploy from main\n[assistant] added .github/workflows/docs.yml. mdbook build + cloudflare pages deploy. domain shipctl.dev wired up. preview deploys on PRs." + }, + { + "id": "sess-010", + "timestamp": "2026-04-17T11:00:00Z", + "content": "[user] release pipeline. cut v0.3.0 with binaries for linux/macos/windows\n[assistant] new .github/workflows/release.yml. cross-rs for static linux, native for macos + windows. uploads to gh release + crates.io publish. tag-triggered. PR #19 has the workflow." + }, + { + "id": "sess-011", + "timestamp": "2026-04-08T17:00:00Z", + "content": "[user] standup notes: shipped #11 auth fix this morning. starting on docker arm64 next." + }, + { + "id": "sess-012", + "timestamp": "2026-04-10T18:30:00Z", + "content": "[user] preferences: I like rust analyzer over rls. always run cargo clippy before commit. prefer 2-space indent in yaml but tab in rust." 
+ }, + { + "id": "sess-013", + "timestamp": "2026-04-15T19:00:00Z", + "content": "[assistant] reviewed Aria's PR #18 (schema migration). flagged race condition in dual-write step when two writers hit same row. Aria added per-row advisory lock. lgtm now." + }, + { + "id": "sess-014", + "timestamp": "2026-04-16T20:10:00Z", + "content": "[user] post-mortem from prod incident last week: SHIPCTL_TOKEN was unset in staging, fell back to bad SC_TOKEN which had wrong perms. delivery delayed 40min. action items: (1) precedence test (done in #11), (2) startup validation, (3) alert on auth fallback." + }, + { + "id": "sess-015", + "timestamp": "2026-04-17T16:45:00Z", + "content": "[user] preferences: stick to async-std not tokio for new code. team agreed in arch review." + } +] diff --git a/eval/runner/adapters/agentmemory.ts b/eval/runner/adapters/agentmemory.ts new file mode 100644 index 00000000..38028a7d --- /dev/null +++ b/eval/runner/adapters/agentmemory.ts @@ -0,0 +1,93 @@ +import type { Adapter, RankedDoc, Session } from "../types.js"; + +interface AgentMemoryState { + baseUrl: string; + secret?: string; + sessions: Session[]; + observationToSession: Map; +} + +interface RememberResponse { + memory?: { id?: string }; + observationId?: string; + id?: string; + observation?: { id?: string }; +} + +interface SmartSearchResponse { + results?: Array<{ + obsId?: string; + id?: string; + observationId?: string; + sessionId?: string; + score?: number; + content?: string; + }>; + observations?: Array<{ + obsId?: string; + id?: string; + sessionId?: string; + score?: number; + content?: string; + }>; +} + +function authHeaders(secret?: string): Record { + const h: Record = { "Content-Type": "application/json" }; + if (secret) h.Authorization = `Bearer ${secret}`; + return h; +} + +export const agentmemoryAdapter: Adapter = { + name: "agentmemory-hybrid", + async init(sessions, config) { + const baseUrl = (config?.baseUrl as string) ?? process.env.AGENTMEMORY_BASE_URL ?? 
"http://localhost:3111"; + const secret = (config?.secret as string) ?? process.env.AGENTMEMORY_SECRET; + const observationToSession = new Map(); + for (const s of sessions) { + const res = await fetch(`${baseUrl}/agentmemory/remember`, { + method: "POST", + headers: authHeaders(secret), + body: JSON.stringify({ + content: s.content, + type: "eval-session", + concepts: [s.id], + }), + }); + if (!res.ok) { + throw new Error(`remember failed for ${s.id}: ${res.status} ${await res.text()}`); + } + const body = (await res.json()) as RememberResponse; + const obsId = + body.memory?.id ?? body.observationId ?? body.id ?? body.observation?.id; + if (obsId) observationToSession.set(obsId, s.id); + } + return { baseUrl, secret, sessions, observationToSession }; + }, + async query(q, state, k) { + const res = await fetch(`${state.baseUrl}/agentmemory/smart-search`, { + method: "POST", + headers: authHeaders(state.secret), + body: JSON.stringify({ query: q, limit: Math.max(k * 10, 50) }), + }); + if (!res.ok) { + throw new Error(`smart-search failed: ${res.status} ${await res.text()}`); + } + const body = (await res.json()) as SmartSearchResponse; + const rows = body.results ?? body.observations ?? []; + const ranked: RankedDoc[] = []; + const seen = new Set(); + for (const row of rows) { + let sessionId = row.sessionId; + if (!sessionId) { + const memId = row.obsId ?? row.id ?? row.observationId; + sessionId = memId ? state.observationToSession.get(memId) : undefined; + } + if (!sessionId || seen.has(sessionId)) continue; + seen.add(sessionId); + ranked.push({ sessionId, score: row.score ?? 
0 }); + if (ranked.length >= k) break; + } + return ranked; + }, +}; diff --git a/eval/runner/adapters/grep.ts b/eval/runner/adapters/grep.ts new file mode 100644 index 00000000..28b18ea6 --- /dev/null +++ b/eval/runner/adapters/grep.ts @@ -0,0 +1,36 @@ +import type { Adapter, RankedDoc, Session } from "../types.js"; + +interface GrepState { + sessions: Session[]; +} + +function tokenize(s: string): string[] { + return s + .toLowerCase() + .replace(/[^a-z0-9_]+/g, " ") + .split(/\s+/) + .filter((t) => t.length > 2); +} + +export const grepAdapter: Adapter = { + name: "grep", + async init(sessions) { + return { sessions }; + }, + async query(q, state, k) { + const terms = tokenize(q); + const scored: RankedDoc[] = []; + for (const s of state.sessions) { + const body = s.content.toLowerCase(); + let hits = 0; + for (const t of terms) { + if (body.includes(t)) hits += 1; + } + if (hits > 0) { + scored.push({ sessionId: s.id, score: hits }); + } + } + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, k); + }, +}; diff --git a/eval/runner/adapters/vector.ts b/eval/runner/adapters/vector.ts new file mode 100644 index 00000000..c40e414d --- /dev/null +++ b/eval/runner/adapters/vector.ts @@ -0,0 +1,108 @@ +import type { Adapter, RankedDoc, Session } from "../types.js"; + +interface VectorState { + sessions: Session[]; + embeddings: Float32Array[]; +} + +const OPENAI_URL = "https://api.openai.com/v1/embeddings"; +const MODEL = "text-embedding-3-small"; +const DIM = 1536; + +async function embed(text: string, apiKey: string): Promise { + const res = await fetch(OPENAI_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ input: text, model: MODEL }), + }); + if (!res.ok) { + throw new Error(`OpenAI embed failed: ${res.status} ${await res.text()}`); + } + const data = (await res.json()) as { data: Array<{ embedding: number[] }> }; + return 
Float32Array.from(data.data[0].embedding); +} + +async function embedBatch(texts: string[], apiKey: string): Promise { + const res = await fetch(OPENAI_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ input: texts, model: MODEL }), + }); + if (!res.ok) { + throw new Error(`OpenAI batch embed failed: ${res.status} ${await res.text()}`); + } + const data = (await res.json()) as { data: Array<{ embedding: number[]; index: number }> }; + if (!Array.isArray(data.data) || data.data.length !== texts.length) { + throw new Error( + `OpenAI batch embed: expected ${texts.length} embeddings, got ${data.data?.length ?? 0}`, + ); + } + const out = new Array(texts.length); + for (const row of data.data) { + if ( + !Number.isInteger(row.index) || + row.index < 0 || + row.index >= texts.length || + out[row.index] !== undefined + ) { + throw new Error(`OpenAI batch embed: invalid or duplicate index ${row.index}`); + } + if (!Array.isArray(row.embedding) || row.embedding.length === 0) { + throw new Error(`OpenAI batch embed: empty embedding at index ${row.index}`); + } + out[row.index] = Float32Array.from(row.embedding); + } + return out; +} + +function cosine(a: Float32Array, b: Float32Array): number { + let dot = 0; + let na = 0; + let nb = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i]; + na += a[i] * a[i]; + nb += b[i] * b[i]; + } + const denom = Math.sqrt(na) * Math.sqrt(nb); + return denom === 0 ? 
0 : dot / denom; +} + +export const vectorAdapter: Adapter = { + name: "vector", + async init(sessions) { + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter"); + const embeddings: Float32Array[] = new Array(sessions.length); + const BATCH = 50; + for (let i = 0; i < sessions.length; i += BATCH) { + const batch = sessions.slice(i, i + BATCH); + const vecs = await embedBatch( + batch.map((s) => s.content.slice(0, 8000)), + apiKey, + ); + for (let j = 0; j < vecs.length; j++) embeddings[i + j] = vecs[j]; + } + if (embeddings.length > 0 && embeddings[0].length !== DIM) { + throw new Error(`unexpected embedding dim: ${embeddings[0].length}`); + } + return { sessions, embeddings }; + }, + async query(q, state, k) { + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) throw new Error("OPENAI_API_KEY required for vector adapter"); + const qvec = await embed(q, apiKey); + const scored: RankedDoc[] = state.sessions.map((s, i) => ({ + sessionId: s.id, + score: cosine(qvec, state.embeddings[i]), + })); + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, k); + }, +}; diff --git a/eval/runner/coding-life.ts b/eval/runner/coding-life.ts new file mode 100644 index 00000000..753ca87f --- /dev/null +++ b/eval/runner/coding-life.ts @@ -0,0 +1,101 @@ +import { readFileSync, existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs"; +import { resolve } from "node:path"; +import { parseArgs } from "node:util"; +import { agentmemoryAdapter } from "./adapters/agentmemory.js"; +import { grepAdapter } from "./adapters/grep.js"; +import { vectorAdapter } from "./adapters/vector.js"; +import { aggregate, scoreQuestion } from "./score.js"; +import type { Adapter, Question, ScoreRow, Session } from "./types.js"; + +const ADAPTERS: Record = { + grep: grepAdapter as unknown as Adapter, + vector: vectorAdapter as unknown as Adapter, + agentmemory: agentmemoryAdapter as unknown as Adapter, +}; + 
+interface CliOptions { + data: string; + adapters: string; + k: string; + out: string; +} + +function parse(): CliOptions { + const { values } = parseArgs({ + options: { + data: { type: "string", default: "eval/data/coding-agent-life-v1" }, + adapters: { type: "string", default: "grep,vector,agentmemory" }, + k: { type: "string", default: "5" }, + out: { type: "string", default: "eval/reports/coding-life" }, + }, + }); + return values as unknown as CliOptions; +} + +async function main(): Promise { + const opts = parse(); + const k = Number(opts.k); + if (!Number.isInteger(k) || k <= 0) { + console.error(`--k must be a positive integer, got: ${opts.k}`); + process.exit(2); + } + const sessions = JSON.parse( + readFileSync(resolve(opts.data, "sessions.json"), "utf8"), + ) as Session[]; + const queriesRaw = JSON.parse( + readFileSync(resolve(opts.data, "queries.json"), "utf8"), + ) as Array>; + const questions: Question[] = queriesRaw.map((q) => ({ ...q, haystack: sessions })); + const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean); + for (const a of adapterNames) { + if (!ADAPTERS[a]) { + console.error(`unknown adapter: ${a}. 
options: ${Object.keys(ADAPTERS).join(",")}`); + process.exit(2); + } + } + console.log( + `loaded ${sessions.length} sessions, ${questions.length} queries, adapters: ${adapterNames.join(",")}, k=${k}`, + ); + + const outDir = resolve(opts.out); + mkdirSync(outDir, { recursive: true }); + const ndjsonPath = `${outDir}/scores.ndjson`; + if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, ""); + + const rows: ScoreRow[] = []; + for (const adapterName of adapterNames) { + const adapter = ADAPTERS[adapterName]; + console.log(`\n== ${adapter.name} ==`); + const state = await adapter.init(sessions); + try { + for (const q of questions) { + const t0 = performance.now(); + const ranked = await adapter.query(q.question, state, k); + const latencyMs = performance.now() - t0; + const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs); + rows.push(row); + appendFileSync(ndjsonPath, JSON.stringify(row) + "\n"); + const mark = row.hit ? "+" : "-"; + console.log( + ` ${mark} ${q.id} [${q.type}] R@${k}=${row.recallAtK.toFixed(2)} (${Math.round(latencyMs)}ms)`, + ); + } + } finally { + if (adapter.teardown) await adapter.teardown(state); + } + } + + const agg = aggregate(rows); + writeFileSync(`${outDir}/summary.json`, JSON.stringify(agg, null, 2)); + console.log("\n=== Summary ==="); + for (const [adapter, stats] of Object.entries(agg.byAdapter)) { + console.log( + ` ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`, + ); + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/eval/runner/load.ts b/eval/runner/load.ts new file mode 100644 index 00000000..aece2452 --- /dev/null +++ b/eval/runner/load.ts @@ -0,0 +1,54 @@ +import { readFileSync } from "node:fs"; +import type { Question, Session } from "./types.js"; + +interface LongMemEvalRaw { + question_id: string; + question_type: string; + question: string; + answer?: string; + 
answer_session_ids: string[]; + haystack_session_ids: string[]; + haystack_sessions: Array>; +} + +function flattenSession(turns: Array<{ role: string; content: string }>): string { + return turns.map((t) => `[${t.role}] ${t.content}`).join("\n\n"); +} + +export function loadLongMemEval(path: string, limit?: number): Question[] { + const raw = JSON.parse(readFileSync(path, "utf8")) as LongMemEvalRaw[]; + const slice = typeof limit === "number" ? raw.slice(0, limit) : raw; + const questions: Question[] = []; + for (const r of slice) { + if (r.haystack_session_ids.length !== r.haystack_sessions.length) { + throw new Error( + `LongMemEval row ${r.question_id}: haystack_session_ids (${r.haystack_session_ids.length}) and haystack_sessions (${r.haystack_sessions.length}) length mismatch`, + ); + } + const haystack: Session[] = r.haystack_session_ids.map((id, i) => ({ + id, + content: flattenSession(r.haystack_sessions[i]), + })); + questions.push({ + id: r.question_id, + type: r.question_type, + question: r.question, + answer: r.answer, + goldSessionIds: r.answer_session_ids, + haystack, + }); + } + return questions; +} + +export function stratifySample(questions: Question[], perType: number): Question[] { + const buckets: Record = {}; + for (const q of questions) { + (buckets[q.type] ??= []).push(q); + } + const out: Question[] = []; + for (const type of Object.keys(buckets).sort()) { + out.push(...buckets[type].slice(0, perType)); + } + return out; +} diff --git a/eval/runner/longmemeval.ts b/eval/runner/longmemeval.ts new file mode 100644 index 00000000..a906fa21 --- /dev/null +++ b/eval/runner/longmemeval.ts @@ -0,0 +1,126 @@ +import { existsSync, mkdirSync, writeFileSync, appendFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { parseArgs } from "node:util"; +import { agentmemoryAdapter } from "./adapters/agentmemory.js"; +import { grepAdapter } from "./adapters/grep.js"; +import { vectorAdapter } from "./adapters/vector.js"; +import 
{ loadLongMemEval, stratifySample } from "./load.js"; +import { aggregate, scoreQuestion } from "./score.js"; +import type { Adapter, ScoreRow } from "./types.js"; + +const ADAPTERS: Record = { + grep: grepAdapter as unknown as Adapter, + vector: vectorAdapter as unknown as Adapter, + agentmemory: agentmemoryAdapter as unknown as Adapter, +}; + +interface CliOptions { + data: string; + adapters: string; + k: string; + limit?: string; + stratify?: string; + out: string; +} + +function parse(): CliOptions { + const { values } = parseArgs({ + options: { + data: { type: "string", default: process.env.LONGMEMEVAL_PATH ?? "" }, + adapters: { type: "string", default: "grep,vector,agentmemory" }, + k: { type: "string", default: "5" }, + limit: { type: "string" }, + stratify: { type: "string" }, + out: { type: "string", default: "eval/reports/longmemeval" }, + }, + }); + return values as unknown as CliOptions; +} + +async function main(): Promise { + const opts = parse(); + if (!opts.data) { + console.error("--data required (or LONGMEMEVAL_PATH env)"); + process.exit(2); + } + const k = Number(opts.k); + if (!Number.isInteger(k) || k <= 0) { + console.error(`--k must be a positive integer, got: ${opts.k}`); + process.exit(2); + } + let limit: number | undefined; + if (opts.limit !== undefined) { + limit = Number(opts.limit); + if (!Number.isInteger(limit) || limit <= 0) { + console.error(`--limit must be a positive integer, got: ${opts.limit}`); + process.exit(2); + } + } + let perType: number | undefined; + if (opts.stratify !== undefined) { + perType = Number(opts.stratify); + if (!Number.isInteger(perType) || perType <= 0) { + console.error(`--stratify must be a positive integer, got: ${opts.stratify}`); + process.exit(2); + } + } + const adapterNames = opts.adapters.split(",").map((s) => s.trim()).filter(Boolean); + for (const a of adapterNames) { + if (!ADAPTERS[a]) { + console.error(`unknown adapter: ${a}. 
options: ${Object.keys(ADAPTERS).join(",")}`); + process.exit(2); + } + } + let questions = loadLongMemEval(resolve(opts.data), limit); + if (perType) questions = stratifySample(questions, perType); + console.log( + `loaded ${questions.length} questions, adapters: ${adapterNames.join(",")}, k=${k}`, + ); + + const outDir = resolve(opts.out); + mkdirSync(outDir, { recursive: true }); + const ndjsonPath = `${outDir}/scores.ndjson`; + if (existsSync(ndjsonPath)) writeFileSync(ndjsonPath, ""); + mkdirSync(dirname(ndjsonPath), { recursive: true }); + + const rows: ScoreRow[] = []; + for (const adapterName of adapterNames) { + const adapter = ADAPTERS[adapterName]; + console.log(`\n== ${adapter.name} ==`); + for (const q of questions) { + const t0 = performance.now(); + const state = await adapter.init(q.haystack); + try { + const ranked = await adapter.query(q.question, state, k); + const latencyMs = performance.now() - t0; + const row = scoreQuestion(q, ranked, k, adapter.name, latencyMs); + rows.push(row); + appendFileSync(ndjsonPath, JSON.stringify(row) + "\n"); + const mark = row.hit ? 
"+" : "-"; + console.log( + ` ${mark} ${q.id} [${q.type}] R@${k}=${row.recallAtK.toFixed(2)} (${Math.round(latencyMs)}ms)`, + ); + } finally { + if (adapter.teardown) await adapter.teardown(state); + } + } + } + + const agg = aggregate(rows); + const summaryPath = `${outDir}/summary.json`; + writeFileSync(summaryPath, JSON.stringify(agg, null, 2)); + + console.log("\n=== Summary ==="); + for (const [adapter, stats] of Object.entries(agg.byAdapter)) { + console.log( + ` ${adapter.padEnd(22)} P@${k}=${stats.p.toFixed(3)} R@${k}=${stats.r.toFixed(3)} hit=${stats.hit}/${stats.n} p50=${Math.round(stats.latencyP50)}ms`, + ); + } + console.log(`\nwrote ${ndjsonPath}`); + console.log(`wrote ${summaryPath}`); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/eval/runner/score.ts b/eval/runner/score.ts new file mode 100644 index 00000000..b21d30ca --- /dev/null +++ b/eval/runner/score.ts @@ -0,0 +1,78 @@ +import type { Question, RankedDoc, ScoreRow } from "./types.js"; + +export function scoreQuestion( + q: Question, + ranked: RankedDoc[], + k: number, + adapter: string, + latencyMs: number, +): ScoreRow { + const topK = ranked.slice(0, k).map((r) => r.sessionId); + const gold = new Set(q.goldSessionIds); + const hits = topK.filter((id) => gold.has(id)).length; + const precisionAtK = k > 0 ? hits / k : 0; + const recallAtK = gold.size === 0 ? 
0 : hits / gold.size; + const hit = hits > 0; + let topGoldRank: number | null = null; + for (let i = 0; i < ranked.length; i++) { + if (gold.has(ranked[i].sessionId)) { + topGoldRank = i + 1; + break; + } + } + return { + questionId: q.id, + questionType: q.type, + adapter, + k, + precisionAtK, + recallAtK, + hit, + topGoldRank, + latencyMs, + }; +} + +export function aggregate(rows: ScoreRow[]): { + byAdapter: Record; + byType: Record>; +} { + const byAdapter: Record< + string, + { p: number; r: number; hit: number; n: number; latencyP50: number } + > = {}; + const latencies: Record = {}; + for (const r of rows) { + const a = (byAdapter[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0, latencyP50: 0 }); + a.p += r.precisionAtK; + a.r += r.recallAtK; + a.hit += r.hit ? 1 : 0; + a.n += 1; + (latencies[r.adapter] ??= []).push(r.latencyMs); + } + for (const adapter of Object.keys(byAdapter)) { + const a = byAdapter[adapter]; + a.p = a.p / a.n; + a.r = a.r / a.n; + const sorted = latencies[adapter].slice().sort((x, y) => x - y); + a.latencyP50 = sorted[Math.floor(sorted.length / 2)] ?? 0; + } + const byType: Record> = + {}; + for (const r of rows) { + const t = (byType[r.questionType] ??= {}); + const a = (t[r.adapter] ??= { p: 0, r: 0, hit: 0, n: 0 }); + a.p += r.precisionAtK; + a.r += r.recallAtK; + a.hit += r.hit ? 
1 : 0; + a.n += 1; + } + for (const t of Object.keys(byType)) { + for (const adapter of Object.keys(byType[t])) { + const a = byType[t][adapter]; + a.p = a.p / a.n; + a.r = a.r / a.n; + } + } + return { byAdapter, byType }; +} diff --git a/eval/runner/types.ts b/eval/runner/types.ts new file mode 100644 index 00000000..e72a6408 --- /dev/null +++ b/eval/runner/types.ts @@ -0,0 +1,38 @@ +export interface Session { + id: string; + timestamp?: string; + content: string; +} + +export interface Question { + id: string; + type: string; + question: string; + answer?: string; + goldSessionIds: string[]; + haystack: Session[]; +} + +export interface RankedDoc { + sessionId: string; + score: number; +} + +export interface Adapter { + name: string; + init(sessions: Session[], config?: Record): Promise; + query(q: string, state: State, k: number): Promise; + teardown?(state: State): Promise; +} + +export interface ScoreRow { + questionId: string; + questionType: string; + adapter: string; + k: number; + precisionAtK: number; + recallAtK: number; + hit: boolean; + topGoldRank: number | null; + latencyMs: number; +} diff --git a/eval/scripts/sandbox.sh b/eval/scripts/sandbox.sh new file mode 100755 index 00000000..5d402330 --- /dev/null +++ b/eval/scripts/sandbox.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Boot a sandboxed agentmemory + iii-engine on alt ports with a clean data dir, +# so eval runs aren't polluted by (and don't pollute) your real ~/.agentmemory. +# Source it: `source eval/scripts/sandbox.sh` then run eval scripts; +# the sandbox is torn down on EXIT. + +set -euo pipefail + +SANDBOX_ROOT="${SANDBOX_ROOT:-/tmp/agentmemory-eval-sandbox}" +SANDBOX_PORT="${SANDBOX_PORT:-3411}" +SANDBOX_STREAM_PORT="${SANDBOX_STREAM_PORT:-3412}" + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" + +if ! command -v iii >/dev/null 2>&1; then + echo "iii binary not on PATH. 
Install pinned version:" + echo " curl -fsSL https://github.com/iii-hq/iii/releases/download/iii/v0.11.2/iii-aarch64-apple-darwin.tar.gz | tar -xz -C ~/.local/bin" + exit 1 +fi + +iii_ver=$(iii --version 2>&1 | head -1) +if [[ "$iii_ver" != "0.11.2" ]]; then + echo "warning: iii version on PATH is $iii_ver; agentmemory pins 0.11.2" +fi + +if [[ ! -f "$REPO_ROOT/dist/index.mjs" ]]; then + echo "dist/ missing. Run: npm run build" >&2 + exit 1 +fi + +if [[ -z "${SANDBOX_ROOT:-}" || "$SANDBOX_ROOT" == "/" || "$SANDBOX_ROOT" != /tmp/* ]]; then + echo "refusing to wipe SANDBOX_ROOT='$SANDBOX_ROOT' — must be non-empty and under /tmp/" >&2 + exit 1 +fi +rm -rf "$SANDBOX_ROOT" +mkdir -p "$SANDBOX_ROOT/data" "$SANDBOX_ROOT/.agentmemory" + +cat > "$SANDBOX_ROOT/iii-config.yaml" < "$SANDBOX_ROOT/iii.log" 2>&1 & +SANDBOX_PID=$! + +cleanup() { + echo "tearing down sandbox (pid $SANDBOX_PID)" + kill "$SANDBOX_PID" 2>/dev/null || true + sleep 1 + kill -9 "$SANDBOX_PID" 2>/dev/null || true +} +trap cleanup EXIT + +# wait for livez +for i in $(seq 1 30); do + if curl -sS --max-time 1 "http://localhost:$SANDBOX_PORT/agentmemory/livez" 2>/dev/null | grep -q '"status":"ok"'; then + export AGENTMEMORY_BASE_URL="http://localhost:$SANDBOX_PORT" + echo "sandbox ready: $AGENTMEMORY_BASE_URL" + echo " state: $SANDBOX_ROOT/data/" + echo " logs: $SANDBOX_ROOT/iii.log" + return 0 2>/dev/null || exit 0 + fi + sleep 1 +done + +echo "sandbox failed to come up within 30s. last log lines:" >&2 +tail -10 "$SANDBOX_ROOT/iii.log" >&2 +exit 1 diff --git a/integrations/hermes/plugin.yaml b/integrations/hermes/plugin.yaml index b4f32151..9ea5cb98 100644 --- a/integrations/hermes/plugin.yaml +++ b/integrations/hermes/plugin.yaml @@ -4,6 +4,9 @@ description: "Persistent cross-session memory for Hermes Agent via agentmemory. 
author: "Rohit Ghumare" homepage: "https://github.com/rohitg00/agentmemory" hooks: + - prefetch + - sync_turn - on_session_end - on_pre_compress - on_memory_write + - system_prompt_block diff --git a/package.json b/package.json index 820fc8f7..bc245a2f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@agentmemory/agentmemory", - "version": "0.9.20", + "version": "0.9.21", "description": "Persistent memory for AI coding agents, powered by iii-engine's three primitives", "type": "module", "main": "dist/index.mjs", @@ -25,7 +25,9 @@ "test:watch": "vitest --exclude test/integration.test.ts", "test:integration": "vitest run test/integration.test.ts", "test:all": "vitest run", - "bench:load": "node --import tsx benchmark/load-100k.ts" + "bench:load": "node --import tsx benchmark/load-100k.ts", + "eval:longmemeval": "tsx eval/runner/longmemeval.ts", + "eval:coding-life": "tsx eval/runner/coding-life.ts" }, "keywords": [ "ai", @@ -60,7 +62,7 @@ "@anthropic-ai/sdk": "^0.39.0", "@clack/prompts": "^1.2.0", "dotenv": "^17.4.2", - "iii-sdk": "^0.11.2", + "iii-sdk": "0.11.2", "zod": "^4.0.0" }, "optionalDependencies": { diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 403295dd..96da3ae4 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -1,6 +1,6 @@ { "name": "@agentmemory/mcp", - "version": "0.9.20", + "version": "0.9.21", "description": "Standalone MCP server for agentmemory — thin shim that re-exposes @agentmemory/agentmemory's MCP entrypoint", "type": "module", "bin": { diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index a18860e4..e53f8088 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "agentmemory", - "version": "0.9.20", + "version": "0.9.21", "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 
12 hooks, 51 MCP tools, 4 skills, real-time viewer.", "author": { "name": "Rohit Ghumare", diff --git a/plugin/.codex-plugin/plugin.json b/plugin/.codex-plugin/plugin.json index f8d676f6..0a7cc173 100644 --- a/plugin/.codex-plugin/plugin.json +++ b/plugin/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "agentmemory", - "version": "0.9.20", + "version": "0.9.21", "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 6 hooks, 51 MCP tools, 4 skills, real-time viewer.", "author": { "name": "Rohit Ghumare", diff --git a/plugin/.mcp.copilot.json b/plugin/.mcp.copilot.json new file mode 100644 index 00000000..01d03f7d --- /dev/null +++ b/plugin/.mcp.copilot.json @@ -0,0 +1,14 @@ +{ + "mcpServers": { + "agentmemory": { + "type": "local", + "command": "npx", + "args": ["-y", "@agentmemory/mcp"], + "env": { + "AGENTMEMORY_URL": "${AGENTMEMORY_URL}", + "AGENTMEMORY_SECRET": "${AGENTMEMORY_SECRET}" + }, + "tools": ["*"] + } + } +} diff --git a/plugin/hooks/hooks.codex.json b/plugin/hooks/hooks.codex.json index 73e43c66..d2c3a3b6 100644 --- a/plugin/hooks/hooks.codex.json +++ b/plugin/hooks/hooks.codex.json @@ -5,7 +5,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\"", "statusMessage": "agentmemory: loading session context" } ] @@ -16,7 +16,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\"", "statusMessage": "agentmemory: recalling relevant memories" } ] @@ -28,7 +28,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\"" } ] } @@ -38,7 +38,7 @@ "hooks": [ { "type": "command", - "command": "node 
${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\"" } ] } @@ -48,7 +48,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\"" } ] } @@ -58,7 +58,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\"" } ] } diff --git a/plugin/hooks/hooks.copilot.json b/plugin/hooks/hooks.copilot.json new file mode 100644 index 00000000..b7d09f8b --- /dev/null +++ b/plugin/hooks/hooks.copilot.json @@ -0,0 +1,72 @@ +{ + "version": 1, + "hooks": { + "sessionStart": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/session-start.mjs" + } + ], + "userPromptSubmitted": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/prompt-submit.mjs" + } + ], + "preToolUse": [ + { + "type": "command", + "matcher": "edit|write|create|read|view|glob|grep", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/pre-tool-use.mjs" + } + ], + "postToolUse": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/post-tool-use.mjs" + } + ], + "postToolUseFailure": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/post-tool-failure.mjs" + } + ], + "preCompact": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/pre-compact.mjs" + } + ], + "agentStop": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/stop.mjs" + } + ], + "sessionEnd": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/session-end.mjs" + } + ], + "subagentStart": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/subagent-start.mjs" + } + ], + "subagentStop": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/subagent-stop.mjs" + } + 
], + "notification": [ + { + "type": "command", + "command": "node ${COPILOT_PLUGIN_ROOT}/scripts/notification.mjs" + } + ] + } +} diff --git a/plugin/hooks/hooks.json b/plugin/hooks/hooks.json index d60d664a..a13c9973 100644 --- a/plugin/hooks/hooks.json +++ b/plugin/hooks/hooks.json @@ -5,7 +5,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.mjs\"" } ] } @@ -15,7 +15,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/prompt-submit.mjs\"" } ] } @@ -26,7 +26,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-tool-use.mjs\"" } ] } @@ -36,7 +36,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use.mjs\"" } ] } @@ -46,7 +46,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-failure.mjs\"" } ] } @@ -56,7 +56,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.mjs\"" } ] } @@ -66,7 +66,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start.mjs\"" } ] } @@ -76,7 +76,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-stop.mjs\"" } ] } @@ -86,7 +86,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs" + "command": "node 
\"${CLAUDE_PLUGIN_ROOT}/scripts/notification.mjs\"" } ] } @@ -96,7 +96,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/task-completed.mjs\"" } ] } @@ -106,7 +106,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/stop.mjs\"" } ] } @@ -116,7 +116,7 @@ "hooks": [ { "type": "command", - "command": "node ${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs" + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/session-end.mjs\"" } ] } diff --git a/plugin/plugin.json b/plugin/plugin.json new file mode 100644 index 00000000..4dd30bb7 --- /dev/null +++ b/plugin/plugin.json @@ -0,0 +1,15 @@ +{ + "name": "agentmemory", + "version": "0.9.21", + "description": "Persistent memory for AI coding agents -- captures tool usage, compresses via LLM, injects context into future sessions. 12 hooks, 53 MCP tools, 4 skills, real-time viewer.", + "author": { + "name": "Rohit Ghumare", + "url": "https://github.com/rohitg00" + }, + "license": "Apache-2.0", + "homepage": "https://github.com/rohitg00/agentmemory", + "repository": "https://github.com/rohitg00/agentmemory", + "skills": "skills/", + "mcpServers": ".mcp.copilot.json", + "hooks": "hooks/hooks.copilot.json" +} diff --git a/plugin/scripts/notification.mjs b/plugin/scripts/notification.mjs index a318848d..8ba2c9b0 100755 --- a/plugin/scripts/notification.mjs +++ b/plugin/scripts/notification.mjs @@ -22,8 +22,10 @@ async function main() { return; } if (isSdkChildContext(data)) return; - if (data.notification_type !== "permission_prompt") return; - const sessionId = data.session_id || "unknown"; + const notificationType = data.notification_type ?? data.notificationType; + if (notificationType !== "permission_prompt") return; + const rawSessionId = data.session_id ?? 
data.sessionId; + const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown"; try { await fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", @@ -35,7 +37,7 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), data: { - notification_type: data.notification_type, + notification_type: notificationType, title: data.title, message: data.message } diff --git a/plugin/scripts/post-tool-failure.mjs b/plugin/scripts/post-tool-failure.mjs index 3a593f3a..902a0930 100755 --- a/plugin/scripts/post-tool-failure.mjs +++ b/plugin/scripts/post-tool-failure.mjs @@ -22,8 +22,11 @@ async function main() { return; } if (isSdkChildContext(data)) return; - if (data.is_interrupt) return; - const sessionId = data.session_id || "unknown"; + if (data.is_interrupt || data.isInterrupt) return; + const sessionId = data.session_id || data.sessionId || "unknown"; + const toolName = data.tool_name ?? data.toolName; + const toolInput = data.tool_input ?? data.toolArgs; + const error = data.error ?? data.errorMessage; try { await fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", @@ -35,9 +38,9 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), data: { - tool_name: data.tool_name, - tool_input: typeof data.tool_input === "string" ? data.tool_input.slice(0, 4e3) : JSON.stringify(data.tool_input ?? "").slice(0, 4e3), - error: typeof data.error === "string" ? data.error.slice(0, 4e3) : JSON.stringify(data.error ?? "").slice(0, 4e3) + tool_name: toolName, + tool_input: typeof toolInput === "string" ? toolInput.slice(0, 4e3) : JSON.stringify(toolInput ?? "").slice(0, 4e3), + error: typeof error === "string" ? error.slice(0, 4e3) : JSON.stringify(error ?? 
"").slice(0, 4e3) } }), signal: AbortSignal.timeout(3e3) diff --git a/plugin/scripts/post-tool-use.mjs b/plugin/scripts/post-tool-use.mjs index 5ebec645..68a78ef7 100755 --- a/plugin/scripts/post-tool-use.mjs +++ b/plugin/scripts/post-tool-use.mjs @@ -22,8 +22,10 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; - const { imageData, cleanOutput } = extractImageData(data.tool_output); + const sessionId = data.session_id || data.sessionId || "unknown"; + const toolName = data.tool_name ?? data.toolName; + const toolInput = data.tool_input ?? data.toolArgs; + const { imageData, cleanOutput } = extractImageData(toolOutput(data)); try { await fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", @@ -35,8 +37,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), data: { - tool_name: data.tool_name, - tool_input: data.tool_input, + tool_name: toolName, + tool_input: toolInput, tool_output: truncate(cleanOutput, 8e3), ...imageData ? { image_data: imageData } : {} } @@ -45,6 +47,16 @@ async function main() { }); } catch {} } +function toolOutput(data) { + if (data.tool_response !== void 0) return data.tool_response; + if (data.tool_output !== void 0) return data.tool_output; + const result = data.tool_result ?? data.toolResult; + if (typeof result === "object" && result !== null) { + const obj = result; + return obj.text_result_for_llm ?? obj.textResultForLlm ?? 
result; + } + return result; +} function isBase64Image(val) { return typeof val === "string" && (val.startsWith("data:image/") || val.startsWith("iVBORw0KGgo") || val.startsWith("/9j/")); } diff --git a/plugin/scripts/pre-compact.mjs b/plugin/scripts/pre-compact.mjs index bff9e7fa..b68bf025 100755 --- a/plugin/scripts/pre-compact.mjs +++ b/plugin/scripts/pre-compact.mjs @@ -22,7 +22,7 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; const project = data.cwd || process.cwd(); if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") try { await fetch(`${REST_URL}/agentmemory/claude-bridge/sync`, { diff --git a/plugin/scripts/pre-tool-use.mjs b/plugin/scripts/pre-tool-use.mjs index 561b6b0d..16892fcd 100755 --- a/plugin/scripts/pre-tool-use.mjs +++ b/plugin/scripts/pre-tool-use.mjs @@ -24,18 +24,22 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const toolName = data.tool_name; + const toolName = typeof data.tool_name === "string" ? data.tool_name : typeof data.toolName === "string" ? data.toolName : void 0; if (!toolName) return; + const normalizedToolName = toolName.toLowerCase(); if (![ - "Edit", - "Write", - "Read", - "Glob", - "Grep" - ].includes(toolName)) return; - const toolInput = data.tool_input || {}; + "edit", + "write", + "create", + "read", + "view", + "glob", + "grep" + ].includes(normalizedToolName)) return; + const rawToolInput = data.tool_input ?? data.toolArgs; + const toolInput = typeof rawToolInput === "object" && rawToolInput !== null && !Array.isArray(rawToolInput) ? rawToolInput : {}; const files = []; - const fileKeys = toolName === "Grep" ? ["path", "file"] : [ + const fileKeys = normalizedToolName === "grep" ? 
["path", "file"] : [ "file_path", "path", "file", @@ -47,11 +51,12 @@ async function main() { } if (files.length === 0) return; const terms = []; - if (toolName === "Grep" || toolName === "Glob") { + if (normalizedToolName === "grep" || normalizedToolName === "glob") { const pattern = toolInput["pattern"]; if (typeof pattern === "string" && pattern.length > 0) terms.push(pattern); } - const sessionId = data.session_id || "unknown"; + const rawSessionId = data.session_id || data.sessionId; + const sessionId = typeof rawSessionId === "string" && rawSessionId.length > 0 ? rawSessionId : "unknown"; try { const res = await fetch(`${REST_URL}/agentmemory/enrich`, { method: "POST", diff --git a/plugin/scripts/prompt-submit.mjs b/plugin/scripts/prompt-submit.mjs index 18aa040a..a8a61192 100755 --- a/plugin/scripts/prompt-submit.mjs +++ b/plugin/scripts/prompt-submit.mjs @@ -22,7 +22,7 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; try { await fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", @@ -33,7 +33,7 @@ async function main() { project: data.cwd || process.cwd(), cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), - data: { prompt: data.prompt } + data: { prompt: data.prompt ?? 
data.userPrompt } }), signal: AbortSignal.timeout(3e3) }); diff --git a/plugin/scripts/session-end.mjs b/plugin/scripts/session-end.mjs index 8e1de092..7707e357 100755 --- a/plugin/scripts/session-end.mjs +++ b/plugin/scripts/session-end.mjs @@ -22,7 +22,7 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; try { await fetch(`${REST_URL}/agentmemory/session/end`, { method: "POST", diff --git a/plugin/scripts/session-start.mjs b/plugin/scripts/session-start.mjs index 9e573e24..f1ec1be6 100755 --- a/plugin/scripts/session-start.mjs +++ b/plugin/scripts/session-start.mjs @@ -25,7 +25,7 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || `ses_${Date.now().toString(36)}`; + const sessionId = data.session_id || data.sessionId || `ses_${Date.now().toString(36)}`; const project = data.cwd || process.cwd(); const url = `${REST_URL}/agentmemory/session/start`; const init = { diff --git a/plugin/scripts/stop.mjs b/plugin/scripts/stop.mjs index e0ffa350..3fe5cb36 100755 --- a/plugin/scripts/stop.mjs +++ b/plugin/scripts/stop.mjs @@ -22,7 +22,7 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; try { await fetch(`${REST_URL}/agentmemory/summarize`, { method: "POST", diff --git a/plugin/scripts/subagent-start.mjs b/plugin/scripts/subagent-start.mjs index db143459..c0d0b5eb 100755 --- a/plugin/scripts/subagent-start.mjs +++ b/plugin/scripts/subagent-start.mjs @@ -23,7 +23,9 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; + const agentId = data.agent_id || data.agentName; + const agentType = 
data.agent_type || data.agentDisplayName || data.agentName; fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", headers: authHeaders(), @@ -34,8 +36,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), data: { - agent_id: data.agent_id, - agent_type: data.agent_type + agent_id: agentId, + agent_type: agentType } }), signal: AbortSignal.timeout(TIMEOUT_MS) diff --git a/plugin/scripts/subagent-stop.mjs b/plugin/scripts/subagent-stop.mjs index 7ec66a7d..8765756d 100755 --- a/plugin/scripts/subagent-stop.mjs +++ b/plugin/scripts/subagent-stop.mjs @@ -22,7 +22,9 @@ async function main() { return; } if (isSdkChildContext(data)) return; - const sessionId = data.session_id || "unknown"; + const sessionId = data.session_id || data.sessionId || "unknown"; + const agentId = data.agent_id || data.agentName; + const agentType = data.agent_type || data.agentDisplayName || data.agentName; const lastMsg = typeof data.last_assistant_message === "string" ? data.last_assistant_message.slice(0, 4e3) : ""; try { await fetch(`${REST_URL}/agentmemory/observe`, { @@ -35,8 +37,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: (/* @__PURE__ */ new Date()).toISOString(), data: { - agent_id: data.agent_id, - agent_type: data.agent_type, + agent_id: agentId, + agent_type: agentType, last_message: lastMsg } }), diff --git a/scripts/backfill-imported-sessions.sh b/scripts/backfill-imported-sessions.sh new file mode 100755 index 00000000..a247a57e --- /dev/null +++ b/scripts/backfill-imported-sessions.sh @@ -0,0 +1,259 @@ +#!/usr/bin/env bash +# Backfill memory artifacts for sessions imported via `agentmemory import-jsonl`. +# +# The import path only persists Session + Observation rows (via synthetic, +# zero-LLM compression) and the deterministic crystal/lesson derivation. +# It does NOT call mem::summarize, so the semantic/procedural/reflect tiers +# of the consolidation pipeline have nothing to roll up. 
+# +# This script walks every session tagged `jsonl-import` and: +# 1. POSTs /agentmemory/summarize per session (LLM call) +# 2. POSTs /agentmemory/consolidate-pipeline once at the end +# +# Graph extraction (/agentmemory/graph/extract) is intentionally skipped — +# its API takes a per-observation payload, which is cost-prohibitive for +# bulk imports. `reflect` falls back to a no-graph clustering mode. +# +# Usage: +# scripts/backfill-imported-sessions.sh --dry-run +# scripts/backfill-imported-sessions.sh --limit 5 +# scripts/backfill-imported-sessions.sh # process all + +set -euo pipefail + +URL="${AGENTMEMORY_URL:-http://localhost:3111}" +DRY_RUN=0 +LIMIT=0 # 0 = no limit +ONLY_TAG="jsonl-import" +SKIP_CONSOLIDATE=0 +SKIP_AGENTS=0 # drop sessions whose project starts with "agent-" +MAX_OBS=0 # 0 = no cap; skip sessions with more observations than this +DEBUG_ON_ERROR=0 # on failure, dump session metadata + obs to DEBUG_DIR +DEBUG_DIR="${AGENTMEMORY_DEBUG_DIR:-./agentmemory-debug}" +PROJECT_PATTERN="" # jq test() regex against .project; "" means no filter + +# Cost-estimate knobs (defaults tuned for DeepSeek V4 Flash on DeepInfra: +# $0.14 / 1M input, $0.28 / 1M output). Override via env if needed. +COST_IN_PER_1M="${AGENTMEMORY_COST_IN_PER_1M:-0.14}" +COST_OUT_PER_1M="${AGENTMEMORY_COST_OUT_PER_1M:-0.28}" +# Rough token weight per compressed observation, derived from inspecting +# real synthetic-compression payloads in the kv store (mostly 100-300 tok, +# heavy-tailed). Override if your sessions are unusually verbose. +TOKENS_PER_OBS="${AGENTMEMORY_TOKENS_PER_OBS:-200}" +# Reserved per-call output budget (XML summary is small). 
+TOKENS_OUT_PER_SESSION="${AGENTMEMORY_TOKENS_OUT_PER_SESSION:-500}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=1; shift ;; + --limit) LIMIT="${2:?--limit needs a number}"; shift 2 ;; + --tag) ONLY_TAG="${2:?--tag needs a value (use empty string for all)}"; shift 2 ;; + --skip-consolidate) SKIP_CONSOLIDATE=1; shift ;; + --skip-agents) SKIP_AGENTS=1; shift ;; + --max-obs) MAX_OBS="${2:?--max-obs needs a number}"; shift 2 ;; + --debug-on-error) DEBUG_ON_ERROR=1; shift ;; + --project-pattern) PROJECT_PATTERN="${2:?--project-pattern needs a regex}"; shift 2 ;; + -h|--help) + sed -n '2,28p' "$0" + exit 0 ;; + *) echo "unknown flag: $1" >&2; exit 2 ;; + esac +done + +for bin in curl jq; do + command -v "$bin" >/dev/null || { echo "missing dependency: $bin" >&2; exit 1; } +done + +# Curl timeout profiles. Metadata reads (livez, sessions list, observations +# pull for debug dumps) should fail fast and retry transient blips. The LLM +# work calls (summarize, consolidate) intentionally have no --retry and a +# wide --max-time: each call can legitimately take minutes for chunked +# summarize on large sessions, and retrying a half-finished LLM job is +# expensive both in dollars and in duplicated server-side work. +META_CURL_OPTS=(--connect-timeout 10 --max-time 30 --retry 2 --retry-delay 1) +WORK_CURL_OPTS=(--connect-timeout 10 --max-time 1800) + +echo "agentmemory backfill — server: $URL" +[[ "$DRY_RUN" == 1 ]] && echo "DRY RUN: no POSTs will be made." + +# --- liveness --- +if ! 
curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/livez" >/dev/null; then + echo "server not reachable at $URL (try: npx @agentmemory/agentmemory)" >&2 + exit 1 +fi + +# --- collect session ids --- +sessions_json="$(curl -fsS "${META_CURL_OPTS[@]}" "$URL/agentmemory/sessions")" +filter='.sessions[] | select(.status=="completed")' +if [[ -n "$ONLY_TAG" ]]; then + filter+=' | select((.tags // []) | index($tag))' +fi +if [[ "$SKIP_AGENTS" == 1 ]]; then + filter+=' | select((.project // "") | startswith("agent-") | not)' +fi +if [[ -n "$PROJECT_PATTERN" ]]; then + # jq's test() applies a regex against the project string. $tag and $pattern are bound with --arg at the jq call, so quotes or backslashes in them cannot break (or inject into) the filter. + filter+=' | select((.project // "") | test($pattern))' +fi +if [[ "$MAX_OBS" -gt 0 ]]; then + filter+=" | select((.observationCount // 0) <= $MAX_OBS)" +fi +filter+=' | "\(.id)\t\(.observationCount // 0)\t\(.project // "")"' + +rows=() +while IFS= read -r line; do + rows+=("$line") +done < <(echo "$sessions_json" | jq -r --arg tag "$ONLY_TAG" --arg pattern "$PROJECT_PATTERN" "$filter") +total="${#rows[@]}" + +if [[ "$total" -eq 0 ]]; then + echo "no sessions matched (tag='$ONLY_TAG'); nothing to do." + exit 0 +fi + +if [[ "$LIMIT" -gt 0 && "$LIMIT" -lt "$total" ]]; then + rows=("${rows[@]:0:$LIMIT}") +fi + +echo "matched $total session(s); will process ${#rows[@]}." 
+total_obs=0 +for row in "${rows[@]}"; do + obs="$(cut -f2 <<<"$row")" + total_obs=$(( total_obs + obs )) +done +est_in=$(( total_obs * TOKENS_PER_OBS + ${#rows[@]} * 500 )) +est_out=$(( ${#rows[@]} * TOKENS_OUT_PER_SESSION )) +est_cost="$(awk -v i="$est_in" -v o="$est_out" -v ci="$COST_IN_PER_1M" -v co="$COST_OUT_PER_1M" \ + 'BEGIN { printf "%.2f", (i*ci + o*co) / 1000000 }')" + +echo "≈ ${#rows[@]} summarize LLM calls (one per session, covering $total_obs observations)" +printf '≈ %d input tok + %d output tok → $%s (rates: in=$%s/1M out=$%s/1M, %s tok/obs)\n' \ + "$est_in" "$est_out" "$est_cost" "$COST_IN_PER_1M" "$COST_OUT_PER_1M" "$TOKENS_PER_OBS" +echo + +if [[ "$DRY_RUN" == 1 ]]; then + printf '%-40s %10s %s\n' "session" "obs" "project" + for row in "${rows[@]}"; do + id="$(cut -f1 <<<"$row")" + obs="$(cut -f2 <<<"$row")" + proj="$(cut -f3 <<<"$row")" + printf '%-40s %10s %s\n' "$id" "$obs" "$proj" + done + echo + echo "(dry run) next steps if you re-run without --dry-run:" + echo " for each session above: POST $URL/agentmemory/summarize {sessionId}" + if [[ "$SKIP_CONSOLIDATE" == 0 ]]; then + echo " then: POST $URL/agentmemory/consolidate-pipeline {}" + fi + exit 0 +fi + +# --- summarize loop --- +if [[ "$DEBUG_ON_ERROR" == 1 ]]; then + mkdir -p "$DEBUG_DIR" + echo "debug mode: failed calls will dump to $DEBUG_DIR/" + echo +fi + +dump_failure() { + local id="$1" obs="$2" resp="$3" + # Replace anything outside [A-Za-z0-9._-] with `_` before joining with + # DEBUG_DIR. Session IDs from the API are UUIDs in practice, but the + # server doesn't enforce that — a hostile or buggy id containing `/` or + # `..` would otherwise escape the debug directory. + local safe_id + safe_id="$(printf '%s' "$id" | tr -c 'A-Za-z0-9._-' '_')" + local file="$DEBUG_DIR/${safe_id}.json" + # Pull the raw observations (what would have gone into the prompt) so the + # operator can reconstruct the upstream payload locally. 
We also compute + # narrative size stats so size-related rejections are immediately visible. + # Stream observations through stdin (avoids exec-arg overflow on + # multi-thousand-obs sessions — macOS argv ceiling is ~256k). + # `--get --data-urlencode` percent-encodes the session id so special + # characters can't corrupt the query string. The summarize response is bound with --arg (not --argjson) and parsed inside jq, because this path also runs for non-JSON responses; a failed dump warns instead of aborting the backfill under `set -e`. + curl -fsS "${META_CURL_OPTS[@]}" --get \ + --data-urlencode "sessionId=$id" \ + "$URL/agentmemory/observations" \ + | jq \ + --arg id "$id" \ + --argjson obsCount "$obs" \ + --arg url "$URL/agentmemory/summarize" \ + --arg response "$resp" \ + '. as $root + | .observations as $obs + | { + sessionId: $id, + observationCount: $obsCount, + request: { url: $url, method: "POST", body: { sessionId: $id } }, + response: (($response | [try fromjson catch null] | .[0]) // $response), + observations: $obs, + stats: { + totalNarrativeBytes: ($obs | map(.narrative // "" | length) | add // 0), + maxNarrativeBytes: ($obs | map(.narrative // "" | length) | max // 0), + titleHistogram: ($obs | group_by(.title) | map({title: .[0].title, count: length}) | sort_by(-.count)) + } + }' >"$file" || { echo " (debug dump failed for $id)" >&2; return 0; } + echo " → $file" +} + +ok=0; skipped=0; failed=0 +i=0 +for row in "${rows[@]}"; do + i=$(( i + 1 )) + id="$(cut -f1 <<<"$row")" + obs="$(cut -f2 <<<"$row")" + + body="$(jq -nc --arg id "$id" '{sessionId:$id}')" + resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/summarize" \ + -H 'content-type: application/json' --data "$body" || echo '{"success":false,"error":"curl_failed"}')" + # iii's HTTP layer occasionally returns non-JSON (HTML 5xx, empty body + # on timeout, etc.). Validate before parsing so `set -e` doesn't abort + # the whole backfill loop on a single bad response. + if jq -e . 
>/dev/null 2>&1 <<<"$resp"; then + status="$(jq -r '.success // false' <<<"$resp")" + err="$(jq -r '.error // ""' <<<"$resp")" + title="$(jq -r '.summary.title // ""' <<<"$resp")" + else + status="false" + err="invalid_json_response" + title="" + fi + + if [[ "$status" == "true" ]]; then + ok=$(( ok + 1 )) + printf '[%3d/%3d] OK %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$title" + elif [[ "$err" == "no_observations" || "$err" == "no_provider" ]]; then + skipped=$(( skipped + 1 )) + printf '[%3d/%3d] SKIP %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err" + else + failed=$(( failed + 1 )) + printf '[%3d/%3d] FAIL %s obs=%-5s %s\n' "$i" "${#rows[@]}" "$id" "$obs" "$err" + [[ "$DEBUG_ON_ERROR" == 1 ]] && dump_failure "$id" "$obs" "$resp" + fi +done + +echo +echo "summarize: ok=$ok skipped=$skipped failed=$failed" + +# --- consolidate --- +if [[ "$SKIP_CONSOLIDATE" == 1 ]]; then + echo "skipping consolidate-pipeline (--skip-consolidate)" + exit 0 +fi + +if [[ "$ok" -eq 0 ]]; then + echo "no summaries produced; skipping consolidate-pipeline." + exit 0 +fi + +echo +echo "running consolidate-pipeline …" +resp="$(curl -sS "${WORK_CURL_OPTS[@]}" -X POST "$URL/agentmemory/consolidate-pipeline" \ + -H 'content-type: application/json' --data '{}' || echo '{"success":false,"error":"curl_failed"}')" +if jq -e . >/dev/null 2>&1 <<<"$resp"; then + echo "$resp" | jq . +else + echo "consolidate-pipeline returned non-JSON (likely a timeout or upstream error):" + printf '%s\n' "$resp" | head -c 500 + echo +fi diff --git a/src/cli.ts b/src/cli.ts index 5eca18ce..d3d33855 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -117,8 +117,9 @@ Usage: agentmemory [command] [options] Commands: (default) Start agentmemory worker init Copy bundled .env.example to ~/.agentmemory/.env if absent - connect [agent] Wire agentmemory into an installed agent (claude-code, codex, - cursor, gemini-cli, openclaw, hermes, pi, openhuman). 
+ connect [agent] Wire agentmemory into an installed agent (claude-code, + copilot-cli, codex, cursor, gemini-cli, openclaw, + hermes, pi, openhuman). No arg = interactive picker. --all wires every detected agent. --dry-run shows what would change. --force re-installs. status Show connection status, memory count, flags, and health @@ -195,9 +196,36 @@ function getBaseUrl(): string { return `http://localhost:${getRestPort()}`; } +let discoveredViewerPort: number | null = null; + +export async function discoverViewerPort(): Promise<void> { + if (discoveredViewerPort !== null) return; + try { + const res = await fetch(`${getBaseUrl()}/agentmemory/livez`, { + signal: AbortSignal.timeout(1000), + }); + if (res.ok) { + const data = await res.json() as { viewerPort?: number | null }; + if (typeof data.viewerPort === "number") { + discoveredViewerPort = data.viewerPort; + } + } + } catch {} +} + function getViewerUrl(): string { const envUrl = process.env["AGENTMEMORY_VIEWER_URL"]; if (envUrl) return envUrl.replace(/\/+$/, ""); + + if (discoveredViewerPort !== null) { + try { + const u = new URL(getBaseUrl()); + return `${u.protocol}//${u.hostname}:${discoveredViewerPort}`; + } catch { + return `http://localhost:${discoveredViewerPort}`; + } + } + try { const u = new URL(getBaseUrl()); const vPort = @@ -257,7 +285,18 @@ async function isAgentmemoryReady(): Promise<boolean> { const res = await fetch(`${getBaseUrl()}/agentmemory/livez`, { signal: AbortSignal.timeout(2000), }); - return res.ok; + if (!res.ok) return false; + try { + const data = await res.json() as { viewerPort?: number | null; viewerSkipped?: boolean }; + if (typeof data.viewerPort === "number") { + discoveredViewerPort = data.viewerPort; + return true; + } + if (data.viewerSkipped) return true; + return false; + } catch { + return false; + } } catch { return false; } @@ -497,17 +536,8 @@ function detectIiiConsole(): IiiConsoleState { return { kind: "missing" }; } -// install.iii.dev/console/main/install.sh has a bug in 
its release-tag -// filter that rejects every stable release for iii-hq/iii: the jq -// predicate uses `startswith("v")` while the actual tags are -// `iii/v0.12.0` (slash-prefixed). The `--next` path uses a regex -// without the startswith constraint and therefore works today, -// installing the most recent prerelease (e.g. iii/v0.14.0-next.1). -// -// Pass `--next` until the upstream fix lands (iii-hq/iii#1652). -// Switch back to the bare invocation once the script is patched. const III_CONSOLE_INSTALL_CMD = - "curl -fsSL https://install.iii.dev/console/main/install.sh | bash -s -- --next"; + "curl -fsSL https://install.iii.dev/console/main/install.sh | sh"; async function ensureIiiConsole(): Promise { const state = detectIiiConsole(); @@ -1101,6 +1131,9 @@ async function runStatus() { apiFetch(base, "config/flags"), ]); + if (typeof healthRes?.viewerPort === "number") { + discoveredViewerPort = healthRes.viewerPort; + } const h = healthRes?.health; const status = healthRes?.status || "unknown"; const version = healthRes?.version || "?"; @@ -1260,6 +1293,7 @@ function buildDoctorEffects(): DoctorEffects { iiiBinaryVersion: (binPath: string) => iiiBinVersion(binPath), viewerReachable: async (timeoutMs = 2000) => { try { + await discoverViewerPort(); const res = await fetch(getViewerUrl(), { signal: AbortSignal.timeout(timeoutMs), }); @@ -1975,8 +2009,8 @@ async function runUpgrade() { label: "Refreshing dependencies (pnpm install)", }); requireSuccess(installOk, "pnpm install"); - runCommand(pnpmBin, ["up", "iii-sdk@latest"], { - label: "Upgrading iii-sdk to latest", + runCommand(pnpmBin, ["up", "iii-sdk@0.11.2"], { + label: "Pinning iii-sdk@0.11.2", optional: true, }); } else if (npmBin) { @@ -1984,8 +2018,8 @@ async function runUpgrade() { label: "Refreshing dependencies (npm install)", }); requireSuccess(installOk, "npm install"); - runCommand(npmBin, ["install", "iii-sdk@latest"], { - label: "Upgrading iii-sdk to latest", + runCommand(npmBin, ["install", 
"iii-sdk@0.11.2"], { + label: "Pinning iii-sdk@0.11.2", optional: true, }); } else { diff --git a/src/cli/connect/codex-hooks.ts b/src/cli/connect/codex-hooks.ts new file mode 100644 index 00000000..14b8284a --- /dev/null +++ b/src/cli/connect/codex-hooks.ts @@ -0,0 +1,107 @@ +import { existsSync, readFileSync } from "node:fs"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +/** + * Workaround for openai/codex#16430 — Codex Desktop does not dispatch + * plugin-local `hooks.json` even though both `CodexHooks` and `PluginHooks` + * feature flags are stable + default-enabled in + * `codex-rs/features/src/lib.rs`. Until upstream fixes plugin-scope + * dispatch, the same hook commands can be mirrored into the global + * `~/.codex/hooks.json`, which is loaded reliably. + * + * This module builds that mirror, with `${CLAUDE_PLUGIN_ROOT}` resolved to + * the bundled `plugin/` directory so the user-scope file does not depend + * on env-var expansion (Codex only injects `CLAUDE_PLUGIN_ROOT` for + * plugin-scope hooks). + * + * Identification on re-install: every command we write contains the + * resolved `<pluginRoot>/scripts/` prefix, so subsequent installs can + * strip our entries and re-add cleanly without touching the user's other + * hook entries. + */ + +type HookHandler = { type: string; command: string }; +type HookEntry = { matcher?: string; hooks: HookHandler[] }; +export type HookManifest = { hooks: Record<string, HookEntry[]> }; + +/** + * Locate the bundled `plugin/` directory at runtime. Walks up from the + * module's own location looking for `plugin/scripts/` + `plugin/hooks/`, + * both shipped via the npm `files` field. Works for both `dist/cli.mjs` + * (bundled) and `src/cli/connect/codex-hooks.ts` (dev) layouts. 
+ */ +export function findPluginRoot(startUrl: string = import.meta.url): string { + const here = dirname(fileURLToPath(startUrl)); + let dir = here; + for (let i = 0; i < 12; i++) { + if ( + existsSync(join(dir, "plugin", "scripts")) && + existsSync(join(dir, "plugin", "hooks")) + ) { + return resolve(join(dir, "plugin")); + } + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + throw new Error( + `agentmemory: could not locate bundled plugin/ directory (searched up from ${here})`, + ); +} + +/** + * Build the merged hooks.json content. + * + * 1. Strip any entry from `existing` whose first hook command points + * under `/scripts/`. This lets us re-install idempotently + * without leaving stale references. + * 2. Append fresh entries from the bundled Codex manifest with + * `${CLAUDE_PLUGIN_ROOT}` rewritten to the absolute plugin path. + * Matcher values from the bundled manifest are preserved so PreToolUse + * event routing keeps working. + */ +export function buildMergedHooks( + existing: HookManifest | null, + pluginRoot: string, +): HookManifest { + const codexManifestPath = join(pluginRoot, "hooks", "hooks.codex.json"); + const ours = JSON.parse(readFileSync(codexManifestPath, "utf-8")) as HookManifest; + const scriptsDir = join(pluginRoot, "scripts"); + + const out: HookManifest = { hooks: {} }; + + if (existing?.hooks) { + for (const [event, entries] of Object.entries(existing.hooks)) { + const kept = entries.filter((entry) => !isAgentmemoryEntry(entry, scriptsDir)); + if (kept.length > 0) out.hooks[event] = kept; + } + } + + for (const [event, entries] of Object.entries(ours.hooks)) { + const resolvedEntries: HookEntry[] = entries.map((entry) => { + const next: HookEntry = { + hooks: entry.hooks.map((handler) => ({ + type: handler.type, + command: handler.command.replace(/\$\{CLAUDE_PLUGIN_ROOT\}/g, pluginRoot), + })), + }; + if (entry.matcher !== undefined) next.matcher = entry.matcher; + return next; + }); + out.hooks[event] 
= [...(out.hooks[event] ?? []), ...resolvedEntries]; + } + + return out; +} + +function isAgentmemoryEntry(entry: HookEntry, scriptsDir: string): boolean { + const normalizedScriptsDir = normalizePathForCommandMatch(scriptsDir); + return entry.hooks.some((handler) => + normalizePathForCommandMatch(handler.command).includes(normalizedScriptsDir), + ); +} + +function normalizePathForCommandMatch(value: string): string { + return value.replace(/\\/g, "/"); +} diff --git a/src/cli/connect/codex.ts b/src/cli/connect/codex.ts index 003dc99a..a87b2858 100644 --- a/src/cli/connect/codex.ts +++ b/src/cli/connect/codex.ts @@ -8,10 +8,18 @@ import { logAlreadyWired, logBackup, logInstalled, + readJsonSafe, + writeJsonAtomic, } from "./util.js"; +import { + buildMergedHooks, + findPluginRoot, + type HookManifest, +} from "./codex-hooks.js"; const CODEX_DIR = join(homedir(), ".codex"); const CODEX_TOML = join(CODEX_DIR, "config.toml"); +const CODEX_HOOKS = join(CODEX_DIR, "hooks.json"); const TOML_BLOCK = `[mcp_servers.agentmemory] command = "npx" @@ -57,7 +65,7 @@ export const adapter: ConnectAdapter = { displayName: "Codex CLI", docs: "https://github.com/rohitg00/agentmemory#codex-cli-codex-plugin-platform", protocolNote: - "→ Using MCP. Hooks are also available — see docs/codex.md.", + "→ Using MCP. Hooks ship via the Codex plugin; on Codex Desktop, also pass --with-hooks to install the global hooks.json workaround for openai/codex#16430.", detect(): boolean { return existsSync(CODEX_DIR); @@ -77,6 +85,7 @@ export const adapter: ConnectAdapter = { p.log.info( `[dry-run] Would ${wired ? "rewrite" : "append"} [mcp_servers.agentmemory] in ${CODEX_TOML}`, ); + if (opts.withHooks) installCodexHooks(opts); return { kind: "installed", mutatedPath: CODEX_TOML }; } @@ -105,6 +114,16 @@ export const adapter: ConnectAdapter = { p.log.info( "Codex picks up MCP servers on next launch. 
For the deeper plugin install, run: codex plugin marketplace add rohitg00/agentmemory && codex plugin install agentmemory", ); + + if (opts.withHooks) { + const hookResult = installCodexHooks(opts); + if (hookResult.kind === "skipped") { + p.log.warn( + `Codex hooks fallback skipped: ${hookResult.reason}. MCP wiring still applied.`, + ); + } + } + return { kind: "installed", mutatedPath: CODEX_TOML, @@ -112,3 +131,50 @@ export const adapter: ConnectAdapter = { }; }, }; + +/** + * Install the global `~/.codex/hooks.json` fallback. See + * `codex-hooks.ts` for context (openai/codex#16430). Returns a result + * describing the side effect for the caller's summary; failures here do + * not roll back the MCP wiring. + */ +function installCodexHooks(opts: ConnectOptions): ConnectResult { + let pluginRoot: string; + try { + pluginRoot = findPluginRoot(); + } catch (err) { + return { + kind: "skipped", + reason: err instanceof Error ? err.message : String(err), + }; + } + + const existing = readJsonSafe(CODEX_HOOKS); + const merged = buildMergedHooks(existing, pluginRoot); + + if (opts.dryRun) { + p.log.info( + `[dry-run] Would ${existing ? "merge" : "create"} ${CODEX_HOOKS} with ${Object.keys(merged.hooks).length} event(s)`, + ); + return { kind: "installed", mutatedPath: CODEX_HOOKS }; + } + + let backupPath: string | undefined; + if (existsSync(CODEX_HOOKS)) { + backupPath = backupFile(CODEX_HOOKS, "codex-hooks", "json"); + logBackup(backupPath); + } + + writeJsonAtomic(CODEX_HOOKS, merged); + + logInstalled("Codex hooks (workaround for openai/codex#16430)", CODEX_HOOKS); + p.log.info( + "User-scope hooks reference absolute paths under the bundled plugin/ dir. 
Re-run `agentmemory connect codex --with-hooks` after upgrading agentmemory to refresh them.", + ); + + return { + kind: "installed", + mutatedPath: CODEX_HOOKS, + ...(backupPath !== undefined && { backupPath }), + }; +} diff --git a/src/cli/connect/copilot-cli.ts b/src/cli/connect/copilot-cli.ts new file mode 100644 index 00000000..8cce5a54 --- /dev/null +++ b/src/cli/connect/copilot-cli.ts @@ -0,0 +1,91 @@ +import { existsSync, mkdirSync } from "node:fs"; +import { homedir } from "node:os"; +import { dirname, join } from "node:path"; +import * as p from "@clack/prompts"; +import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js"; +import { + AGENTMEMORY_COPILOT_MCP_BLOCK, + backupFile, + logAlreadyWired, + logBackup, + logInstalled, + readJsonSafe, + writeJsonAtomic, +} from "./util.js"; + +const COPILOT_DIR = process.env["COPILOT_HOME"] || join(homedir(), ".copilot"); +const COPILOT_MCP_JSON = join(COPILOT_DIR, "mcp-config.json"); + +type CopilotMcpEntry = typeof AGENTMEMORY_COPILOT_MCP_BLOCK; +type CopilotConfig = { + mcpServers?: Record; + [key: string]: unknown; +}; + +function entryMatches(entry: unknown): boolean { + if (!entry || typeof entry !== "object") return false; + return JSON.stringify(entry) === JSON.stringify(AGENTMEMORY_COPILOT_MCP_BLOCK); +} + +export const adapter: ConnectAdapter = { + name: "copilot-cli", + displayName: "GitHub Copilot CLI", + docs: "https://github.com/rohitg00/agentmemory#github-copilot-cli", + protocolNote: + "→ Using MCP. Install the plugin too for full hooks/skills coverage.", + + detect(): boolean { + return existsSync(COPILOT_DIR); + }, + + async install(opts: ConnectOptions): Promise { + const existing = readJsonSafe(COPILOT_MCP_JSON); + const next: CopilotConfig = existing ? { ...existing } : {}; + const servers: Record = { + ...((next.mcpServers as Record) ?? 
{}), + }; + + const alreadyHas = entryMatches(servers["agentmemory"]); + if (alreadyHas && !opts.force) { + logAlreadyWired("GitHub Copilot CLI", COPILOT_MCP_JSON); + return { kind: "already-wired", mutatedPath: COPILOT_MCP_JSON }; + } + + if (opts.dryRun) { + p.log.info( + `[dry-run] Would ${alreadyHas ? "overwrite" : "add"} mcpServers.agentmemory in ${COPILOT_MCP_JSON}`, + ); + return { kind: "installed", mutatedPath: COPILOT_MCP_JSON }; + } + + let backupPath: string | undefined; + if (existsSync(COPILOT_MCP_JSON)) { + backupPath = backupFile(COPILOT_MCP_JSON, "copilot-cli"); + logBackup(backupPath); + } else { + mkdirSync(dirname(COPILOT_MCP_JSON), { recursive: true }); + } + + servers["agentmemory"] = AGENTMEMORY_COPILOT_MCP_BLOCK; + next.mcpServers = servers; + writeJsonAtomic(COPILOT_MCP_JSON, next); + + const verify = readJsonSafe(COPILOT_MCP_JSON); + if (!entryMatches(verify?.mcpServers?.["agentmemory"])) { + p.log.error( + `Verification failed: ${COPILOT_MCP_JSON} did not contain mcpServers.agentmemory after write.`, + ); + return { kind: "skipped", reason: "verification-failed" }; + } + + logInstalled("GitHub Copilot CLI", COPILOT_MCP_JSON); + p.log.info( + "Copilot picks up MCP servers on next launch or after `/mcp`. 
Install the plugin too for full hooks/skills.", + ); + return { + kind: "installed", + mutatedPath: COPILOT_MCP_JSON, + ...(backupPath !== undefined && { backupPath }), + }; + }, +}; diff --git a/src/cli/connect/index.ts b/src/cli/connect/index.ts index 17aedf8f..48f86817 100644 --- a/src/cli/connect/index.ts +++ b/src/cli/connect/index.ts @@ -2,6 +2,7 @@ import { platform } from "node:os"; import * as p from "@clack/prompts"; import type { ConnectAdapter, ConnectOptions, ConnectResult } from "./types.js"; import { adapter as claudeCode } from "./claude-code.js"; +import { adapter as copilotCli } from "./copilot-cli.js"; import { adapter as codex } from "./codex.js"; import { adapter as cursor } from "./cursor.js"; import { adapter as geminiCli } from "./gemini-cli.js"; @@ -12,6 +13,7 @@ import { adapter as pi } from "./pi.js"; export const ADAPTERS: readonly ConnectAdapter[] = [ claudeCode, + copilotCli, codex, cursor, geminiCli, @@ -34,19 +36,22 @@ function parseFlags(args: string[]): { dryRun: boolean; force: boolean; all: boolean; + withHooks: boolean; positional: string[]; } { const positional: string[] = []; let dryRun = false; let force = false; let all = false; + let withHooks = false; for (const a of args) { if (a === "--dry-run") dryRun = true; else if (a === "--force") force = true; else if (a === "--all") all = true; + else if (a === "--with-hooks") withHooks = true; else if (!a.startsWith("-")) positional.push(a); } - return { dryRun, force, all, positional }; + return { dryRun, force, all, withHooks, positional }; } export async function runAdapter( @@ -74,7 +79,10 @@ export async function runAdapter( } export async function runConnect(args: string[]): Promise { - if (platform() === "win32") { + const { dryRun, force, all, withHooks, positional } = parseFlags(args); + const allowWindowsAdapter = + positional.length === 1 && positional[0]?.toLowerCase() === "copilot-cli"; + if (platform() === "win32" && !allowWindowsAdapter) { p.intro("agentmemory 
connect"); p.log.warn( "Windows: automated `connect` is not supported yet. See https://github.com/rohitg00/agentmemory#other-agents for manual install steps.", @@ -83,8 +91,7 @@ export async function runConnect(args: string[]): Promise { return; } - const { dryRun, force, all, positional } = parseFlags(args); - const opts: ConnectOptions = { dryRun, force }; + const opts: ConnectOptions = { dryRun, force, withHooks }; p.intro("agentmemory connect"); diff --git a/src/cli/connect/types.ts b/src/cli/connect/types.ts index 4f64c867..8abd2745 100644 --- a/src/cli/connect/types.ts +++ b/src/cli/connect/types.ts @@ -1,6 +1,13 @@ export type ConnectOptions = { dryRun: boolean; force: boolean; + /** + * When true, the Codex adapter additionally writes a global + * `~/.codex/hooks.json` block referencing absolute paths to bundled hook + * scripts. Workaround for openai/codex#16430, which prevents plugin-local + * hooks from dispatching on Codex Desktop. No-op for other adapters. + */ + withHooks?: boolean; }; export type ConnectAdapter = { diff --git a/src/cli/connect/util.ts b/src/cli/connect/util.ts index 6d5f61ac..8902e3ef 100644 --- a/src/cli/connect/util.ts +++ b/src/cli/connect/util.ts @@ -26,6 +26,27 @@ export const AGENTMEMORY_MCP_BLOCK = { }, }; +const COPILOT_MCP_COMMAND = + process.platform === "win32" + ? 
{ + command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe", + args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"], + } + : { + command: "npx", + args: ["-y", "@agentmemory/mcp"], + }; + +export const AGENTMEMORY_COPILOT_MCP_BLOCK = { + type: "local" as const, + ...COPILOT_MCP_COMMAND, + env: { + AGENTMEMORY_URL: "${AGENTMEMORY_URL}", + AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}", + }, + tools: ["*"], +}; + export function backupsDir(): string { return join(homedir(), ".agentmemory", "backups"); } diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts index 92b23d62..2e148a1b 100644 --- a/src/cli/onboarding.ts +++ b/src/cli/onboarding.ts @@ -36,6 +36,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); // where they overlap; the rest fall back to the generic `◇`. const NATIVE_AGENTS: { value: string; label: string; glyph: string }[] = [ { value: "claude-code", label: "Claude Code", glyph: "⟁" }, + { value: "copilot-cli", label: "GitHub Copilot CLI", glyph: "◈" }, { value: "codex", label: "Codex", glyph: "◎" }, { value: "openhuman", label: "OpenHuman", glyph: "◇" }, { value: "openclaw", label: "OpenClaw", glyph: "◇" }, @@ -67,7 +68,7 @@ const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [ { value: "skip", label: "Skip — BM25-only mode (no LLM key)", envKey: null }, ]; -function buildAgentOptions(): { value: string; label: string; hint?: string }[] { +export function buildAgentOptions(): { value: string; label: string; hint?: string }[] { return [ ...NATIVE_AGENTS.map((a) => ({ value: a.value, @@ -82,6 +83,15 @@ function buildAgentOptions(): { value: string; label: string; hint?: string }[] ]; } +export function getInitialAgentValues( + env: Record = process.env, +): string[] { + if (env["COPILOT_CLI"] === "1" || env["COPILOT_AGENT_SESSION_ID"]) { + return ["copilot-cli"]; + } + return ["claude-code"]; +} + // Mirror src/cli.ts findEnvExample so onboarding ships the same .env // skeleton whether called 
directly or via `agentmemory init`. We // duplicate (rather than import) so the onboarding module doesn't @@ -137,7 +147,31 @@ export interface OnboardingResult { provider: string | null; } +function shouldSkipInteractiveOnboarding(): boolean { + const ci = process.env["CI"]; + return ( + process.stdin.isTTY !== true || + process.stdout.isTTY !== true || + (ci !== undefined && ci !== "" && ci !== "0" && ci.toLowerCase() !== "false") + ); +} + +function writeDefaultOnboardingPrefs(): OnboardingResult { + writePrefs({ + lastAgent: null, + lastAgents: [], + lastProvider: null, + skipSplash: true, + firstRunAt: new Date().toISOString(), + }); + return { agents: [], provider: null }; +} + export async function runOnboarding(): Promise { + if (shouldSkipInteractiveOnboarding()) { + return writeDefaultOnboardingPrefs(); + } + p.note( [ "Welcome to agentmemory.", @@ -153,7 +187,7 @@ export async function runOnboarding(): Promise { message: "Which agents will use agentmemory? (space to toggle, enter to confirm)", options: buildAgentOptions(), required: false, - initialValues: ["claude-code"], + initialValues: getInitialAgentValues(), }); if (p.isCancel(agentsPicked)) { p.cancel("Setup cancelled. 
Re-run any time with: agentmemory --reset"); @@ -166,7 +200,7 @@ export async function runOnboarding(): Promise { [ "━ how this works ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━", "All selected agents share the same memory at :3111.", - "A memory saved by Claude Code is visible to Codex + Cursor instantly.", + "A memory saved by Claude Code is visible to Copilot + Codex + Cursor instantly.", "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━", ].join("\n"), ); diff --git a/src/config.ts b/src/config.ts index 4a416ed1..eed5725e 100644 --- a/src/config.ts +++ b/src/config.ts @@ -159,6 +159,10 @@ export function getEnvVar(key: string): string | undefined { return getMergedEnv()[key]; } +export function isDropStaleIndexEnabled(): boolean { + return getMergedEnv()["AGENTMEMORY_DROP_STALE_INDEX"] === "true"; +} + export function detectLlmProviderKind(): "llm" | "noop" { const env = getMergedEnv(); if ( diff --git a/src/functions/diagnostics.ts b/src/functions/diagnostics.ts index 42f822cb..a63d7959 100644 --- a/src/functions/diagnostics.ts +++ b/src/functions/diagnostics.ts @@ -7,8 +7,14 @@ import type { Action, ActionEdge, DiagnosticCheck, + Insight, Lease, + Lesson, Checkpoint, + Crystal, + ProceduralMemory, + SemanticMemory, + SessionSummary, Signal, Sentinel, Sketch, @@ -25,6 +31,12 @@ const ALL_CATEGORIES = [ "signals", "sessions", "memories", + "lessons", + "summaries", + "semantic", + "procedural", + "crystals", + "insights", "mesh", ]; @@ -354,6 +366,186 @@ export function registerDiagnosticsFunction(sdk: ISdk, kv: StateKV): void { } } + if (categories.includes("lessons")) { + // Counts only live lessons (deleted=true rows are tombstoned). + // Catches bad confidence values that would silently break recall + // scoring (memory_lesson_recall multiplies by confidence). 
+ const lessons = await kv.list(KV.lessons); + const live = lessons.filter((l) => !l.deleted); + let lessonIssues = 0; + for (const l of live) { + // Number.isFinite rejects NaN / Infinity / non-numbers; a + // corrupted row passing those would silently survive the < / > + // range check (e.g. NaN < 0 is false, NaN > 1 is false, so the + // bad row would be "healthy") and skew memory_lesson_recall's + // scoring downstream. Surface as warning. + if ( + !Number.isFinite(l.confidence) || + l.confidence < 0 || + l.confidence > 1 + ) { + checks.push({ + name: `lesson-bad-confidence:${l.id}`, + category: "lessons", + status: "warn", + message: `Lesson ${l.id} has confidence ${l.confidence} (expected finite number in 0..1)`, + fixable: false, + }); + lessonIssues++; + } + } + if (lessonIssues === 0) { + checks.push({ + name: "lessons-ok", + category: "lessons", + status: "pass", + message: `All ${live.length} lessons are healthy (${lessons.length - live.length} tombstoned)`, + fixable: false, + }); + } + } + + if (categories.includes("summaries")) { + const summaries = await kv.list(KV.summaries); + let summaryIssues = 0; + for (const s of summaries) { + // typeof guard before .trim() — a corrupted row with title=null + // or title=42 would otherwise throw and abort the whole diagnose + // run before later categories get checked. 
+ if (typeof s.title !== "string" || s.title.trim().length === 0) { + checks.push({ + name: `summary-missing-title:${s.sessionId}`, + category: "summaries", + status: "warn", + message: `Summary for session ${s.sessionId} has no title`, + fixable: false, + }); + summaryIssues++; + } + } + if (summaryIssues === 0) { + checks.push({ + name: "summaries-ok", + category: "summaries", + status: "pass", + message: `All ${summaries.length} session summaries are consistent`, + fixable: false, + }); + } + } + + if (categories.includes("semantic")) { + const semantic = await kv.list(KV.semantic); + let semanticIssues = 0; + for (const s of semantic) { + if ( + !Number.isFinite(s.confidence) || + s.confidence < 0 || + s.confidence > 1 + ) { + checks.push({ + name: `semantic-bad-confidence:${s.id}`, + category: "semantic", + status: "warn", + message: `Semantic fact ${s.id} has confidence ${s.confidence} (expected finite number in 0..1)`, + fixable: false, + }); + semanticIssues++; + } + } + if (semanticIssues === 0) { + checks.push({ + name: "semantic-ok", + category: "semantic", + status: "pass", + message: `All ${semantic.length} semantic memories are consistent`, + fixable: false, + }); + } + } + + if (categories.includes("procedural")) { + const procedural = await kv.list(KV.procedural); + let proceduralIssues = 0; + for (const p of procedural) { + if (!Array.isArray(p.steps) || p.steps.length === 0) { + checks.push({ + name: `procedural-empty-steps:${p.id}`, + category: "procedural", + status: "warn", + message: `Procedural memory "${p.name}" (${p.id}) has no steps`, + fixable: false, + }); + proceduralIssues++; + } + } + if (proceduralIssues === 0) { + checks.push({ + name: "procedural-ok", + category: "procedural", + status: "pass", + message: `All ${procedural.length} procedural memories are consistent`, + fixable: false, + }); + } + } + + if (categories.includes("crystals")) { + const crystals = await kv.list(KV.crystals); + let crystalIssues = 0; + for (const c of 
crystals) { + if (typeof c.narrative !== "string" || c.narrative.trim().length === 0) { + checks.push({ + name: `crystal-empty-narrative:${c.id}`, + category: "crystals", + status: "warn", + message: `Crystal ${c.id} has empty narrative`, + fixable: false, + }); + crystalIssues++; + } + } + if (crystalIssues === 0) { + checks.push({ + name: "crystals-ok", + category: "crystals", + status: "pass", + message: `All ${crystals.length} crystals are consistent`, + fixable: false, + }); + } + } + + if (categories.includes("insights")) { + const insights = await kv.list(KV.insights); + let insightIssues = 0; + for (const i of insights) { + if ( + !Number.isFinite(i.confidence) || + i.confidence < 0 || + i.confidence > 1 + ) { + checks.push({ + name: `insight-bad-confidence:${i.id}`, + category: "insights", + status: "warn", + message: `Insight ${i.id} has confidence ${i.confidence} (expected finite number in 0..1)`, + fixable: false, + }); + insightIssues++; + } + } + if (insightIssues === 0) { + checks.push({ + name: "insights-ok", + category: "insights", + status: "pass", + message: `All ${insights.length} insights are consistent`, + fixable: false, + }); + } + } + if (categories.includes("mesh")) { const peers = await kv.list(KV.mesh); let meshIssues = 0; diff --git a/src/functions/export-import.ts b/src/functions/export-import.ts index 674b14da..4c997630 100644 --- a/src/functions/export-import.ts +++ b/src/functions/export-import.ts @@ -176,7 +176,7 @@ export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void { const strategy = data.strategy || "merge"; const importData = data.exportData; - const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", 
"0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20"]); + const supportedVersions = new Set(["0.3.0", "0.4.0", "0.5.0", "0.6.0", "0.6.1", "0.7.0", "0.7.2", "0.7.3", "0.7.4", "0.7.5", "0.7.6", "0.7.7", "0.7.9", "0.8.0", "0.8.1", "0.8.2", "0.8.3", "0.8.4", "0.8.5", "0.8.6", "0.8.7", "0.8.8", "0.8.9", "0.8.10", "0.8.11", "0.8.12", "0.8.13", "0.9.0", "0.9.1", "0.9.2", "0.9.3", "0.9.4", "0.9.5", "0.9.6", "0.9.7", "0.9.8", "0.9.9", "0.9.10", "0.9.11", "0.9.12", "0.9.13", "0.9.14", "0.9.15", "0.9.16", "0.9.17", "0.9.18", "0.9.19", "0.9.20", "0.9.21"]); if (!supportedVersions.has(importData.version)) { return { success: false, diff --git a/src/functions/search.ts b/src/functions/search.ts index 74af9ff1..b4444b48 100644 --- a/src/functions/search.ts +++ b/src/functions/search.ts @@ -86,6 +86,99 @@ export async function vectorIndexAddGuarded( } } +// Batched variant: calls EmbeddingProvider.embedBatch ONCE for the whole +// batch, then writes each resulting vector. Use this for bulk paths +// (rebuildIndex, future bulk-add APIs) where per-item serial awaits +// dominate wallclock. A batch of N has roughly the latency of a single +// embed (network + GPU setup amortized), so backfilling a 500k-obs +// corpus drops from days to hours on a per-batch endpoint like vLLM. 
+// +// Per-item failure shape: +// - whole-batch network/provider error → all skipped, single warn line +// - per-item dimension mismatch → that item skipped, others continue +export async function vectorIndexAddBatchGuarded( + items: Array<{ + id: string + sessionId: string + text: string + context: { kind: "memory" | "observation" | "synthetic"; logId: string } + }>, +): Promise<{ ok: number; fail: number }> { + const vi = vectorIndex + const ep = currentEmbeddingProvider + if (!vi || !ep || items.length === 0) return { ok: 0, fail: 0 } + + let embeddings: Float32Array[] + try { + embeddings = await ep.embedBatch(items.map((i) => clipEmbedInput(i.text))) + } catch (err) { + logger.warn("vector-index add batch: embed failed — skipping batch", { + batchSize: items.length, + provider: ep.name, + error: err instanceof Error ? err.message : String(err), + }) + return { ok: 0, fail: items.length } + } + + if (embeddings.length !== items.length) { + logger.warn( + "vector-index add batch: provider returned wrong length — skipping batch", + { + batchSize: items.length, + returned: embeddings.length, + provider: ep.name, + }, + ) + return { ok: 0, fail: items.length } + } + + let ok = 0 + let fail = 0 + for (let i = 0; i < items.length; i++) { + const item = items[i] + const embedding = embeddings[i] + if (embedding.length !== ep.dimensions) { + logger.warn("vector-index add batch: dimension mismatch — skipping item", { + kind: item.context.kind, + id: item.context.logId, + provider: ep.name, + expected: ep.dimensions, + received: embedding.length, + }) + fail++ + continue + } + try { + vi.add(item.id, item.sessionId, embedding) + ok++ + } catch (err) { + logger.warn("vector-index add batch: index write failed — skipping item", { + kind: item.context.kind, + id: item.context.logId, + error: err instanceof Error ? err.message : String(err), + }) + fail++ + } + } + return { ok, fail } +} + +// Embed-batch size for rebuild. 
Each item is one /v1/embeddings call's +// `input` array element; the provider sees the whole batch as one HTTP +// round-trip. 32 fits comfortably under typical per-request token budgets +// (32 × ~110 tok/item ≈ 3.5k tokens) and gets close to per-call +// throughput for GPU-backed endpoints (vLLM, Triton, etc.). Override via +// REBUILD_EMBED_BATCH_SIZE for endpoints that prefer smaller/larger +// batches. Set to 1 to fall back to the legacy per-item path. +const DEFAULT_REBUILD_EMBED_BATCH = 32 + +function getRebuildEmbedBatchSize(): number { + const raw = process.env.REBUILD_EMBED_BATCH_SIZE + if (!raw) return DEFAULT_REBUILD_EMBED_BATCH + const n = parseInt(raw, 10) + return Number.isFinite(n) && n > 0 ? n : DEFAULT_REBUILD_EMBED_BATCH +} + export async function rebuildIndex(kv: StateKV): Promise { const idx = getSearchIndex() idx.clear() @@ -96,8 +189,28 @@ export async function rebuildIndex(kv: StateKV): Promise { // repopulation loops run, so BM25 and vector stay in sync. vectorIndex?.clear() + const batchSize = getRebuildEmbedBatchSize() + // Accumulator for the batched embed flush. BM25 add is synchronous and + // doesn't need batching — only the vector path benefits. + type EmbedJob = { + id: string + sessionId: string + text: string + context: { kind: "memory" | "observation" | "synthetic"; logId: string } + } + const pending: EmbedJob[] = [] let count = 0 + const flush = async (): Promise => { + if (pending.length === 0) return + await vectorIndexAddBatchGuarded(pending) + pending.length = 0 + } + const enqueue = async (job: EmbedJob): Promise => { + pending.push(job) + if (pending.length >= batchSize) await flush() + } + // Memories live in their own KV scope outside per-session observation // scopes, so they need a separate walk. 
Without this, mem::remember // entries vanish from BM25 on every restart even after the live-write @@ -108,12 +221,12 @@ export async function rebuildIndex(kv: StateKV): Promise { if (memory.isLatest === false) continue if (!memory.title || !memory.content) continue idx.add(memoryToObservation(memory)) - await vectorIndexAddGuarded( - memory.id, - memory.sessionIds[0] ?? 'memory', - memory.title + ' ' + memory.content, - { kind: "memory", logId: memory.id }, - ) + await enqueue({ + id: memory.id, + sessionId: memory.sessionIds[0] ?? 'memory', + text: memory.title + ' ' + memory.content, + context: { kind: "memory", logId: memory.id }, + }) count++ } } catch (err) { @@ -123,7 +236,10 @@ export async function rebuildIndex(kv: StateKV): Promise { } const sessions = await kv.list(KV.sessions) - if (!sessions.length) return count + if (!sessions.length) { + await flush() + return count + } const obsPerSession: CompressedObservation[][] = [] const failedSessions: string[] = [] @@ -148,16 +264,19 @@ export async function rebuildIndex(kv: StateKV): Promise { for (const obs of observations) { if (obs.title && obs.narrative) { idx.add(obs) - await vectorIndexAddGuarded( - obs.id, - obs.sessionId, - obs.title + ' ' + obs.narrative, - { kind: "observation", logId: obs.id }, - ) + await enqueue({ + id: obs.id, + sessionId: obs.sessionId, + text: obs.title + ' ' + obs.narrative, + context: { kind: "observation", logId: obs.id }, + }) count++ } } } + + // Drain the last partial batch. 
+ await flush() return count } diff --git a/src/functions/smart-search.ts b/src/functions/smart-search.ts index fdeed273..c80b1f87 100644 --- a/src/functions/smart-search.ts +++ b/src/functions/smart-search.ts @@ -1,24 +1,32 @@ import type { ISdk } from "iii-sdk"; import type { + CompactLessonResult, CompactSearchResult, CompressedObservation, HybridSearchResult, + Lesson, } from "../types.js"; import { KV } from "../state/schema.js"; import { StateKV } from "../state/kv.js"; import { recordAccessBatch } from "./access-tracker.js"; import { logger } from "../logger.js"; +// Compact mode trims each lesson's content for at-a-glance display. The +// full content is fetched via memory_lesson_recall when the caller needs it. +const LESSON_CONTENT_PREVIEW_CHARS = 240; + export function registerSmartSearchFunction( sdk: ISdk, kv: StateKV, searchFn: (query: string, limit: number) => Promise, ): void { - sdk.registerFunction("mem::smart-search", + sdk.registerFunction("mem::smart-search", async (data: { query?: string; expandIds?: Array; limit?: number; + project?: string; + includeLessons?: boolean; }) => { if (data.expandIds && data.expandIds.length > 0) { @@ -68,7 +76,21 @@ export function registerSmartSearchFunction( } const limit = Math.max(1, Math.min(data.limit ?? 20, 100)); - const hybridResults = await searchFn(data.query, limit); + // Cap lesson results at a smaller number than observations: lessons + // are denser (curated insights) so 10 is usually plenty for a recall. + const lessonLimit = Math.min(limit, 10); + const includeLessons = data.includeLessons !== false; + + // Run observation hybrid-search and lesson recall in parallel so the + // extra lesson lookup adds no wallclock when the underlying calls + // can overlap. Lesson recall is best-effort: if mem::lesson-recall + // fails or returns unexpected shape, log + fall back to empty. + const [hybridResults, lessons] = await Promise.all([ + searchFn(data.query, limit), + includeLessons + ? 
recallLessons(sdk, data.query, lessonLimit, data.project) + : Promise.resolve([]), + ]); const compact: CompactSearchResult[] = hybridResults.map((r) => ({ obsId: r.observation.id, @@ -87,12 +109,51 @@ export function registerSmartSearchFunction( logger.info("Smart search compact", { query: data.query, results: compact.length, + lessons: lessons.length, }); - return { mode: "compact", results: compact }; + const response: { + mode: "compact"; + results: CompactSearchResult[]; + lessons?: CompactLessonResult[]; + } = { mode: "compact", results: compact }; + if (includeLessons) response.lessons = lessons; + return response; }, ); } +async function recallLessons( + sdk: ISdk, + query: string, + limit: number, + project?: string, +): Promise { + try { + const result = (await sdk.trigger({ + function_id: "mem::lesson-recall", + payload: { query, limit, project }, + })) as { success?: boolean; lessons?: Array }; + if (!result?.success || !Array.isArray(result.lessons)) return []; + return result.lessons.map((l) => ({ + lessonId: l.id, + content: + l.content.length > LESSON_CONTENT_PREVIEW_CHARS + ? l.content.slice(0, LESSON_CONTENT_PREVIEW_CHARS) + "…" + : l.content, + confidence: l.confidence, + score: l.score ?? l.confidence, + createdAt: l.createdAt, + project: l.project, + tags: l.tags ?? [], + })); + } catch (err) { + logger.warn("Smart search: mem::lesson-recall failed; returning empty lesson list", { + error: err instanceof Error ? 
err.message : String(err), + }); + return []; + } +} + async function findObservation( kv: StateKV, obsId: string, diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts index 140e0e12..80b29a09 100644 --- a/src/functions/summarize.ts +++ b/src/functions/summarize.ts @@ -7,7 +7,12 @@ import type { } from "../types.js"; import { KV } from "../state/schema.js"; import { StateKV } from "../state/kv.js"; -import { SUMMARY_SYSTEM, buildSummaryPrompt } from "../prompts/summary.js"; +import { + SUMMARY_SYSTEM, + buildSummaryPrompt, + REDUCE_SYSTEM, + buildReducePrompt, +} from "../prompts/summary.js"; import { getXmlTag, getXmlChildren } from "../prompts/xml.js"; import { SummaryOutputSchema } from "../eval/schemas.js"; import { validateOutput } from "../eval/validator.js"; @@ -16,6 +21,169 @@ import type { MetricsStore } from "../eval/metrics-store.js"; import { safeAudit } from "./audit.js"; import { logger } from "../logger.js"; +// Per-chunk observation budget when a session is too large to fit in one +// LLM call. Default ≈ 50k input tokens per chunk at ~110 tok/obs — fits +// comfortably in 128k-window models. Override via SUMMARIZE_CHUNK_SIZE. +const CHUNK_SIZE_DEFAULT = 400; +// Concurrent in-flight chunk calls. 6 keeps a 100-chunk session under +// iii's 180s function-invocation timeout at ~8s/call while staying +// inside generous-but-not-unlimited provider rate limits (well below +// OpenAI free tier's 500 RPM). High-throughput providers +// (Novita / DeepInfra / DeepSeek) typically allow 100+ concurrent — set +// SUMMARIZE_CHUNK_CONCURRENCY higher to cover ~1000+ chunk sessions. +const CHUNK_CONCURRENCY_DEFAULT = 6; +// Bail on the merged summary if more than this fraction of chunks fail +// to parse — a half-blind narrative is worse than a clean error. 
+const MAX_SKIP_RATIO = 0.5; + +function getChunkSize(): number { + const raw = process.env.SUMMARIZE_CHUNK_SIZE; + if (!raw) return CHUNK_SIZE_DEFAULT; // unset or empty → default + const n = parseInt(raw, 10); + return Number.isFinite(n) && n > 0 ? n : CHUNK_SIZE_DEFAULT; // NaN, zero or negative → default +} + +function getChunkConcurrency(): number { + const raw = process.env.SUMMARIZE_CHUNK_CONCURRENCY; + if (!raw) return CHUNK_CONCURRENCY_DEFAULT; // unset or empty → default + const n = parseInt(raw, 10); + return Number.isFinite(n) && n > 0 ? n : CHUNK_CONCURRENCY_DEFAULT; // NaN, zero or negative → default +} + +// One chunk call with retry-once. Returns null when both attempts fail — +// whether by parse failure, provider 4xx (content rejected by upstream +// filters), or transient network/5xx errors that didn't recover on retry. +// All failure modes are equivalent at this layer: the chunk is unusable, +// skip it and let the caller decide via the skip-ratio bailout whether +// the overall summary is still trustworthy. Errors that affect every +// chunk (auth, model down) will trip the bailout naturally. `idx`/`total` only label the log lines. +async function summarizeChunkWithRetry( + provider: MemoryProvider, + chunk: CompressedObservation[], + sessionId: string, + project: string, + idx: number, + total: number, +): Promise<SessionSummary | null> { + for (let attempt = 1; attempt <= 2; attempt++) { + try { + const xml = await provider.summarize( + SUMMARY_SYSTEM, + buildSummaryPrompt(chunk), + ); + const parsed = parseSummaryXml(xml, sessionId, project, chunk.length); + if (parsed) return parsed; + logger.warn("Summarize chunk parse failed", { + sessionId, + chunk: `${idx + 1}/${total}`, + attempt, + }); + } catch (err) { + logger.warn("Summarize chunk LLM call failed", { + sessionId, + chunk: `${idx + 1}/${total}`, + attempt, + error: err instanceof Error ? err.message : String(err), + }); + } + } + return null; +} + +// Returns the final summary XML in `response`, plus mode/chunk counts. For sessions ≤ chunk size, this is +// a single LLM call (legacy behavior).
For larger sessions, observations +// are split into chunks processed in parallel batches, each chunk retried +// once on parse or provider failure, persistently-bad chunks skipped, and +// the remaining partials merged via a reduce call; throws if too many chunks are skipped. +async function produceSummaryXml( + provider: MemoryProvider, + compressed: CompressedObservation[], + sessionId: string, + project: string, +): Promise<{ + response: string; + mode: "single" | "chunked"; + chunks: number; + skipped?: number; +}> { + const chunkSize = getChunkSize(); + if (compressed.length <= chunkSize) { + const response = await provider.summarize( + SUMMARY_SYSTEM, + buildSummaryPrompt(compressed), + ); + return { response, mode: "single", chunks: 1 }; + } + + const chunks: CompressedObservation[][] = []; + for (let i = 0; i < compressed.length; i += chunkSize) { + chunks.push(compressed.slice(i, i + chunkSize)); + } + const concurrency = getChunkConcurrency(); + logger.info("Summarize chunking session", { + sessionId, + chunks: chunks.length, + chunkSize, + concurrency, + totalObservations: compressed.length, + }); + + // Index-aligned array (null = skipped chunk) keeps the chunk → index mapping + // after parallel resolution, so the reduce step sees partials in + // chronological order even when some were skipped.
+ const partialByIdx: Array<SessionSummary | null> = new Array(chunks.length).fill(null); + for (let batchStart = 0; batchStart < chunks.length; batchStart += concurrency) { + const batch = chunks.slice(batchStart, batchStart + concurrency); + await Promise.all( + batch.map(async (chunk, j) => { + const idx = batchStart + j; + partialByIdx[idx] = await summarizeChunkWithRetry( + provider, + chunk, + sessionId, + project, + idx, + chunks.length, + ); + }), + ); + } + + const skipped = partialByIdx.filter((p) => p === null).length; + + if (skipped > Math.floor(chunks.length * MAX_SKIP_RATIO)) { + throw new Error( + `too_many_chunks_skipped: ${skipped}/${chunks.length} chunks failed to parse after retry`, + ); + } + if (skipped > 0) { + logger.warn("Summarize chunks partially skipped", { + sessionId, + skipped, + total: chunks.length, + }); + } + + // Carry each partial's chunk index through flatMap rather than an indexOf scan per partial (O(n²)). + const reduceInput = partialByIdx.flatMap((p, originalIdx) => { + if (p === null) return []; + return [{ + title: p.title, + narrative: p.narrative, + keyDecisions: p.keyDecisions, + filesModified: p.filesModified, + concepts: p.concepts, + obsRangeStart: originalIdx * chunkSize + 1, + obsRangeEnd: Math.min((originalIdx + 1) * chunkSize, compressed.length), + }]; + }); + const response = await provider.summarize( + REDUCE_SYSTEM, + buildReducePrompt(reduceInput), + ); + return { response, mode: "chunked", chunks: chunks.length, skipped }; +} + function parseSummaryXml( xml: string, sessionId: string, @@ -85,8 +253,12 @@ export function registerSummarizeFunction( } try { - const prompt = buildSummaryPrompt(compressed); - const response = await provider.summarize(SUMMARY_SYSTEM, prompt); + const { response, mode, chunks } = await produceSummaryXml( + provider, + compressed, + sessionId, + session.project, + ); if (!response || !response.trim()) { const latencyMs = Date.now() - startMs; if (metricsStore) { @@ -95,8 +267,8 @@ export function registerSummarizeFunction(
logger.warn("Empty provider response on summarize", { sessionId, provider: provider.name, - promptBytes: prompt.length, - systemBytes: SUMMARY_SYSTEM.length, + mode, + chunks, observationCount: compressed.length, }); return { success: false, error: "empty_provider_response" }; diff --git a/src/hooks/notification.ts b/src/hooks/notification.ts index 6c4b7b81..51347d50 100644 --- a/src/hooks/notification.ts +++ b/src/hooks/notification.ts @@ -29,9 +29,14 @@ async function main() { } if (isSdkChildContext(data)) return; - if (data.notification_type !== "permission_prompt") return; + const notificationType = data.notification_type ?? data.notificationType; + if (notificationType !== "permission_prompt") return; - const sessionId = (data.session_id as string) || "unknown"; + const rawSessionId = data.session_id ?? data.sessionId; + const sessionId = + typeof rawSessionId === "string" && rawSessionId.length > 0 + ? rawSessionId + : "unknown"; try { await fetch(`${REST_URL}/agentmemory/observe`, { @@ -44,7 +49,7 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), data: { - notification_type: data.notification_type, + notification_type: notificationType, title: data.title, message: data.message, }, diff --git a/src/hooks/post-tool-failure.ts b/src/hooks/post-tool-failure.ts index 337aebdd..7fa71d05 100644 --- a/src/hooks/post-tool-failure.ts +++ b/src/hooks/post-tool-failure.ts @@ -29,9 +29,12 @@ async function main() { } if (isSdkChildContext(data)) return; - if (data.is_interrupt) return; + if (data.is_interrupt || data.isInterrupt) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; + const toolName = data.tool_name ?? data.toolName; + const toolInput = data.tool_input ?? data.toolArgs; + const error = data.error ?? 
data.errorMessage; try { await fetch(`${REST_URL}/agentmemory/observe`, { @@ -44,15 +47,15 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), data: { - tool_name: data.tool_name, + tool_name: toolName, tool_input: - typeof data.tool_input === "string" - ? data.tool_input.slice(0, 4000) - : JSON.stringify(data.tool_input ?? "").slice(0, 4000), + typeof toolInput === "string" + ? toolInput.slice(0, 4000) + : JSON.stringify(toolInput ?? "").slice(0, 4000), error: - typeof data.error === "string" - ? data.error.slice(0, 4000) - : JSON.stringify(data.error ?? "").slice(0, 4000), + typeof error === "string" + ? error.slice(0, 4000) + : JSON.stringify(error ?? "").slice(0, 4000), }, }), signal: AbortSignal.timeout(3000), diff --git a/src/hooks/post-tool-use.ts b/src/hooks/post-tool-use.ts index 65afc8b1..c8319c48 100644 --- a/src/hooks/post-tool-use.ts +++ b/src/hooks/post-tool-use.ts @@ -30,9 +30,11 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; + const toolName = data.tool_name ?? data.toolName; + const toolInput = data.tool_input ?? data.toolArgs; - const { imageData, cleanOutput } = extractImageData(data.tool_output); + const { imageData, cleanOutput } = extractImageData(toolOutput(data)); try { await fetch(`${REST_URL}/agentmemory/observe`, { @@ -45,8 +47,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), data: { - tool_name: data.tool_name, - tool_input: data.tool_input, + tool_name: toolName, + tool_input: toolInput, tool_output: truncate(cleanOutput, 8000), ...(imageData ? 
{ image_data: imageData } : {}), }, @@ -57,6 +59,17 @@ async function main() { } } +function toolOutput(data: Record): unknown { + if (data.tool_response !== undefined) return data.tool_response; + if (data.tool_output !== undefined) return data.tool_output; + const result = data.tool_result ?? data.toolResult; + if (typeof result === "object" && result !== null) { + const obj = result as Record; + return obj.text_result_for_llm ?? obj.textResultForLlm ?? result; + } + return result; +} + function isBase64Image(val: unknown): val is string { return typeof val === "string" && ( val.startsWith("data:image/") || diff --git a/src/hooks/pre-compact.ts b/src/hooks/pre-compact.ts index ea13ebec..77fb7a57 100644 --- a/src/hooks/pre-compact.ts +++ b/src/hooks/pre-compact.ts @@ -30,7 +30,7 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; const project = (data.cwd as string) || process.cwd(); if (process.env["CLAUDE_MEMORY_BRIDGE"] === "true") { diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 61f6c443..eea440c8 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -50,16 +50,28 @@ async function main() { if (isSdkChildContext(data)) return; - const toolName = data.tool_name as string; + const toolName = + typeof data.tool_name === "string" + ? data.tool_name + : typeof data.toolName === "string" + ? data.toolName + : undefined; if (!toolName) return; - const fileTools = ["Edit", "Write", "Read", "Glob", "Grep"]; - if (!fileTools.includes(toolName)) return; - - const toolInput = (data.tool_input || {}) as Record; + const normalizedToolName = toolName.toLowerCase(); + const fileTools = ["edit", "write", "create", "read", "view", "glob", "grep"]; + if (!fileTools.includes(normalizedToolName)) return; + + const rawToolInput = data.tool_input ?? 
data.toolArgs; + const toolInput = + typeof rawToolInput === "object" && + rawToolInput !== null && + !Array.isArray(rawToolInput) + ? (rawToolInput as Record) + : {}; const files: string[] = []; const fileKeys = - toolName === "Grep" + normalizedToolName === "grep" ? ["path", "file"] : ["file_path", "path", "file", "pattern"]; for (const key of fileKeys) { @@ -69,14 +81,18 @@ async function main() { if (files.length === 0) return; const terms: string[] = []; - if (toolName === "Grep" || toolName === "Glob") { + if (normalizedToolName === "grep" || normalizedToolName === "glob") { const pattern = toolInput["pattern"]; if (typeof pattern === "string" && pattern.length > 0) { terms.push(pattern); } } - const sessionId = (data.session_id as string) || "unknown"; + const rawSessionId = data.session_id || data.sessionId; + const sessionId = + typeof rawSessionId === "string" && rawSessionId.length > 0 + ? rawSessionId + : "unknown"; try { const res = await fetch(`${REST_URL}/agentmemory/enrich`, { diff --git a/src/hooks/prompt-submit.ts b/src/hooks/prompt-submit.ts index 971b11be..10265a77 100644 --- a/src/hooks/prompt-submit.ts +++ b/src/hooks/prompt-submit.ts @@ -30,7 +30,7 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; try { await fetch(`${REST_URL}/agentmemory/observe`, { @@ -42,7 +42,7 @@ async function main() { project: data.cwd || process.cwd(), cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), - data: { prompt: data.prompt }, + data: { prompt: data.prompt ?? 
data.userPrompt }, }), signal: AbortSignal.timeout(3000), }); diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts index 31bef22e..7efa550e 100644 --- a/src/hooks/session-end.ts +++ b/src/hooks/session-end.ts @@ -30,7 +30,7 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; try { await fetch(`${REST_URL}/agentmemory/session/end`, { @@ -76,4 +76,4 @@ async function main() { } } -main(); \ No newline at end of file +main(); diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index a6cefe41..444edc32 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -49,7 +49,8 @@ async function main() { if (isSdkChildContext(data)) return; const sessionId = - (data.session_id as string) || `ses_${Date.now().toString(36)}`; + ((data.session_id || data.sessionId) as string) || + `ses_${Date.now().toString(36)}`; const project = (data.cwd as string) || process.cwd(); const url = `${REST_URL}/agentmemory/session/start`; diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts index 1f2f5b8a..18ca371d 100644 --- a/src/hooks/stop.ts +++ b/src/hooks/stop.ts @@ -37,7 +37,7 @@ async function main() { return; } - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; try { await fetch(`${REST_URL}/agentmemory/summarize`, { @@ -51,4 +51,4 @@ async function main() { } } -main(); \ No newline at end of file +main(); diff --git a/src/hooks/subagent-start.ts b/src/hooks/subagent-start.ts index 3f730adb..3463da0b 100644 --- a/src/hooks/subagent-start.ts +++ b/src/hooks/subagent-start.ts @@ -38,7 +38,9 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; + 
const agentId = data.agent_id || data.agentName; + const agentType = data.agent_type || data.agentDisplayName || data.agentName; fetch(`${REST_URL}/agentmemory/observe`, { method: "POST", @@ -50,8 +52,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), data: { - agent_id: data.agent_id, - agent_type: data.agent_type, + agent_id: agentId, + agent_type: agentType, }, }), signal: AbortSignal.timeout(TIMEOUT_MS), diff --git a/src/hooks/subagent-stop.ts b/src/hooks/subagent-stop.ts index c555746e..90b99fd6 100644 --- a/src/hooks/subagent-stop.ts +++ b/src/hooks/subagent-stop.ts @@ -30,7 +30,9 @@ async function main() { if (isSdkChildContext(data)) return; - const sessionId = (data.session_id as string) || "unknown"; + const sessionId = ((data.session_id || data.sessionId) as string) || "unknown"; + const agentId = data.agent_id || data.agentName; + const agentType = data.agent_type || data.agentDisplayName || data.agentName; const lastMsg = typeof data.last_assistant_message === "string" ? 
data.last_assistant_message.slice(0, 4000) @@ -47,8 +49,8 @@ async function main() { cwd: data.cwd || process.cwd(), timestamp: new Date().toISOString(), data: { - agent_id: data.agent_id, - agent_type: data.agent_type, + agent_id: agentId, + agent_type: agentType, last_message: lastMsg, }, }), diff --git a/src/index.ts b/src/index.ts index b9b9e84d..704d4809 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,6 +11,7 @@ import { isAutoCompressEnabled, isConsolidationEnabled, isContextInjectionEnabled, + isDropStaleIndexEnabled, } from "./config.js"; import { createProvider, @@ -376,8 +377,7 @@ async function main() { .map((m) => `${m.obsId} (dim=${m.dim})`) .join(", "); const distinct = Array.from(seenDimensions).sort((a, b) => a - b).join(", "); - const dropStale = - process.env["AGENTMEMORY_DROP_STALE_INDEX"] === "true"; + const dropStale = isDropStaleIndexEnabled(); if (dropStale) { console.warn( `[agentmemory] Persisted vector index has ${mismatches.length} of ` + @@ -412,16 +412,24 @@ async function main() { const needsRebuild = bm25Index.size === 0; if (needsRebuild) { - const indexCount = await rebuildIndex(kv).catch((err) => { - console.warn(`[agentmemory] Failed to rebuild search index:`, err); - return 0; - }); - if (indexCount > 0) { - bootLog( - `Search index rebuilt: ${indexCount} entries`, - ); - indexPersistence.scheduleSave(); - } + // Fire-and-forget. rebuildIndex iterates every observation across + // every session and AWAITS an embedding-provider call per record. + // On a large corpus + rate-limited embedding endpoint that can + // take HOURS; awaiting it here blocks every subsequent boot step + // (including startViewerServer below, leaving the viewer port + // unbound for the duration). The index lazily fills in over time + // and search degrades gracefully — partial coverage > no viewer + // for hours. Errors still surface via the inner .catch. 
+ void rebuildIndex(kv) + .then((indexCount) => { + if (indexCount > 0) { + bootLog(`Search index rebuilt: ${indexCount} entries`); + indexPersistence.scheduleSave(); + } + }) + .catch((err) => { + console.warn(`[agentmemory] Failed to rebuild search index:`, err); + }); } else { // Backfill memories into BM25 for users upgrading from <0.9.5: prior // versions of mem::remember never indexed memories, so the persisted diff --git a/src/mcp/standalone.ts b/src/mcp/standalone.ts index 86678a76..1413cbf8 100644 --- a/src/mcp/standalone.ts +++ b/src/mcp/standalone.ts @@ -89,6 +89,8 @@ interface Validated { files?: string[]; query?: string; limit?: number; + format?: string; + tokenBudget?: number; memoryIds?: string[]; reason?: string; } @@ -118,6 +120,17 @@ function validate(toolName: string, args: Record): Validated { } v.query = query.trim(); v.limit = parseLimit(args["limit"]); + const fmt = args["format"]; + if (typeof fmt === "string" && fmt.trim()) { + v.format = fmt.trim().toLowerCase(); + } + const budget = args["token_budget"]; + if (typeof budget === "number" && Number.isFinite(budget) && budget > 0) { + v.tokenBudget = Math.floor(budget); + } else if (typeof budget === "string" && budget.trim()) { + const n = Number(budget); + if (Number.isFinite(n) && n > 0) v.tokenBudget = Math.floor(n); + } return v; } case "memory_sessions": { @@ -159,11 +172,26 @@ async function handleProxy( }); return textResponse(result); } - case "memory_recall": + case "memory_recall": { + const body: Record = { + query: v.query, + limit: v.limit, + format: v.format ?? 
"full", + }; + if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget; + const result = await handle.call("/agentmemory/search", { + method: "POST", + body: JSON.stringify(body), + }); + return textResponse(result, true); + } case "memory_smart_search": { + const body: Record = { query: v.query, limit: v.limit }; + if (v.format != null) body["format"] = v.format; + if (v.tokenBudget != null) body["token_budget"] = v.tokenBudget; const result = await handle.call("/agentmemory/smart-search", { method: "POST", - body: JSON.stringify({ query: v.query, limit: v.limit }), + body: JSON.stringify(body), }); return textResponse(result, true); } diff --git a/src/mcp/transport.ts b/src/mcp/transport.ts index 766e6472..759ed019 100644 --- a/src/mcp/transport.ts +++ b/src/mcp/transport.ts @@ -1,5 +1,3 @@ -import { createInterface } from "node:readline"; - export interface JsonRpcRequest { jsonrpc: "2.0"; id?: string | number; @@ -19,6 +17,11 @@ export type RequestHandler = ( params: Record, ) => Promise; +export interface StdioMessageParser { + push: (chunk: Buffer | string) => void; + isFramed: () => boolean; +} + // JSON-RPC 2.0 notifications are messages without an `id` field. The spec // (and the MCP transport contract) requires the server to NOT send a // response for notifications. 
Some clients tolerate spurious responses; @@ -130,26 +133,131 @@ export async function processLine( } } +function findHeaderEnd(buffer: Buffer): { headerEnd: number; bodyStart: number } | null { + const crlf = buffer.indexOf("\r\n\r\n"); + const lf = buffer.indexOf("\n\n"); + if (crlf === -1 && lf === -1) return null; + if (crlf !== -1 && (lf === -1 || crlf <= lf)) { + return { headerEnd: crlf, bodyStart: crlf + 4 }; + } + return { headerEnd: lf, bodyStart: lf + 2 }; +} + +function parseContentLength(header: string): number | null { + for (const line of header.split(/\r?\n/)) { + const match = line.match(/^content-length:\s*(\d+)\s*$/i); + if (match) return Number(match[1]); + } + return null; +} + +export function formatResponse( + response: JsonRpcResponse, + framed: boolean, +): string | Buffer[] { + const body = JSON.stringify(response); + if (!framed) return `${body}\n`; + const bytes = Buffer.from(body, "utf8"); + return [Buffer.from(`Content-Length: ${bytes.length}\r\n\r\n`, "ascii"), bytes]; +} + +export function createMessageParser( + onMessage: (message: string) => void, + writeErr: (msg: string) => void = (msg) => process.stderr.write(msg), +): StdioMessageParser { + let buffer = Buffer.alloc(0); + let framed = false; + + function processBuffer(): void { + while (buffer.length > 0) { + if (buffer[0] === 10 || buffer[0] === 13) { + buffer = buffer.subarray(1); + continue; + } + + const preview = buffer.toString("ascii", 0, Math.min(buffer.length, 32)); + if (/^content-length:/i.test(preview)) { + const header = findHeaderEnd(buffer); + if (!header) return; + + const headerText = buffer.subarray(0, header.headerEnd).toString("ascii"); + const contentLength = parseContentLength(headerText); + if (contentLength === null) { + writeErr("[mcp-transport] missing Content-Length header\n"); + buffer = buffer.subarray(header.bodyStart); + continue; + } + + const messageEnd = header.bodyStart + contentLength; + if (buffer.length < messageEnd) return; + + framed = 
true; + const message = buffer.subarray(header.bodyStart, messageEnd).toString("utf8"); + buffer = buffer.subarray(messageEnd); + onMessage(message); + continue; + } + + const newline = buffer.indexOf(10); + if (newline === -1) return; + const line = buffer + .subarray(0, newline) + .toString("utf8") + .replace(/\r$/, ""); + buffer = buffer.subarray(newline + 1); + onMessage(line); + } + } + + return { + push(chunk) { + const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, "utf8"); + buffer = Buffer.concat([buffer, bytes]); + processBuffer(); + }, + isFramed() { + return framed; + }, + }; +} + export function createStdioTransport(handler: RequestHandler): { start: () => void; stop: () => void; } { - let rl: ReturnType<typeof createInterface> | null = null; + let parser: StdioMessageParser | null = null; + let queue = Promise.resolve(); const writeResponse = (response: JsonRpcResponse) => { - process.stdout.write(JSON.stringify(response) + "\n"); + const formatted = formatResponse(response, parser?.isFramed() ?? false); + if (typeof formatted === "string") { + process.stdout.write(formatted); + return; + } + for (const chunk of formatted) { + process.stdout.write(chunk); + } }; - const onLine = (line: string) => processLine(line, handler, writeResponse); + const onData = (chunk: Buffer) => parser?.push(chunk); return { start() { - rl = createInterface({ input: process.stdin }); - rl.on("line", onLine); + parser = createMessageParser((message) => { + // Keep the caught chain as the new tail: a rejected `queue` would make every later .then() skip its handler and drop the request. + queue = queue.then(() => processLine(message, handler, writeResponse)).catch((err) => { + process.stderr.write( + `[mcp-transport] request processing failed: ${ + err instanceof Error ?
err.message : String(err) + }\n`, + ); + }); + }); + process.stdin.on("data", onData); }, stop() { - rl?.close(); - rl = null; + process.stdin.off("data", onData); + parser = null; }, }; } diff --git a/src/prompts/summary.ts b/src/prompts/summary.ts index f01b28b8..bd040212 100644 --- a/src/prompts/summary.ts +++ b/src/prompts/summary.ts @@ -36,3 +36,52 @@ export function buildSummaryPrompt(observations: Array<{ }) return `Session observations (${observations.length} total):\n\n${lines.join('\n\n---\n\n')}` } + +export const REDUCE_SYSTEM = `You are merging multiple partial summaries of the SAME coding session into one final session summary. The partials are chronological chunks of one continuous session — not separate sessions. + +Output EXACTLY this XML format with no additional text: + + + Short session title (max 100 chars) + 3-5 sentence narrative covering the whole session + + Key technical decision made + + + path/to/modified/file + + + key concept from session + + + +Rules: +- Synthesize a single narrative that reflects the whole arc, not a chunk-by-chunk recap +- Preserve every distinct decision across chunks +- Union (deduplicate) all files and concepts +- Title should capture the session's overall outcome` + +export function buildReducePrompt(partials: Array<{ + title: string + narrative: string + keyDecisions: string[] + filesModified: string[] + concepts: string[] + obsRangeStart: number + obsRangeEnd: number +}>): string { + const sections = partials.map((p, i) => { + const decisions = p.keyDecisions.map((d) => ` - ${d}`).join('\n') + const files = p.filesModified.map((f) => ` - ${f}`).join('\n') + const concepts = p.concepts.join(', ') + return `[Chunk ${i + 1} of ${partials.length} — obs ${p.obsRangeStart}-${p.obsRangeEnd}] +Title: ${p.title} +Narrative: ${p.narrative} +Decisions: +${decisions} +Files: +${files} +Concepts: ${concepts}` + }) + return `Partial summaries (${partials.length} chunks of one session,
chronological):\n\n${sections.join('\n\n---\n\n')}` +} diff --git a/src/providers/openai.ts b/src/providers/openai.ts index bca2370f..88e10829 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -80,6 +80,13 @@ export class OpenAIProvider implements MemoryProvider { const body: Record = { model: this.model, max_tokens: this.maxTokens, + // OpenAI API spec defines `stream` as defaulting to false, so omitting + // it should yield a JSON response. Some OpenAI-compatible proxies + // (notably 9Router < 0.4.56 — see decolua/9router#1260) default to + // text/event-stream when `stream` is absent, which crashes the + // `response.json()` call below with `Unexpected token 'd', "data: {"id"...`. + // Send it explicitly so non-spec endpoints route to non-streaming too. + stream: false, messages: [ { role: "system", content: systemPrompt }, { role: "user", content: userPrompt }, diff --git a/src/triggers/api.ts b/src/triggers/api.ts index 083c2159..66eaadc2 100644 --- a/src/triggers/api.ts +++ b/src/triggers/api.ts @@ -9,6 +9,7 @@ import type { ResilientProvider } from "../providers/resilient.js"; import { VERSION } from "../version.js"; import { timingSafeCompare } from "../auth.js"; import { renderViewerDocument } from "../viewer/document.js"; +import { getBoundViewerPort, getViewerSkipped } from "../viewer/server.js"; import { MAX_FILES_UPPER_BOUND } from "../functions/replay.js"; import { isGraphExtractionEnabled, @@ -143,7 +144,7 @@ export function registerApiTriggers( sdk.registerFunction("api::liveness", async (): Promise => ({ status_code: 200, - body: { status: "ok", service: "agentmemory" }, + body: { status: "ok", service: "agentmemory", viewerPort: getBoundViewerPort(), viewerSkipped: getViewerSkipped() }, }), ); sdk.registerTrigger({ @@ -244,6 +245,8 @@ export function registerApiTriggers( health: health || null, functionMetrics, circuitBreaker, + viewerPort: getBoundViewerPort(), + viewerSkipped: getViewerSkipped(), }, }; }, diff --git 
a/src/types.ts b/src/types.ts index bc38a058..72e347b3 100644 --- a/src/types.ts +++ b/src/types.ts @@ -266,6 +266,16 @@ export interface CompactSearchResult { timestamp: string; } +export interface CompactLessonResult { + lessonId: string; + content: string; + confidence: number; + score: number; + createdAt: string; + project?: string; + tags: string[]; +} + export interface TimelineEntry { observation: CompressedObservation; sessionId: string; @@ -293,7 +303,7 @@ export interface ExportPagination { } export interface ExportData { - version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20"; + version: "0.3.0" | "0.4.0" | "0.5.0" | "0.6.0" | "0.6.1" | "0.7.0" | "0.7.2" | "0.7.3" | "0.7.4" | "0.7.5" | "0.7.6" | "0.7.7" | "0.7.9" | "0.8.0" | "0.8.1" | "0.8.2" | "0.8.3" | "0.8.4" | "0.8.5" | "0.8.6" | "0.8.7" | "0.8.8" | "0.8.9" | "0.8.10" | "0.8.11" | "0.8.12" | "0.8.13" | "0.9.0" | "0.9.1" | "0.9.2" | "0.9.3" | "0.9.4" | "0.9.5" | "0.9.6" | "0.9.7" | "0.9.8" | "0.9.9" | "0.9.10" | "0.9.11" | "0.9.12" | "0.9.13" | "0.9.14" | "0.9.15" | "0.9.16" | "0.9.17" | "0.9.18" | "0.9.19" | "0.9.20" | "0.9.21"; exportedAt: string; sessions: Session[]; observations: Record; diff --git a/src/version.ts b/src/version.ts index 35bfcbb0..8a1b6acf 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = "0.9.20"; +export const VERSION = "0.9.21"; diff --git a/src/viewer/index.html b/src/viewer/index.html index 4bd9293d..c2c200b8 100644 --- a/src/viewer/index.html +++ b/src/viewer/index.html @@ -1127,6 +1127,39 @@

agentmemory

}; } + // IME_SAFE_SEARCH_V2 + function bindImeSafeSearch(input, ms, onSearch) { + var composing = false; + var justCommitted = false; + var run = debounce(function(value) { onSearch(value); }, ms); + input.addEventListener('compositionstart', function() { composing = true; }); + input.addEventListener('compositionend', function() { + composing = false; + justCommitted = true; + onSearch(input.value); + setTimeout(function() { justCommitted = false; }, 0); + }); + input.addEventListener('input', function(e) { + if (composing || e.isComposing) return; + if (justCommitted) return; + run(input.value); + }); + } + function captureSearchFocus(ids) { + var a = document.activeElement; + if (!a || ids.indexOf(a.id) < 0) return null; + return { id: a.id, start: a.selectionStart, end: a.selectionEnd }; + } + function restoreSearchFocus(focus) { + if (!focus) return; + var el = document.getElementById(focus.id); + if (!el) return; + el.focus(); + if (typeof el.setSelectionRange === 'function') { + try { el.setSelectionRange(focus.start, focus.end); } catch (e) {} + } + } + async function api(path, opts) { try { var url = REST + '/agentmemory/' + path; @@ -1629,6 +1662,7 @@

agentmemory

html += ''; html += '
'; + var __focus = captureSearchFocus(['graph-search']); sb.innerHTML = html; sb.querySelectorAll('input[type="checkbox"]').forEach(function(cb) { @@ -1640,11 +1674,9 @@

agentmemory

var searchInput = document.getElementById('graph-search'); if (searchInput) { - searchInput.addEventListener('input', debounce(function() { - graphSearchTerm = this.value.toLowerCase(); - renderGraph(); - }, 150)); + bindImeSafeSearch(searchInput, 200, function(v){ graphSearchTerm = v.toLowerCase(); renderGraph(); }); } + restoreSearchFocus(__focus); } function initGraph() { @@ -2198,7 +2230,26 @@

agentmemory

var filtered = items.filter(function(m) { if (typeFilter && m.type !== typeFilter) return false; - if (search && !(m.title || '').toLowerCase().includes(search) && !(m.content || '').toLowerCase().includes(search)) return false; + const normalizedSearch = (search || '') + .normalize("NFKC") + .toLowerCase(); + + const normalizedTitle = (m.title || '') + .normalize("NFKC") + .toLowerCase(); + + const normalizedContent = (m.content || '') + .normalize("NFKC") + .toLowerCase(); + + if ( + search && + !normalizedTitle.includes(normalizedSearch) && + !normalizedContent.includes(normalizedSearch) + ) { + return false; + } + return true; }); @@ -2261,14 +2312,12 @@

agentmemory

html += ''; } + var __focus = captureSearchFocus(['mem-search']); el.innerHTML = html; var searchInput = document.getElementById('mem-search'); if (searchInput) { - searchInput.addEventListener('input', debounce(function() { - state.memories.search = this.value; - renderMemories(); - }, 200)); + bindImeSafeSearch(searchInput, 200, function(v){ state.memories.search = v; renderMemories(); }); } var typeSelect = document.getElementById('mem-type-filter'); if (typeSelect) { @@ -2277,6 +2326,7 @@

agentmemory

renderMemories(); }); } + restoreSearchFocus(__focus); } function deleteMemory(id, title) { @@ -2853,7 +2903,7 @@

agentmemory

html += ''; html += '
'; - html += ''; + html += ''; html += '' + items.length + ' lessons'; html += '
'; @@ -2882,7 +2932,11 @@

agentmemory

html += ''; } + var __focus = captureSearchFocus(['lessons-search']); el.innerHTML = html; + var __ls = document.getElementById('lessons-search'); + if (__ls) bindImeSafeSearch(__ls, 200, function(v){ state.lessons.search = v; renderLessons(); }); + restoreSearchFocus(__focus); } async function loadActions() { @@ -2912,8 +2966,8 @@

agentmemory

} var html = '
'; - html += ''; - html += ''; + html += ''; + html += ''; html += '' + items.length + ' crystals'; html += '
'; @@ -3060,7 +3120,11 @@

agentmemory

}); } + var __focus = captureSearchFocus(['crystals-search']); el.innerHTML = html; + var __cs = document.getElementById('crystals-search'); + if (__cs) bindImeSafeSearch(__cs, 200, function(v){ state.crystals.search = v; renderCrystals(); }); + restoreSearchFocus(__focus); } async function loadAudit() { diff --git a/src/viewer/server.ts b/src/viewer/server.ts index bd8e3c63..71598690 100644 --- a/src/viewer/server.ts +++ b/src/viewer/server.ts @@ -131,6 +131,16 @@ function readBody(req: IncomingMessage): Promise { const MAX_VIEWER_PORT_RETRIES = 10; +let boundViewerPort: number | null = null; +let viewerSkipped = false; + +export function getBoundViewerPort(): number | null { + return boundViewerPort; +} +export function getViewerSkipped(): boolean { + return viewerSkipped; +} + export function startViewerServer( port: number, _kv: unknown, @@ -138,6 +148,10 @@ export function startViewerServer( secret?: string, restPort?: number, ): Server { + // Reset exported runtime state for each start attempt. + boundViewerPort = null; + viewerSkipped = false; + const resolvedRestPort = restPort ?? port - 2; const requestedPort = port; // Computed lazily on first request — `port` may be 0 here (OS-assigned) @@ -227,6 +241,12 @@ export function startViewerServer( }; server.on("listening", () => { + const addr = server.address(); + boundViewerPort = + addr && typeof addr === "object" && "port" in addr + ? 
addr.port + : currentPort; + viewerSkipped = false; if (currentPort === requestedPort) { console.log(`[agentmemory] Viewer: http://localhost:${currentPort}`); } else { @@ -244,10 +264,14 @@ export function startViewerServer( return; } if (err.code === "EADDRINUSE") { + boundViewerPort = null; + viewerSkipped = true; console.warn( `[agentmemory] Viewer ports ${requestedPort}-${requestedPort + MAX_VIEWER_PORT_RETRIES} all in use, skipping viewer.`, ); } else { + boundViewerPort = null; + viewerSkipped = true; console.error(`[agentmemory] Viewer error:`, err.message); } }); diff --git a/test/cli-connect.test.ts b/test/cli-connect.test.ts index 99174dac..fbb8c2b5 100644 --- a/test/cli-connect.test.ts +++ b/test/cli-connect.test.ts @@ -10,6 +10,17 @@ import { } from "../src/cli/connect/index.js"; import type { ConnectAdapter } from "../src/cli/connect/types.js"; +const EXPECTED_COPILOT_MCP_COMMAND = + process.platform === "win32" + ? { + command: process.env["ComSpec"] || process.env["COMSPEC"] || "cmd.exe", + args: ["/d", "/s", "/c", "npx", "-y", "@agentmemory/mcp"], + } + : { + command: "npx", + args: ["-y", "@agentmemory/mcp"], + }; + describe("agentmemory connect — dispatcher", () => { it("resolves every known agent by lowercase name", () => { for (const name of knownAgents()) { @@ -29,10 +40,11 @@ describe("agentmemory connect — dispatcher", () => { expect(resolveAdapter("")).toBeNull(); }); - it("ships exactly the 8 agents specified by the spec", () => { + it("ships exactly the 9 agents specified by the spec", () => { expect(knownAgents().sort()).toEqual( [ "claude-code", + "copilot-cli", "codex", "cursor", "gemini-cli", @@ -42,7 +54,7 @@ describe("agentmemory connect — dispatcher", () => { "pi", ].sort(), ); - expect(ADAPTERS.length).toBe(8); + expect(ADAPTERS.length).toBe(9); }); it("every adapter exposes detect() and install()", () => { @@ -175,7 +187,193 @@ describe("agentmemory connect — claude-code adapter (mock filesystem)", () => if (result.kind === 
"installed") { expect(result.backupPath).toBeDefined(); expect(existsSync(result.backupPath!)).toBe(true); - expect(result.backupPath!).toContain(".agentmemory/backups"); + expect(result.backupPath!).toContain(join(".agentmemory", "backups")); + } + }); +}); + +describe("agentmemory connect — copilot-cli adapter (mock filesystem)", () => { + let tmpHome: string; + let originalHome: string | undefined; + let originalUserprofile: string | undefined; + let originalCopilotHome: string | undefined; + let importCounter = 0; + + beforeEach(() => { + tmpHome = mkdtempSync(join(tmpdir(), "am-connect-")); + originalHome = process.env["HOME"]; + originalUserprofile = process.env["USERPROFILE"]; + originalCopilotHome = process.env["COPILOT_HOME"]; + process.env["HOME"] = tmpHome; + process.env["USERPROFILE"] = tmpHome; + delete process.env["COPILOT_HOME"]; + vi.resetModules(); + }); + + afterEach(() => { + if (originalHome !== undefined) process.env["HOME"] = originalHome; + else delete process.env["HOME"]; + if (originalUserprofile !== undefined) + process.env["USERPROFILE"] = originalUserprofile; + else delete process.env["USERPROFILE"]; + if (originalCopilotHome !== undefined) + process.env["COPILOT_HOME"] = originalCopilotHome; + else delete process.env["COPILOT_HOME"]; + rmSync(tmpHome, { recursive: true, force: true }); + vi.resetModules(); + }); + + async function loadAdapter(): Promise { + const mod = await import( + "../src/cli/connect/copilot-cli.js?t=" + Date.now() + "-" + importCounter++ + ); + return (mod as { adapter: ConnectAdapter }).adapter; + } + + it("detect() returns false when ~/.copilot doesn't exist", async () => { + const a = await loadAdapter(); + expect(a.detect()).toBe(false); + }); + + it("install() writes mcpServers.agentmemory into ~/.copilot/mcp-config.json and is idempotent", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + + const a = await loadAdapter(); + expect(a.detect()).toBe(true); + + const 
first = await a.install({ dryRun: false, force: false }); + expect(first.kind).toBe("installed"); + + const config = JSON.parse( + readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"), + ); + expect(config.mcpServers.agentmemory).toEqual({ + type: "local", + ...EXPECTED_COPILOT_MCP_COMMAND, + env: { + AGENTMEMORY_URL: "${AGENTMEMORY_URL}", + AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}", + }, + tools: ["*"], + }); + + const second = await a.install({ dryRun: false, force: false }); + expect(second.kind).toBe("already-wired"); + }); + + it("honors COPILOT_HOME when locating mcp-config.json", async () => { + const customCopilotHome = join(tmpHome, "custom-copilot-home"); + process.env["COPILOT_HOME"] = customCopilotHome; + require("node:fs").mkdirSync(customCopilotHome, { recursive: true }); + + const a = await loadAdapter(); + expect(a.detect()).toBe(true); + + const result = await a.install({ dryRun: false, force: false }); + expect(result.kind).toBe("installed"); + expect(result.mutatedPath).toBe(join(customCopilotHome, "mcp-config.json")); + expect(existsSync(join(customCopilotHome, "mcp-config.json"))).toBe(true); + expect(existsSync(join(tmpHome, ".copilot", "mcp-config.json"))).toBe(false); + }); + + it("install() preserves unrelated top-level keys and mcpServers entries", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + writeFileSync( + join(tmpHome, ".copilot", "mcp-config.json"), + JSON.stringify({ + otherTopLevel: { keep: true }, + mcpServers: { other: { type: "local", command: "other" } }, + }), + ); + + const a = await loadAdapter(); + const result = await a.install({ dryRun: false, force: false }); + expect(result.kind).toBe("installed"); + + const config = JSON.parse( + readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"), + ); + expect(config.otherTopLevel).toEqual({ keep: true }); + expect(config.mcpServers.other).toEqual({ type: "local", command: "other" }); + 
expect(config.mcpServers.agentmemory.command).toBe( + EXPECTED_COPILOT_MCP_COMMAND.command, + ); + }); + + it("install() writes env passthrough block for AGENTMEMORY_URL + AGENTMEMORY_SECRET", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + + const a = await loadAdapter(); + const result = await a.install({ dryRun: false, force: false }); + expect(result.kind).toBe("installed"); + + const config = JSON.parse( + readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"), + ); + const entry = config.mcpServers.agentmemory; + expect(entry.env.AGENTMEMORY_URL).toBe("${AGENTMEMORY_URL}"); + expect(entry.env.AGENTMEMORY_SECRET).toBe("${AGENTMEMORY_SECRET}"); + }); + + it("install() with --force rewrites even when already wired", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + writeFileSync( + join(tmpHome, ".copilot", "mcp-config.json"), + JSON.stringify({ + mcpServers: { + agentmemory: { + type: "local", + ...EXPECTED_COPILOT_MCP_COMMAND, + env: { + AGENTMEMORY_URL: "${AGENTMEMORY_URL}", + AGENTMEMORY_SECRET: "${AGENTMEMORY_SECRET}", + }, + tools: ["memory_save"], + }, + }, + }), + ); + + const a = await loadAdapter(); + const result = await a.install({ dryRun: false, force: true }); + expect(result.kind).toBe("installed"); + + const config = JSON.parse( + readFileSync(join(tmpHome, ".copilot", "mcp-config.json"), "utf-8"), + ); + expect(config.mcpServers.agentmemory.tools).toEqual(["*"]); + }); + + it("install() with --dry-run does not mutate the file", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + const before = JSON.stringify({ mcpServers: {} }); + writeFileSync(join(tmpHome, ".copilot", "mcp-config.json"), before); + + const a = await loadAdapter(); + const result = await a.install({ dryRun: true, force: false }); + expect(result.kind).toBe("installed"); + + const after = readFileSync( + join(tmpHome, ".copilot", 
"mcp-config.json"), + "utf-8", + ); + expect(after).toBe(before); + }); + + it("install() creates a backup file when config pre-exists", async () => { + require("node:fs").mkdirSync(join(tmpHome, ".copilot"), { recursive: true }); + writeFileSync( + join(tmpHome, ".copilot", "mcp-config.json"), + JSON.stringify({ mcpServers: {} }), + ); + + const a = await loadAdapter(); + const result = await a.install({ dryRun: false, force: false }); + expect(result.kind).toBe("installed"); + if (result.kind === "installed") { + expect(result.backupPath).toBeDefined(); + expect(existsSync(result.backupPath!)).toBe(true); + expect(result.backupPath!).toContain(join(".agentmemory", "backups")); } }); }); diff --git a/test/cli-onboarding.test.ts b/test/cli-onboarding.test.ts new file mode 100644 index 00000000..9779a7e9 --- /dev/null +++ b/test/cli-onboarding.test.ts @@ -0,0 +1,94 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const prompts = vi.hoisted(() => ({ + note: vi.fn(), + multiselect: vi.fn(async () => { + throw new Error("interactive multiselect should not run in non-TTY onboarding"); + }), + select: vi.fn(async () => { + throw new Error("interactive select should not run in non-TTY onboarding"); + }), + confirm: vi.fn(async () => true), + isCancel: vi.fn(() => false), + cancel: vi.fn(), + log: { + warn: vi.fn(), + step: vi.fn(), + error: vi.fn(), + }, +})); + +vi.mock("@clack/prompts", () => prompts); +vi.mock("../src/cli/connect/index.js", () => ({ + resolveAdapter: vi.fn(), + runAdapter: vi.fn(), +})); + +const ORIGINAL_HOME = process.env["HOME"]; +const ORIGINAL_USERPROFILE = process.env["USERPROFILE"]; +const stdinTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdin, "isTTY"); +const stdoutTtyDescriptor = Object.getOwnPropertyDescriptor(process.stdout, "isTTY"); + +let sandboxHome: 
string; + +function setTTY(value: boolean): void { + Object.defineProperty(process.stdin, "isTTY", { value, configurable: true }); + Object.defineProperty(process.stdout, "isTTY", { value, configurable: true }); +} + +function restoreTTY(): void { + if (stdinTtyDescriptor) Object.defineProperty(process.stdin, "isTTY", stdinTtyDescriptor); + else delete (process.stdin as NodeJS.ReadStream & { isTTY?: boolean }).isTTY; + if (stdoutTtyDescriptor) Object.defineProperty(process.stdout, "isTTY", stdoutTtyDescriptor); + else delete (process.stdout as NodeJS.WriteStream & { isTTY?: boolean }).isTTY; +} + +async function freshOnboarding() { + vi.resetModules(); + return await import("../src/cli/onboarding.js"); +} + +describe("cli onboarding", () => { + beforeEach(() => { + sandboxHome = mkdtempSync(join(tmpdir(), "agentmemory-onboarding-")); + process.env["HOME"] = sandboxHome; + process.env["USERPROFILE"] = sandboxHome; + setTTY(false); + vi.clearAllMocks(); + }); + + afterEach(() => { + restoreTTY(); + if (ORIGINAL_HOME === undefined) delete process.env["HOME"]; + else process.env["HOME"] = ORIGINAL_HOME; + if (ORIGINAL_USERPROFILE === undefined) delete process.env["USERPROFILE"]; + else process.env["USERPROFILE"] = ORIGINAL_USERPROFILE; + rmSync(sandboxHome, { recursive: true, force: true }); + }); + + it("does not prompt and records default preferences when onboarding runs without a TTY", async () => { + const { runOnboarding } = await freshOnboarding(); + + const result = await runOnboarding(); + + expect(result).toEqual({ agents: [], provider: null }); + expect(prompts.multiselect).not.toHaveBeenCalled(); + expect(prompts.select).not.toHaveBeenCalled(); + expect(prompts.confirm).not.toHaveBeenCalled(); + + const preferencesPath = join(sandboxHome, ".agentmemory", "preferences.json"); + expect(existsSync(preferencesPath)).toBe(true); + const preferences = JSON.parse(readFileSync(preferencesPath, "utf-8")); + expect(preferences).toMatchObject({ + schemaVersion: 1, + 
lastAgent: null, + lastAgents: [], + lastProvider: null, + skipSplash: true, + }); + expect(typeof preferences.firstRunAt).toBe("string"); + }); +}); diff --git a/test/codex-connect-hooks.test.ts b/test/codex-connect-hooks.test.ts new file mode 100644 index 00000000..75accbee --- /dev/null +++ b/test/codex-connect-hooks.test.ts @@ -0,0 +1,137 @@ +import { describe, it, expect } from "vitest"; +import { writeFileSync, readFileSync, mkdirSync, rmSync } from "node:fs"; +import { join, resolve } from "node:path"; +import { tmpdir } from "node:os"; +import { + buildMergedHooks, + findPluginRoot, + type HookManifest, +} from "../src/cli/connect/codex-hooks.js"; + +const PLUGIN_ROOT = resolve(__dirname, "..", "plugin"); + +describe("findPluginRoot", () => { + it("locates the bundled plugin/ directory from src/cli/connect/", () => { + const root = findPluginRoot(); + expect(root).toBe(PLUGIN_ROOT); + }); +}); + +describe("buildMergedHooks", () => { + it("rewrites ${CLAUDE_PLUGIN_ROOT} to absolute pluginRoot in every command", () => { + const merged = buildMergedHooks(null, PLUGIN_ROOT); + for (const entries of Object.values(merged.hooks)) { + for (const entry of entries) { + for (const handler of entry.hooks) { + expect(handler.command).not.toContain("${CLAUDE_PLUGIN_ROOT}"); + expect(handler.command).toContain(`${PLUGIN_ROOT}/scripts/`); + } + } + } + }); + + it("preserves matchers from the bundled manifest (e.g. 
PreToolUse)", () => { + const merged = buildMergedHooks(null, PLUGIN_ROOT); + const preToolUse = merged.hooks["PreToolUse"]; + expect(preToolUse).toBeDefined(); + expect(preToolUse!.length).toBeGreaterThan(0); + expect(preToolUse![0].matcher).toBe("Edit|Write|Read|Glob|Grep"); + }); + + it("includes all six expected lifecycle events", () => { + const merged = buildMergedHooks(null, PLUGIN_ROOT); + for (const event of [ + "SessionStart", + "UserPromptSubmit", + "PreToolUse", + "PostToolUse", + "PreCompact", + "Stop", + ]) { + expect(Object.keys(merged.hooks)).toContain(event); + } + }); + + it("appends to existing user hooks without dropping them", () => { + const existing: HookManifest = { + hooks: { + SessionStart: [ + { + hooks: [{ type: "command", command: "echo user-custom" }], + }, + ], + UserPromptSubmit: [ + { + hooks: [{ type: "command", command: "echo another-user-hook" }], + }, + ], + }, + }; + const merged = buildMergedHooks(existing, PLUGIN_ROOT); + const sessionStart = merged.hooks["SessionStart"]!; + const userHook = sessionStart.find((e) => + e.hooks.some((h) => h.command === "echo user-custom"), + ); + expect(userHook, "user's SessionStart hook should survive").toBeDefined(); + const ours = sessionStart.find((e) => + e.hooks.some((h) => h.command.includes(`${PLUGIN_ROOT}/scripts/session-start.mjs`)), + ); + expect(ours, "agentmemory SessionStart hook should be appended").toBeDefined(); + }); + + it("re-install strips previous agentmemory entries (idempotent by script path)", () => { + const first = buildMergedHooks(null, PLUGIN_ROOT); + const second = buildMergedHooks(first, PLUGIN_ROOT); + for (const event of Object.keys(first.hooks)) { + expect( + second.hooks[event]!.length, + `${event} should not double after second install`, + ).toBe(first.hooks[event]!.length); + } + }); + + it("re-install preserves unrelated user entries", () => { + const userEntry = { + hooks: [{ type: "command", command: "echo user-untouchable" }], + }; + const withUser: 
HookManifest = { + hooks: { + SessionStart: [userEntry], + Stop: [{ hooks: [{ type: "command", command: "echo also-user" }] }], + }, + }; + const installed = buildMergedHooks(withUser, PLUGIN_ROOT); + const reinstalled = buildMergedHooks(installed, PLUGIN_ROOT); + expect( + reinstalled.hooks["SessionStart"]!.some((e) => + e.hooks.some((h) => h.command === "echo user-untouchable"), + ), + ).toBe(true); + expect( + reinstalled.hooks["Stop"]!.some((e) => + e.hooks.some((h) => h.command === "echo also-user"), + ), + ).toBe(true); + }); + + it("handles empty existing manifest object", () => { + const merged = buildMergedHooks({ hooks: {} }, PLUGIN_ROOT); + expect(Object.keys(merged.hooks).length).toBeGreaterThan(0); + }); +}); + +describe("buildMergedHooks file round-trip", () => { + it("produces JSON that parses back to a structurally equivalent manifest", () => { + const dir = join(tmpdir(), `agentmemory-codex-hooks-${process.pid}-${Date.now()}`); + mkdirSync(dir, { recursive: true }); + const path = join(dir, "hooks.json"); + try { + const merged = buildMergedHooks(null, PLUGIN_ROOT); + writeFileSync(path, `${JSON.stringify(merged, null, 2)}\n`, "utf-8"); + const reread = JSON.parse(readFileSync(path, "utf-8")) as HookManifest; + expect(Object.keys(reread.hooks).sort()).toEqual(Object.keys(merged.hooks).sort()); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/test/codex-plugin.test.ts b/test/codex-plugin.test.ts index bb380876..bbbd88db 100644 --- a/test/codex-plugin.test.ts +++ b/test/codex-plugin.test.ts @@ -9,6 +9,29 @@ function readJson(path: string): T { return JSON.parse(readFileSync(path, "utf-8")) as T; } +type HookHandler = { type: string; command: string }; +type HookEntry = { hooks: HookHandler[] }; + +function hookCommands(path: string): string[] { + const manifest = readJson<{ hooks: Record }>(path); + return Object.values(manifest.hooks).flatMap((entries) => + entries.flatMap((entry) => 
entry.hooks.map((handler) => handler.command)), + ); +} + +describe("Plugin hook manifests", () => { + it("quote plugin script paths so roots with spaces stay intact", () => { + for (const manifest of ["hooks.json", "hooks.codex.json"]) { + const commands = hookCommands(join(pluginRoot, "hooks", manifest)); + expect(commands.length, `${manifest} should contain hook commands`).toBeGreaterThan(0); + + for (const command of commands) { + expect(command).toMatch(/^node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/[^\s"]+\.mjs"$/); + } + } + }); +}); + describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => { it("ships .codex-plugin/plugin.json with kebab-case name + version + references", () => { const manifestPath = join(pluginRoot, ".codex-plugin/plugin.json"); @@ -72,8 +95,6 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => { }); it("hook command scripts referenced in hooks.codex.json exist on disk", () => { - type HookHandler = { type: string; command: string }; - type HookEntry = { hooks: HookHandler[] }; const hooks = readJson<{ hooks: Record }>( join(pluginRoot, "hooks/hooks.codex.json"), ); @@ -81,7 +102,7 @@ describe("Codex plugin manifest (developers.openai.com/codex/plugins)", () => { for (const entries of Object.values(hooks.hooks)) { for (const entry of entries) { for (const handler of entry.hooks) { - const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s]+)/); + const match = handler.command.match(/\$\{CLAUDE_PLUGIN_ROOT\}\/(scripts\/[^\s"]+)/); if (match) scriptRefs.add(match[1]); } } diff --git a/test/copilot-plugin.test.ts b/test/copilot-plugin.test.ts new file mode 100644 index 00000000..e4121688 --- /dev/null +++ b/test/copilot-plugin.test.ts @@ -0,0 +1,377 @@ +import { describe, expect, it } from "vitest"; +import { readFileSync, existsSync } from "node:fs"; +import { join, resolve } from "node:path"; +import { createServer } from "node:http"; +import { spawn } from 
"node:child_process"; + +const repoRoot = resolve(__dirname, ".."); +const pluginRoot = join(repoRoot, "plugin"); + +function readJson(path: string): T { + return JSON.parse(readFileSync(path, "utf-8")) as T; +} + +const SUPPORTED_COPILOT_EVENTS = new Set([ + "sessionStart", + "userPromptSubmitted", + "preToolUse", + "postToolUse", + "postToolUseFailure", + "preCompact", + "agentStop", + "sessionEnd", + "subagentStart", + "subagentStop", + "notification", +]); + +const REQUIRED_MINIMUM_EVENTS = [ + "sessionStart", + "userPromptSubmitted", + "preToolUse", + "postToolUse", + "agentStop", +]; + +const KNOWN_SKILL_DIRS = [ + "recall", + "remember", + "session-history", + "forget", + "handoff", + "recap", + "commit-context", + "commit-history", +]; + +describe("Copilot plugin manifest (plugin/plugin.json)", () => { + it("manifest exists with kebab-case name, version, and required fields", () => { + const manifestPath = join(pluginRoot, "plugin.json"); + expect(existsSync(manifestPath)).toBe(true); + const manifest = readJson<{ + name: string; + version: string; + description?: string; + skills?: string; + mcpServers?: string; + hooks?: string; + }>(manifestPath); + expect(manifest.name).toBe("agentmemory"); + expect(manifest.name).toMatch(/^[a-z][a-z0-9-]*$/); + expect(manifest.version).toMatch(/^\d+\.\d+\.\d+/); + expect(manifest.skills).toBeDefined(); + expect(manifest.mcpServers).toBeDefined(); + expect(manifest.hooks).toBeDefined(); + }); + + it("manifest version matches main package.json", () => { + const pkgVer = readJson<{ version: string }>(join(repoRoot, "package.json")).version; + const pluginVer = readJson<{ version: string }>( + join(pluginRoot, "plugin.json"), + ).version; + expect(pluginVer).toBe(pkgVer); + }); + + it("all referenced manifest paths resolve to existing files / directories", () => { + const manifest = readJson<{ skills: string; mcpServers: string; hooks: string }>( + join(pluginRoot, "plugin.json"), + ); + const manifestDir = pluginRoot; + 
expect(existsSync(resolve(manifestDir, manifest.skills))).toBe(true); + expect(existsSync(resolve(manifestDir, manifest.mcpServers))).toBe(true); + expect(existsSync(resolve(manifestDir, manifest.hooks))).toBe(true); + }); + + it("skills path resolves and contains all known skill directories", () => { + const manifest = readJson<{ skills: string }>(join(pluginRoot, "plugin.json")); + const manifestDir = pluginRoot; + const skillsPath = resolve(manifestDir, manifest.skills); + for (const skill of KNOWN_SKILL_DIRS) { + expect( + existsSync(join(skillsPath, skill)), + `missing skill directory: ${skill}`, + ).toBe(true); + } + }); +}); + +describe("Copilot MCP config (.mcp.copilot.json)", () => { + it("file exists with expected shape", () => { + const mcpPath = join(pluginRoot, ".mcp.copilot.json"); + expect(existsSync(mcpPath)).toBe(true); + const config = readJson<{ + mcpServers: { + agentmemory: { + type: string; + command: string; + args: string[]; + env: Record; + tools: string[]; + }; + }; + }>(mcpPath); + const server = config.mcpServers.agentmemory; + expect(server.type).toBe("local"); + expect(server.command).toBe("npx"); + expect(server.args).toEqual(["-y", "@agentmemory/mcp"]); + expect(server.env["AGENTMEMORY_URL"]).toBe("${AGENTMEMORY_URL}"); + expect(server.env["AGENTMEMORY_SECRET"]).toBe("${AGENTMEMORY_SECRET}"); + expect(server.tools).toContain("*"); + }); +}); + +describe("Copilot hooks config (hooks/hooks.copilot.json)", () => { + type HookEntry = { + type: string; + command?: string; + bash?: string; + powershell?: string; + matcher?: string; + }; + + function loadHooks() { + return readJson<{ version: number; hooks: Record }>( + join(pluginRoot, "hooks/hooks.copilot.json"), + ); + } + + it("has top-level version === 1 and hooks object", () => { + const config = loadHooks(); + expect(config.version).toBe(1); + expect(config.hooks).toBeDefined(); + expect(typeof config.hooks).toBe("object"); + }); + + it("contains only supported Copilot event names", 
() => { + const config = loadHooks(); + for (const event of Object.keys(config.hooks)) { + expect( + SUPPORTED_COPILOT_EVENTS.has(event), + `unsupported event "${event}" in hooks.copilot.json`, + ).toBe(true); + } + }); + + it("contains all required minimum events", () => { + const config = loadHooks(); + const events = Object.keys(config.hooks); + for (const event of REQUIRED_MINIMUM_EVENTS) { + expect(events, `missing required event: ${event}`).toContain(event); + } + }); + + it("PreToolUse entry has the correct matcher", () => { + const config = loadHooks(); + const preToolEntries = config.hooks["preToolUse"]; + expect(preToolEntries).toBeDefined(); + const withMatcher = preToolEntries.find( + (e) => e.matcher === "edit|write|create|read|view|glob|grep", + ); + expect( + withMatcher, + "PreToolUse must have matcher edit|write|create|read|view|glob|grep", + ).toBeDefined(); + }); + + it("every handler has type === 'command' and exactly one of command/bash/powershell", () => { + const config = loadHooks(); + for (const [event, entries] of Object.entries(config.hooks)) { + for (const handler of entries) { + expect(handler.type, `${event} handler type`).toBe("command"); + const commandFields = [handler.command, handler.bash, handler.powershell].filter( + (v): v is string => typeof v === "string" && v.trim().length > 0, + ); + expect( + commandFields.length, + `${event} handler must have exactly one of command/bash/powershell`, + ).toBe(1); + } + } + }); + + it("every referenced script exists on disk", () => { + const config = loadHooks(); + const scriptRefs = new Set(); + for (const entries of Object.values(config.hooks)) { + for (const handler of entries) { + const cmd = handler.command ?? handler.bash ?? handler.powershell ?? 
""; + const match = cmd.match(/\$\{(?:COPILOT_PLUGIN_ROOT|CLAUDE_PLUGIN_ROOT)\}\/(scripts\/[^\s]+)/); + if (match) scriptRefs.add(match[1]); + } + } + expect(scriptRefs.size).toBeGreaterThan(0); + for (const rel of scriptRefs) { + expect(existsSync(join(pluginRoot, rel)), `missing hook script: ${rel}`).toBe(true); + } + }); +}); + +describe("Copilot hook scripts", () => { + type ObservedRequest = { path: string; body: Record }; + + async function runHook( + script: string, + payload: Record, + env: Record = {}, + ): Promise<{ requests: ObservedRequest[]; stdout: string }> { + const requests: ObservedRequest[] = []; + const server = createServer((req, res) => { + let raw = ""; + req.on("data", (chunk) => { + raw += chunk; + }); + req.on("end", () => { + requests.push({ + path: req.url ?? "", + body: raw ? (JSON.parse(raw) as Record) : {}, + }); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ context: "remembered context" })); + }); + }); + + await new Promise((resolveServer) => { + server.listen(0, "127.0.0.1", resolveServer); + }); + + const address = server.address(); + if (!address || typeof address === "string") { + server.close(); + throw new Error("test server did not bind to a TCP port"); + } + + try { + const child = spawn(process.execPath, [join(pluginRoot, script)], { + env: { + ...process.env, + AGENTMEMORY_URL: `http://127.0.0.1:${address.port}`, + AGENTMEMORY_SECRET: "", + ...env, + }, + stdio: ["pipe", "pipe", "pipe"], + }); + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (chunk) => { + stdout += chunk; + }); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + child.stdin.end(JSON.stringify(payload)); + + const exitCode = await new Promise((resolveExit, reject) => { + const timeout = setTimeout(() => { + child.kill(); + reject(new Error(`hook ${script} timed out`)); + }, 5000); + child.on("error", reject); + child.on("close", (code) => { + clearTimeout(timeout); + resolveExit(code); 
+ }); + }); + + expect(exitCode, stderr).toBe(0); + return { requests, stdout }; + } finally { + await new Promise((resolveClose) => { + server.close(() => resolveClose()); + }); + } + } + + it("session-start accepts Copilot camelCase sessionId", async () => { + const result = await runHook( + "scripts/session-start.mjs", + { sessionId: "copilot-session", cwd: "C:\\repo" }, + { AGENTMEMORY_INJECT_CONTEXT: "true" }, + ); + + expect(result.stdout).toBe("remembered context"); + expect(result.requests[0]?.path).toBe("/agentmemory/session/start"); + expect(result.requests[0]?.body).toMatchObject({ + sessionId: "copilot-session", + project: "C:\\repo", + cwd: "C:\\repo", + }); + }); + + it("pre-tool-use narrows Copilot sessionId to strings", async () => { + const result = await runHook( + "scripts/pre-tool-use.mjs", + { + sessionId: 123, + toolName: "read", + toolArgs: { path: "src/index.ts" }, + }, + { AGENTMEMORY_INJECT_CONTEXT: "true" }, + ); + + expect(result.stdout).toBe("remembered context"); + expect(result.requests[0]?.path).toBe("/agentmemory/enrich"); + expect(result.requests[0]?.body).toMatchObject({ + sessionId: "unknown", + files: ["src/index.ts"], + terms: [], + toolName: "read", + }); + }); + + it("prompt-submit accepts Copilot camelCase prompt payload", async () => { + const result = await runHook("scripts/prompt-submit.mjs", { + sessionId: "copilot-session", + cwd: "C:\\repo", + userPrompt: "remember this prompt", + }); + + expect(result.requests[0]?.path).toBe("/agentmemory/observe"); + expect(result.requests[0]?.body).toMatchObject({ + hookType: "prompt_submit", + sessionId: "copilot-session", + data: { prompt: "remember this prompt" }, + }); + }); + + it("post-tool-failure accepts Copilot camelCase tool and error payloads", async () => { + const result = await runHook("scripts/post-tool-failure.mjs", { + sessionId: "copilot-session", + cwd: "C:\\repo", + toolName: "edit", + toolArgs: { filePath: "src/index.ts" }, + errorMessage: "failed", + }); + + 
expect(result.requests[0]?.path).toBe("/agentmemory/observe"); + expect(result.requests[0]?.body).toMatchObject({ + hookType: "post_tool_failure", + sessionId: "copilot-session", + data: { + tool_name: "edit", + tool_input: JSON.stringify({ filePath: "src/index.ts" }), + error: "failed", + }, + }); + }); + + it("notification accepts Copilot camelCase notificationType", async () => { + const result = await runHook("scripts/notification.mjs", { + sessionId: "copilot-session", + cwd: "C:\\repo", + notificationType: "permission_prompt", + title: "Tool approval", + message: "Approve edit", + }); + + expect(result.requests[0]?.path).toBe("/agentmemory/observe"); + expect(result.requests[0]?.body).toMatchObject({ + hookType: "notification", + sessionId: "copilot-session", + data: { + notification_type: "permission_prompt", + title: "Tool approval", + message: "Approve edit", + }, + }); + }); +}); diff --git a/test/diagnostics.test.ts b/test/diagnostics.test.ts index d2dc706e..053e1c40 100644 --- a/test/diagnostics.test.ts +++ b/test/diagnostics.test.ts @@ -195,7 +195,10 @@ describe("Diagnostics Functions", () => { }; expect(result.success).toBe(true); - expect(result.summary.pass).toBe(8); + // 14 = 8 original (actions, leases, sentinels, sketches, signals, + // sessions, memories, mesh) + 6 added in #lesson-visibility + // (lessons, summaries, semantic, procedural, crystals, insights). 
+ expect(result.summary.pass).toBe(14); expect(result.summary.warn).toBe(0); expect(result.summary.fail).toBe(0); expect(result.summary.fixable).toBe(0); @@ -636,4 +639,229 @@ describe("Diagnostics Functions", () => { expect(unchanged!.status).toBe("blocked"); }); }); + + describe("per-store tally categories (#lesson-visibility)", () => { + it("lessons category: passes with valid live lessons + ignores tombstoned", async () => { + await kv.set(KV.lessons, "lsn_live", { + id: "lsn_live", content: "x", context: "", confidence: 0.8, + reinforcements: 0, source: "manual", sourceIds: [], tags: [], + createdAt: "", updatedAt: "", decayRate: 0.05, + }); + await kv.set(KV.lessons, "lsn_tomb", { + id: "lsn_tomb", content: "x", context: "", confidence: 0.5, + reinforcements: 0, source: "manual", sourceIds: [], tags: [], + createdAt: "", updatedAt: "", decayRate: 0.05, deleted: true, + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["lessons"], + })) as { checks: DiagnosticCheck[] }; + + const ok = result.checks.find((c) => c.name === "lessons-ok"); + expect(ok?.status).toBe("pass"); + expect(ok?.message).toMatch(/All 1 lessons.*1 tombstoned/); + }); + + it("lessons category: warns on out-of-range confidence", async () => { + await kv.set(KV.lessons, "lsn_bad", { + id: "lsn_bad", content: "x", context: "", confidence: 1.5, + reinforcements: 0, source: "manual", sourceIds: [], tags: [], + createdAt: "", updatedAt: "", decayRate: 0.05, + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["lessons"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:")); + expect(warn?.status).toBe("warn"); + }); + + it("summaries category: warns on missing title", async () => { + await kv.set(KV.summaries, "ses_1", { + sessionId: "ses_1", project: "p", createdAt: "", title: "", + narrative: "n", keyDecisions: [], filesModified: [], concepts: [], + observationCount: 1, + 
}); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["summaries"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:")); + expect(warn?.status).toBe("warn"); + }); + + it("procedural category: warns on empty steps", async () => { + await kv.set(KV.procedural, "proc_1", { + id: "proc_1", name: "noop", steps: [], triggerCondition: "x", + frequency: 1, sourceSessionIds: [], strength: 0.5, + createdAt: "", updatedAt: "", + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["procedural"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("procedural-empty-steps:")); + expect(warn?.status).toBe("warn"); + }); + + it("crystals category: warns on empty narrative", async () => { + await kv.set(KV.crystals, "cry_1", { + id: "cry_1", narrative: "", keyOutcomes: [], filesAffected: [], + lessons: [], sourceActionIds: [], createdAt: "", + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["crystals"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:")); + expect(warn?.status).toBe("warn"); + }); + + it("insights category: warns on out-of-range confidence", async () => { + await kv.set(KV.insights, "ins_bad", { + id: "ins_bad", title: "t", content: "c", confidence: -0.1, + reinforcements: 0, sourceConceptCluster: [], sourceMemoryIds: [], + sourceLessonIds: [], sourceCrystalIds: [], tags: [], + createdAt: "", updatedAt: "", decayRate: 0.05, + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["insights"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("insight-bad-confidence:")); + expect(warn?.status).toBe("warn"); + }); + + it("semantic category: warns on out-of-range confidence", async () => { + await kv.set(KV.semantic, 
"sem_bad", { + id: "sem_bad", fact: "f", confidence: 2.0, sourceSessionIds: [], + sourceMemoryIds: [], accessCount: 0, lastAccessedAt: "", + strength: 0, createdAt: "", updatedAt: "", + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["semantic"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("semantic-bad-confidence:")); + expect(warn?.status).toBe("warn"); + }); + + it("categories filter accepts new categories and skips others", async () => { + const result = (await sdk.trigger("mem::diagnose", { + categories: ["lessons", "summaries"], + })) as { checks: DiagnosticCheck[] }; + + expect(result.checks.every((c) => c.category === "lessons" || c.category === "summaries")).toBe(true); + expect(result.checks.some((c) => c.category === "lessons")).toBe(true); + expect(result.checks.some((c) => c.category === "summaries")).toBe(true); + }); + + describe("defensive row-shape handling (CodeRabbit #473 review)", () => { + it("NaN/Infinity confidence on a lesson is flagged as warn, not silently passed", async () => { + await kv.set(KV.lessons, "lsn_nan", { + id: "lsn_nan", content: "x", context: "", confidence: NaN, + reinforcements: 0, source: "manual", sourceIds: [], tags: [], + createdAt: "", updatedAt: "", decayRate: 0.05, + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["lessons"], + })) as { checks: DiagnosticCheck[] }; + + const warn = result.checks.find((c) => c.name.startsWith("lesson-bad-confidence:")); + expect(warn?.status).toBe("warn"); + }); + + it("non-string summary title doesn't throw — surfaces as warn", async () => { + await kv.set(KV.summaries, "ses_bad_title", { + sessionId: "ses_bad_title", + project: "p", + createdAt: "", + title: null as unknown as string, // simulate corrupted row + narrative: "n", + keyDecisions: [], + filesModified: [], + concepts: [], + observationCount: 1, + }); + + // The bug to guard against: the old code called 
.trim() unconditionally, + // which throws on null/number, which aborts the whole diagnose run and + // any later category check never executes. Verify diagnose completes + // AND surfaces the bad row. + const result = (await sdk.trigger("mem::diagnose", { + categories: ["summaries", "lessons"], + })) as { checks: DiagnosticCheck[]; success?: boolean }; + + expect(result.success).toBe(true); + const warn = result.checks.find((c) => c.name.startsWith("summary-missing-title:")); + expect(warn?.status).toBe("warn"); + // Later category still ran: + expect(result.checks.some((c) => c.category === "lessons")).toBe(true); + }); + + it("non-string crystal narrative doesn't throw — surfaces as warn", async () => { + await kv.set(KV.crystals, "cry_bad", { + id: "cry_bad", + narrative: undefined as unknown as string, + keyOutcomes: [], + filesAffected: [], + lessons: [], + sourceActionIds: [], + createdAt: "", + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["crystals"], + })) as { checks: DiagnosticCheck[]; success?: boolean }; + + expect(result.success).toBe(true); + const warn = result.checks.find((c) => c.name.startsWith("crystal-empty-narrative:")); + expect(warn?.status).toBe("warn"); + }); + + it("Infinity confidence on insight + semantic both flagged", async () => { + await kv.set(KV.insights, "ins_inf", { + id: "ins_inf", + title: "t", + content: "c", + confidence: Infinity, + reinforcements: 0, + sourceConceptCluster: [], + sourceMemoryIds: [], + sourceLessonIds: [], + sourceCrystalIds: [], + tags: [], + createdAt: "", + updatedAt: "", + decayRate: 0.05, + }); + await kv.set(KV.semantic, "sem_nan", { + id: "sem_nan", + fact: "f", + confidence: NaN, + sourceSessionIds: [], + sourceMemoryIds: [], + accessCount: 0, + lastAccessedAt: "", + strength: 0, + createdAt: "", + updatedAt: "", + }); + + const result = (await sdk.trigger("mem::diagnose", { + categories: ["insights", "semantic"], + })) as { checks: DiagnosticCheck[] }; + + 
expect(result.checks.find((c) => c.name === "insight-bad-confidence:ins_inf")?.status).toBe("warn"); + expect(result.checks.find((c) => c.name === "semantic-bad-confidence:sem_nan")?.status).toBe("warn"); + }); + }); + }); }); diff --git a/test/env-loader.test.ts b/test/env-loader.test.ts index 9c6f2955..17ff6a8e 100644 --- a/test/env-loader.test.ts +++ b/test/env-loader.test.ts @@ -25,6 +25,7 @@ describe("loadEnvFile", () => { process.env["HOME"] = sandboxHome; process.env["USERPROFILE"] = sandboxHome; delete process.env["AGENTMEMORY_AUTO_COMPRESS"]; + delete process.env["AGENTMEMORY_DROP_STALE_INDEX"]; delete process.env["CONSOLIDATION_ENABLED"]; delete process.env["GRAPH_EXTRACTION_ENABLED"]; delete process.env["TOKEN"]; @@ -82,4 +83,10 @@ describe("loadEnvFile", () => { const cfg = await freshConfig(); expect(cfg.getEnvVar("TOKEN")).toBe("abc"); }); + + it("reads AGENTMEMORY_DROP_STALE_INDEX from the env file", async () => { + writeEnv("AGENTMEMORY_DROP_STALE_INDEX=true"); + const cfg = await freshConfig(); + expect(cfg.isDropStaleIndexEnabled()).toBe(true); + }); }); diff --git a/test/eval-adapters.test.ts b/test/eval-adapters.test.ts new file mode 100644 index 00000000..90f914f5 --- /dev/null +++ b/test/eval-adapters.test.ts @@ -0,0 +1,92 @@ +import { describe, it, expect } from "vitest"; +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; +import { grepAdapter } from "../eval/runner/adapters/grep.js"; +import { aggregate, scoreQuestion } from "../eval/runner/score.js"; +import type { Question, Session } from "../eval/runner/types.js"; + +const DATA_DIR = resolve(__dirname, "..", "eval", "data", "coding-agent-life-v1"); +const sessions = JSON.parse(readFileSync(`${DATA_DIR}/sessions.json`, "utf8")) as Session[]; +const queries = JSON.parse(readFileSync(`${DATA_DIR}/queries.json`, "utf8")) as Array< + Omit +>; + +describe("eval scaffold", () => { + it("coding-agent-life-v1 corpus is well-formed", () => { + 
expect(sessions.length).toBeGreaterThan(0); + expect(queries.length).toBeGreaterThan(0); + const sessionIds = new Set(sessions.map((s) => s.id)); + for (const q of queries) { + expect(q.goldSessionIds.length).toBeGreaterThan(0); + for (const id of q.goldSessionIds) { + expect(sessionIds.has(id)).toBe(true); + } + } + }); + + it("grep adapter ranks gold session in top-5 for most queries", async () => { + const state = await grepAdapter.init(sessions); + let hits = 0; + for (const q of queries) { + const ranked = await grepAdapter.query(q.question, state, 5); + const topIds = new Set(ranked.map((r) => r.sessionId)); + if (q.goldSessionIds.some((id) => topIds.has(id))) hits += 1; + } + expect(hits / queries.length).toBeGreaterThan(0.5); + }); + + it("scoreQuestion computes P@K, R@K, hit, topGoldRank", () => { + const q: Question = { + id: "test", + type: "single-session", + question: "?", + goldSessionIds: ["a", "b"], + haystack: [], + }; + const ranked = [ + { sessionId: "x", score: 0.9 }, + { sessionId: "a", score: 0.7 }, + { sessionId: "y", score: 0.5 }, + { sessionId: "b", score: 0.3 }, + ]; + const row = scoreQuestion(q, ranked, 5, "test", 12); + expect(row.hit).toBe(true); + expect(row.recallAtK).toBe(1); + expect(row.precisionAtK).toBeCloseTo(2 / 5); + expect(row.topGoldRank).toBe(2); + }); + + it("scoreQuestion handles miss", () => { + const q: Question = { + id: "test", + type: "x", + question: "?", + goldSessionIds: ["a"], + haystack: [], + }; + const ranked = [ + { sessionId: "x", score: 1 }, + { sessionId: "y", score: 0.5 }, + ]; + const row = scoreQuestion(q, ranked, 5, "test", 5); + expect(row.hit).toBe(false); + expect(row.recallAtK).toBe(0); + expect(row.topGoldRank).toBeNull(); + }); + + it("aggregate computes per-adapter and per-type means", () => { + const q: Question = { + id: "1", + type: "t1", + question: "?", + goldSessionIds: ["a"], + haystack: [], + }; + const row1 = scoreQuestion(q, [{ sessionId: "a", score: 1 }], 5, "grep", 10); + const row2 
= scoreQuestion(q, [{ sessionId: "x", score: 1 }], 5, "grep", 20); + const agg = aggregate([row1, row2]); + expect(agg.byAdapter.grep.hit).toBe(1); + expect(agg.byAdapter.grep.n).toBe(2); + expect(agg.byType.t1.grep.n).toBe(2); + }); +}); diff --git a/test/export-import.test.ts b/test/export-import.test.ts index 4426ce8e..373d2518 100644 --- a/test/export-import.test.ts +++ b/test/export-import.test.ts @@ -119,7 +119,7 @@ describe("Export/Import Functions", () => { it("export produces valid ExportData structure", async () => { const result = (await sdk.trigger("mem::export", {})) as ExportData; - expect(result.version).toBe("0.9.20"); + expect(result.version).toBe("0.9.21"); expect(result.exportedAt).toBeDefined(); expect(result.sessions.length).toBe(1); expect(result.sessions[0].id).toBe("ses_1"); diff --git a/test/fs-watcher.test.ts b/test/fs-watcher.test.ts index 76212b06..48c1b094 100644 --- a/test/fs-watcher.test.ts +++ b/test/fs-watcher.test.ts @@ -12,7 +12,7 @@ function wait(ms: number): Promise { return new Promise((r) => setTimeout(r, ms)); } -describe("FilesystemWatcher", () => { +describe("FilesystemWatcher", { retry: 2 }, () => { let root: string; const originalFetch = globalThis.fetch; let captured: Array<{ url: string; body: unknown; headers: Record }>; @@ -49,7 +49,7 @@ describe("FilesystemWatcher", () => { w.start(); try { writeFileSync(join(root, "notes.md"), "hello world\n"); - await wait(800); + await wait(1500); expect(captured.length).toBeGreaterThanOrEqual(1); const obs = captured[captured.length - 1]; expect(obs.url).toBe("http://localhost:3111/agentmemory/observe"); @@ -87,7 +87,7 @@ describe("FilesystemWatcher", () => { w.start(); try { unlinkSync(join(root, "old.md")); - await wait(800); + await wait(1500); const deletes = captured.filter( (c) => (c.body as { data: { changeKind: string } }).data?.changeKind === "file_delete", ); @@ -116,7 +116,7 @@ describe("FilesystemWatcher", () => { w.start(); try { writeFileSync(join(root, 
"node_modules", "ignored.js"), "x"); - await wait(800); + await wait(1500); const matches = captured.filter((c) => (c.body as { data: { files: string[] } }).data?.files?.some((f) => f.includes("ignored.js")), ); @@ -136,7 +136,7 @@ describe("FilesystemWatcher", () => { w.start(); try { writeFileSync(join(root, "secret.md"), "bearer test\n"); - await wait(800); + await wait(1500); expect(captured.length).toBeGreaterThanOrEqual(1); const headers = captured[captured.length - 1].headers as Record; expect(headers.authorization).toBe("Bearer shhh"); diff --git a/test/hermes-plugin.test.ts b/test/hermes-plugin.test.ts new file mode 100644 index 00000000..f13f06f3 --- /dev/null +++ b/test/hermes-plugin.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from "vitest"; +import { readFileSync } from "node:fs"; + +const expectedHermesHooks = [ + "prefetch", + "sync_turn", + "on_session_end", + "on_pre_compress", + "on_memory_write", + "system_prompt_block", +]; + +function readHermesPluginHooks(): string[] { + const manifest = readFileSync("integrations/hermes/plugin.yaml", "utf8"); + const hooks: string[] = []; + let inHooks = false; + + for (const line of manifest.split(/\r?\n/)) { + if (line.trim() === "hooks:") { + inHooks = true; + continue; + } + if (!inHooks) continue; + if (line.trim() === "") continue; + if (!line.startsWith(" ")) break; + + const match = line.match(/^\s*-\s*([A-Za-z_][A-Za-z0-9_]*)\s*$/); + if (match) hooks.push(match[1]); + } + + return hooks; +} + +function isHermesLifecycleHook(methodName: string): boolean { + return ( + methodName === "prefetch" || + methodName === "sync_turn" || + methodName === "system_prompt_block" || + methodName.startsWith("on_") + ); +} + +function readAgentMemoryProviderHookMethods(): string[] { + const source = readFileSync("integrations/hermes/__init__.py", "utf8"); + const methods: string[] = []; + const providerMethodPattern = /^ def ([a-z_][a-z0-9_]*)\(/gm; + + for (const match of 
source.matchAll(providerMethodPattern)) { + const methodName = match[1]; + if (isHermesLifecycleHook(methodName)) methods.push(methodName); + } + + return methods; +} + +describe("Hermes plugin manifest", () => { + it("declares every implemented lifecycle hook", () => { + const declaredHooks = readHermesPluginHooks(); + const implementedHooks = readAgentMemoryProviderHookMethods(); + + expect([...declaredHooks].sort()).toEqual([...implementedHooks].sort()); + expect(declaredHooks).toEqual(expectedHermesHooks); + }); +}); diff --git a/test/mcp-standalone-proxy.test.ts b/test/mcp-standalone-proxy.test.ts index 0d93b227..dc08a024 100644 --- a/test/mcp-standalone-proxy.test.ts +++ b/test/mcp-standalone-proxy.test.ts @@ -75,6 +75,61 @@ describe("@agentmemory/mcp standalone — server proxy (issue #159)", () => { expect(body.results[0].id).toBe("m1"); }); + it("proxies memory_recall to POST /agentmemory/search and forwards format/token_budget (#507)", async () => { + const calls: Array<{ url: string; body?: unknown }> = []; + installFetch((url, init) => { + if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 }); + const body = init?.body ? 
JSON.parse(init.body as string) : undefined; + calls.push({ url, body }); + if (url.endsWith("/agentmemory/search")) { + return new Response( + JSON.stringify({ + mode: "full", + facts: [{ id: "m1" }], + narrative: "n", + concepts: ["c"], + files: ["f"], + }), + { status: 200, headers: { "content-type": "application/json" } }, + ); + } + return new Response("not found", { status: 404 }); + }); + const res = await handleToolCall("memory_recall", { + query: "auth bug", + limit: 5, + format: "full", + token_budget: 800, + }); + const body = JSON.parse(res.content[0].text); + expect(body.mode).toBe("full"); + expect(body.facts[0].id).toBe("m1"); + const searchCall = calls.find((c) => c.url.endsWith("/agentmemory/search")); + expect(searchCall).toBeDefined(); + expect(searchCall?.body).toEqual({ + query: "auth bug", + limit: 5, + format: "full", + token_budget: 800, + }); + expect(calls.find((c) => c.url.endsWith("/agentmemory/smart-search"))).toBeUndefined(); + }); + + it("memory_recall defaults format to 'full' when omitted (#507)", async () => { + let recallBody: Record | undefined; + installFetch((url, init) => { + if (url.endsWith("/agentmemory/livez")) return new Response("ok", { status: 200 }); + if (url.endsWith("/agentmemory/search")) { + recallBody = init?.body ? 
JSON.parse(init.body as string) : undefined; + return new Response(JSON.stringify({ mode: "full", facts: [] }), { status: 200 }); + } + return new Response("not found", { status: 404 }); + }); + await handleToolCall("memory_recall", { query: "x" }); + expect(recallBody?.["format"]).toBe("full"); + expect(recallBody).not.toHaveProperty("token_budget"); + }); + it("proxies memory_governance_delete to the DELETE REST endpoint", async () => { const calls: Array<{ url: string; method: string; body?: unknown }> = []; installFetch((url, init) => { diff --git a/test/mcp-transport.test.ts b/test/mcp-transport.test.ts index bb8627dc..006ecc9e 100644 --- a/test/mcp-transport.test.ts +++ b/test/mcp-transport.test.ts @@ -1,5 +1,7 @@ import { describe, it, expect, vi } from "vitest"; import { + createMessageParser, + formatResponse, processLine, type JsonRpcResponse, type RequestHandler, @@ -227,3 +229,47 @@ describe("processLine — id type validation (JSON-RPC §4)", () => { expect(c.out[0].result).toEqual({ method: "ping" }); }); }); + +describe("stdio framing", () => { + it("parses Content-Length framed MCP messages split across chunks", () => { + const messages: string[] = []; + const parser = createMessageParser((message) => messages.push(message)); + const body = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "initialize" }); + const framed = `Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n${body}`; + + parser.push(framed.slice(0, 12)); + parser.push(framed.slice(12)); + + expect(messages).toEqual([body]); + expect(parser.isFramed()).toBe(true); + }); + + it("parses newline-delimited JSON for existing clients", () => { + const messages: string[] = []; + const parser = createMessageParser((message) => messages.push(message)); + const first = JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/list" }); + const second = JSON.stringify({ jsonrpc: "2.0", method: "notifications/initialized" }); + + parser.push(`${first}\n${second}\n`); + + 
expect(messages).toEqual([first, second]); + expect(parser.isFramed()).toBe(false); + }); + + it("formats responses with Content-Length framing when requested", () => { + const response: JsonRpcResponse = { + jsonrpc: "2.0", + id: 1, + result: { ok: true }, + }; + const formatted = formatResponse(response, true); + + expect(Array.isArray(formatted)).toBe(true); + if (!Array.isArray(formatted)) throw new Error("expected framed response"); + const header = formatted[0].toString("ascii"); + const body = formatted[1].toString("utf8"); + + expect(header).toBe(`Content-Length: ${Buffer.byteLength(body, "utf8")}\r\n\r\n`); + expect(JSON.parse(body)).toEqual(response); + }); +}); diff --git a/test/onboarding.test.ts b/test/onboarding.test.ts new file mode 100644 index 00000000..053085b8 --- /dev/null +++ b/test/onboarding.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it } from "vitest"; + +import { buildAgentOptions, getInitialAgentValues } from "../src/cli/onboarding.js"; + +describe("first-run onboarding", () => { + it("offers GitHub Copilot CLI as a native setup target", () => { + const options = buildAgentOptions(); + expect(options).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + value: "copilot-cli", + label: expect.stringContaining("GitHub Copilot CLI"), + hint: "native plugin", + }), + ]), + ); + }); + + it("selects GitHub Copilot CLI by default when running inside Copilot CLI", () => { + expect(getInitialAgentValues({ COPILOT_CLI: "1" })).toEqual(["copilot-cli"]); + expect(getInitialAgentValues({ COPILOT_AGENT_SESSION_ID: "session" })).toEqual(["copilot-cli"]); + }); + + it("keeps Claude Code as the default outside known agent environments", () => { + expect(getInitialAgentValues({})).toEqual(["claude-code"]); + }); +}); diff --git a/test/smart-search.test.ts b/test/smart-search.test.ts index 4f22d1a9..9d0c94e0 100644 --- a/test/smart-search.test.ts +++ b/test/smart-search.test.ts @@ -193,4 +193,102 @@ describe("Smart Search Function", () 
=> { } | null; expect(log?.count).toBe(1); }); + + describe("lesson inclusion (#lesson-visibility)", () => { + it("compact mode returns lessons array alongside observation results", async () => { + sdk.registerFunction("mem::lesson-recall", async (payload: any) => ({ + success: true, + lessons: [ + { id: "lsn_a", content: "always rebase before push", confidence: 0.9, createdAt: "2026-04-01T00:00:00Z", project: "p", tags: ["git"], score: 0.81 }, + { id: "lsn_b", content: "never force-push to main", confidence: 0.95, createdAt: "2026-04-02T00:00:00Z", project: "p", tags: ["git"], score: 0.76 }, + ], + })); + + const result = (await sdk.trigger("mem::smart-search", { + query: "rebase", + })) as { mode: string; results: CompactSearchResult[]; lessons?: any[] }; + + expect(result.mode).toBe("compact"); + expect(result.results.length).toBe(2); // observations unchanged + expect(result.lessons).toBeDefined(); + expect(result.lessons!.length).toBe(2); + expect(result.lessons![0]).toMatchObject({ + lessonId: "lsn_a", + confidence: 0.9, + score: 0.81, + }); + expect(result.lessons![0].tags).toEqual(["git"]); + }); + + it("compact mode truncates long lesson content for preview", async () => { + const long = "x".repeat(500); + sdk.registerFunction("mem::lesson-recall", async () => ({ + success: true, + lessons: [{ id: "lsn_long", content: long, confidence: 0.5, createdAt: "", tags: [], score: 0.4 }], + })); + + const result = (await sdk.trigger("mem::smart-search", { + query: "x", + })) as { lessons: any[] }; + + expect(result.lessons[0].content.length).toBeLessThan(long.length); + expect(result.lessons[0].content).toMatch(/…$/); + }); + + it("includeLessons:false omits the lessons array entirely", async () => { + // No lesson-recall handler registered — would throw if invoked. 
+ const result = (await sdk.trigger("mem::smart-search", { + query: "auth", + includeLessons: false, + })) as { mode: string; results: CompactSearchResult[]; lessons?: unknown }; + + expect(result.results.length).toBe(2); + expect(result.lessons).toBeUndefined(); + }); + + it("forwards project filter to mem::lesson-recall", async () => { + let receivedPayload: any = null; + sdk.registerFunction("mem::lesson-recall", async (payload: any) => { + receivedPayload = payload; + return { success: true, lessons: [] }; + }); + + await sdk.trigger("mem::smart-search", { + query: "rebase", + project: "gitops-assistant", + }); + + expect(receivedPayload).toMatchObject({ + query: "rebase", + project: "gitops-assistant", + }); + }); + + it("tolerates mem::lesson-recall failure: returns empty lessons, observations unchanged", async () => { + sdk.registerFunction("mem::lesson-recall", async () => { + throw new Error("lessons store unavailable"); + }); + + const result = (await sdk.trigger("mem::smart-search", { + query: "auth", + })) as { results: CompactSearchResult[]; lessons: any[] }; + + expect(result.results.length).toBe(2); + expect(result.lessons).toEqual([]); + }); + + it("tolerates non-success lesson-recall response shape", async () => { + sdk.registerFunction("mem::lesson-recall", async () => ({ + success: false, + error: "query is required", + })); + + const result = (await sdk.trigger("mem::smart-search", { + query: "auth", + })) as { results: CompactSearchResult[]; lessons: any[] }; + + expect(result.results.length).toBe(2); + expect(result.lessons).toEqual([]); + }); + }); }); diff --git a/test/summarize.test.ts b/test/summarize.test.ts new file mode 100644 index 00000000..03aa1926 --- /dev/null +++ b/test/summarize.test.ts @@ -0,0 +1,417 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; + +vi.mock("../src/logger.js", () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }, +})); + +vi.mock("../src/state/schema.js", () => ({ + 
KV: { + sessions: "sessions", + summaries: "summaries", + observations: (sessionId: string) => `obs:${sessionId}`, + audit: "audit", + }, +})); + +vi.mock("../src/eval/schemas.js", () => ({ + SummaryOutputSchema: {}, +})); + +vi.mock("../src/eval/validator.js", () => ({ + validateOutput: () => ({ valid: true, result: { errors: [] } }), +})); + +vi.mock("../src/eval/quality.js", () => ({ + scoreSummary: () => 100, +})); + +vi.mock("../src/functions/audit.js", () => ({ + safeAudit: vi.fn(), +})); + +import { registerSummarizeFunction } from "../src/functions/summarize.js"; +import type { + CompressedObservation, + Session, + MemoryProvider, +} from "../src/types.js"; + +function mockKV() { + const store = new Map>(); + return { + store, + get: async (scope: string, key: string): Promise => + (store.get(scope)?.get(key) as T) ?? null, + set: async (scope: string, key: string, data: T): Promise => { + if (!store.has(scope)) store.set(scope, new Map()); + store.get(scope)!.set(key, data); + return data; + }, + delete: async (scope: string, key: string): Promise => { + store.get(scope)?.delete(key); + }, + list: async (scope: string): Promise => { + const entries = store.get(scope); + return entries ? 
(Array.from(entries.values()) as T[]) : []; + }, + }; +} + +function mockSdk() { + const functions = new Map(); + return { + functions, + registerFunction: (id: string, handler: Function) => { + functions.set(id, handler); + }, + registerTrigger: () => {}, + trigger: async () => ({}), + }; +} + +function makeObs(i: number, sessionId: string): CompressedObservation { + return { + id: `obs_${i}`, + sessionId, + timestamp: new Date().toISOString(), + type: "conversation", + title: `obs ${i}`, + facts: [`fact ${i}`], + narrative: `narrative for obs ${i}`, + concepts: [], + files: [`src/file_${i}.ts`], + importance: 5, + }; +} + +function makeProvider(responses: string[]): MemoryProvider & { + calls: Array<{ system: string; user: string }>; +} { + const calls: Array<{ system: string; user: string }> = []; + let i = 0; + return { + name: "test", + calls, + compress: async () => "", + summarize: async (system: string, user: string) => { + calls.push({ system, user }); + const r = responses[i] ?? responses[responses.length - 1]; + i += 1; + return r; + }, + }; +} + +function summaryXml(opts: { + title: string; + narrative?: string; + decisions?: string[]; + files?: string[]; + concepts?: string[]; +}): string { + const d = (opts.decisions ?? []).map((x) => `${x}`).join(""); + const f = (opts.files ?? []).map((x) => `${x}`).join(""); + const c = (opts.concepts ?? []).map((x) => `${x}`).join(""); + return ` +${opts.title} +${opts.narrative ?? 
"narrative"} +${d} +${f} +${c} +`; +} + +async function setupHandler(opts: { + sessionId: string; + obsCount: number; + provider: MemoryProvider; +}) { + const sdk = mockSdk(); + const kv = mockKV(); + const session: Session = { + id: opts.sessionId, + project: "test-project", + cwd: "/tmp", + startedAt: new Date().toISOString(), + status: "completed", + observationCount: opts.obsCount, + }; + await kv.set("sessions", opts.sessionId, session); + for (let i = 0; i < opts.obsCount; i++) { + const o = makeObs(i, opts.sessionId); + await kv.set(`obs:${opts.sessionId}`, o.id, o); + } + registerSummarizeFunction(sdk as any, kv as any, opts.provider); + const handler = sdk.functions.get("mem::summarize")!; + return { handler, kv }; +} + +describe("mem::summarize chunking", () => { + const ORIGINAL_ENV = { ...process.env }; + + beforeEach(() => { + delete process.env.SUMMARIZE_CHUNK_SIZE; + delete process.env.SUMMARIZE_CHUNK_CONCURRENCY; + }); + + afterEach(() => { + process.env = { ...ORIGINAL_ENV }; + }); + + it("small session takes the single-call path (no chunking, no reduce)", async () => { + const provider = makeProvider([ + summaryXml({ + title: "Small session", + decisions: ["decision A"], + files: ["src/a.ts"], + concepts: ["concept-a"], + }), + ]); + const { handler, kv } = await setupHandler({ + sessionId: "ses_small", + obsCount: 10, + provider, + }); + + const result: any = await handler({ sessionId: "ses_small" }); + + expect(result.success).toBe(true); + expect(provider.calls).toHaveLength(1); + expect(provider.calls[0].user).toContain("Session observations (10 total)"); + const stored: any = await kv.get("summaries", "ses_small"); + expect(stored?.title).toBe("Small session"); + }); + + it("large session map-reduces: N chunk calls + 1 reduce call", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; // serial keeps call ordering deterministic + const provider = makeProvider([ + summaryXml({ title: 
"Chunk 1", decisions: ["dA"], files: ["src/a.ts"], concepts: ["ca"] }), + summaryXml({ title: "Chunk 2", decisions: ["dB"], files: ["src/b.ts"], concepts: ["cb"] }), + summaryXml({ title: "Chunk 3", decisions: ["dC"], files: ["src/c.ts"], concepts: ["cc"] }), + summaryXml({ + title: "Merged", + decisions: ["dA", "dB", "dC"], + files: ["src/a.ts", "src/b.ts", "src/c.ts"], + concepts: ["ca", "cb", "cc"], + }), + ]); + const { handler, kv } = await setupHandler({ + sessionId: "ses_large", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_large" }); + + expect(result.success).toBe(true); + expect(provider.calls).toHaveLength(4); + // First three are chunk calls (use the summary system prompt). + expect(provider.calls[0].system).toContain("session summarizer"); + expect(provider.calls[2].system).toContain("session summarizer"); + // Last is the reduce call (uses the merge system prompt). + expect(provider.calls[3].system).toContain("merging multiple partial summaries"); + expect(provider.calls[3].user).toContain("Chunk 1 of 3"); + expect(provider.calls[3].user).toContain("Chunk 3 of 3"); + + const stored: any = await kv.get("summaries", "ses_large"); + expect(stored?.title).toBe("Merged"); + // observationCount on the persisted summary should reflect the full session, + // not just the final chunk. 
+ expect(stored?.observationCount).toBe(250); + expect(stored?.keyDecisions).toEqual(["dA", "dB", "dC"]); + }); + + it("SUMMARIZE_CHUNK_SIZE env override is respected", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "50"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + const provider = makeProvider([ + summaryXml({ title: "chunk" }), + summaryXml({ title: "chunk" }), + summaryXml({ title: "chunk" }), + summaryXml({ title: "chunk" }), + summaryXml({ title: "merged" }), + ]); + const { handler } = await setupHandler({ + sessionId: "ses_env", + obsCount: 175, + provider, + }); + + const result: any = await handler({ sessionId: "ses_env" }); + + expect(result.success).toBe(true); + // 175 obs ÷ 50 = 4 chunks (last chunk has 25) + 1 reduce = 5 calls. + expect(provider.calls).toHaveLength(5); + }); + + it("flaky chunk: parse fails once, retried, then succeeds — no skip", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + const provider = makeProvider([ + summaryXml({ title: "ok1" }), + "", // chunk 2 attempt 1: parse-fail + summaryXml({ title: "ok2" }), // chunk 2 attempt 2 (retry): success + summaryXml({ title: "ok3" }), + summaryXml({ title: "merged" }), + ]); + const { handler, kv } = await setupHandler({ + sessionId: "ses_flaky", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_flaky" }); + + expect(result.success).toBe(true); + // 3 chunks × 1 attempt + 1 retry on chunk 2 + 1 reduce = 5 calls. 
+ expect(provider.calls).toHaveLength(5); + const stored: any = await kv.get("summaries", "ses_flaky"); + expect(stored?.title).toBe("merged"); + }); + + it("persistently-broken chunk is skipped, reduce still runs on remaining partials", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + const provider = makeProvider([ + summaryXml({ title: "ok1" }), + "", "", // chunk 2: both attempts parse-fail + summaryXml({ title: "ok3" }), + summaryXml({ title: "merged-with-skip" }), + ]); + const { handler, kv } = await setupHandler({ + sessionId: "ses_skip", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_skip" }); + + expect(result.success).toBe(true); + // 1 ok + (1 + 1 retry skip) + 1 ok + 1 reduce = 5 calls. + expect(provider.calls).toHaveLength(5); + // Reduce input should mention only 2 of 3 chunks (chunk 2 skipped) — + // but the chunk indices in the reduce labels should reflect chunk 1 and 3, + // preserving chronological boundaries. + const reduceCall = provider.calls[4]; + expect(reduceCall.user).toContain("Chunk 1 of 2"); + expect(reduceCall.user).toContain("Chunk 2 of 2"); + expect(reduceCall.user).toContain("obs 1-100"); // first surviving chunk + expect(reduceCall.user).toContain("obs 201-250"); // third surviving chunk (was idx 2, range 201-250) + const stored: any = await kv.get("summaries", "ses_skip"); + expect(stored?.title).toBe("merged-with-skip"); + }); + + it("too many skipped chunks bails out with a clear error", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + // 3 chunks, 2 fully broken → >50% skipped → bail. 
+ const provider = makeProvider([ + summaryXml({ title: "ok1" }), + "", "", + "", "", + ]); + const { handler } = await setupHandler({ + sessionId: "ses_too_broken", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_too_broken" }); + + expect(result.success).toBe(false); + expect(result.error).toMatch(/too_many_chunks_skipped: 2\/3/); + }); + + it("provider error on one chunk after retry is skipped, not propagated", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + let i = 0; + const provider: MemoryProvider & { calls: any[] } = { + name: "test", + calls: [], + compress: async () => "", + summarize: async (system: string, user: string) => { + (provider as any).calls.push({ system, user }); + i += 1; + if (i === 1) return summaryXml({ title: "ok1" }); + // chunk 2: both attempts throw (e.g. provider 400) + if (i === 2 || i === 3) throw new Error("OpenAI API error (400): content rejected"); + if (i === 4) return summaryXml({ title: "ok3" }); + return summaryXml({ title: "merged-with-skip" }); + }, + }; + const { handler, kv } = await setupHandler({ + sessionId: "ses_net", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_net" }); + + expect(result.success).toBe(true); + // 1 ok + 2 fail + 1 ok + 1 reduce = 5 calls. + expect((provider as any).calls.length).toBe(5); + const stored: any = await kv.get("summaries", "ses_net"); + expect(stored?.title).toBe("merged-with-skip"); + }); + + it("every chunk failing on provider error trips too_many_chunks_skipped", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "1"; + // 3 chunks, all chunk calls throw → 3/3 skipped → bail. 
+ const provider: MemoryProvider & { calls: any[] } = { + name: "test", + calls: [], + compress: async () => "", + summarize: async (system: string, user: string) => { + (provider as any).calls.push({ system, user }); + throw new Error("OpenAI API error (400): invalid request"); + }, + }; + const { handler } = await setupHandler({ + sessionId: "ses_all_400", + obsCount: 250, + provider, + }); + + const result: any = await handler({ sessionId: "ses_all_400" }); + + expect(result.success).toBe(false); + expect(result.error).toMatch(/too_many_chunks_skipped: 3\/3/); + }); + + it("chunks run in parallel batches according to SUMMARIZE_CHUNK_CONCURRENCY", async () => { + process.env.SUMMARIZE_CHUNK_SIZE = "100"; + process.env.SUMMARIZE_CHUNK_CONCURRENCY = "2"; + let inflight = 0; + let maxInflight = 0; + const provider: MemoryProvider & { calls: any[] } = { + name: "test", + calls: [], + compress: async () => "", + summarize: async (system: string, user: string) => { + (provider as any).calls.push({ system, user }); + inflight += 1; + maxInflight = Math.max(maxInflight, inflight); + // Yield to event loop so siblings can also enter before we resolve. + await new Promise((r) => setTimeout(r, 5)); + inflight -= 1; + if (system.includes("merging")) return summaryXml({ title: "merged" }); + return summaryXml({ title: "ok" }); + }, + }; + const { handler } = await setupHandler({ + sessionId: "ses_par", + obsCount: 400, // 4 chunks at chunkSize=100 + provider, + }); + + const result: any = await handler({ sessionId: "ses_par" }); + + expect(result.success).toBe(true); + // 4 chunks at concurrency 2 → max 2 in flight at once during the chunk phase. + // Reduce is a single call so doesn't bump it. + expect(maxInflight).toBe(2); + }); +});