From 70d2f8c5c90d18c005b1748f84bc42b092f732e3 Mon Sep 17 00:00:00 2001 From: Ryan Wyler Date: Sun, 8 Mar 2026 19:16:31 -0700 Subject: [PATCH 1/2] feat: collapse/float compaction modes and knowledge pack support Collapse compaction mode: - Selectively compresses oldest 65% of tokens instead of entire conversation - Merges historical summaries for continuity (configurable: previousSummaries) - Places summary at correct breakpoint position in timeline - TUI toggle cycles standard -> collapse -> float via command palette - insertTriggers=false prevents timestamp collision infinite loops - Preserves real user messages; only deletes synthetic trigger messages Float compaction mode: - Sub-collapses oldest conversation chains before overflow evaluation - Chain detection requires 2+ assistant messages (skips simple Q&A pairs) - bookend algorithm by default; configurable per chainThreshold - Soft-deletes sub-collapsed messages with flux=compacted (not hard-delete) - summary:true flag on sub-collapse result prevents re-trigger loops - detectChains skips already-processed messages (summary:true or flux set) to prevent infinite sub-collapse loop - Token adjustment accounts for sub-collapse savings to prevent re-trigger - Reloads TUI messages after sub-collapse via session.compacted event - Re-parents orphaned chain messages after mid-chain split - splitChainMinThreshold gate prevents processing chains too small for benefit - minFloat threshold: skip sub-collapse until total tokens reach configurable limit - Float pre-check also fires on stop finish (not just before LLM calls) Knowledge pack support: - New KnowledgePack module for injecting knowledge pack messages into sessions - Knowledge pack messages sit at time_created=1,2,... 
(before compaction breakpoints) and are prepended explicitly at prompt build time (filterCompacted never sees them) - Propagates manually-enabled knowledge packs from parent to subagent sessions - Knowledge pack agent prompt overrides: KP can declare agent..prompt to replace a built-in agent system prompt for the session (global registry not mutated) - Plugin transform hook receives knowledge pack messages via toModelMessages injection - TUI sidebar: add/remove knowledge packs, mirror to project config on change - Server routes: /knowledge-pack list/add/remove/enable/disable endpoints - Sidebar click race condition fix: refetchActive deferred after server response - Sidebar prompt refocus: restores focus after KP interactions only - Global knowledge packs mirrored to project config on sidebar add/remove - Project config written to .opencode/opencode.json (not config.json) Upstream overflow (413) integration: - Collapse/float modes propagate overflow=true through to the continuation message: when a 413 triggers compaction in collapse or float mode, an overflow explanation message is injected after the collapse completes so the user understands their media attachments were too large - filterCompacted: errored summary messages no longer treated as valid compaction breakpoints (upstream guard: !msg.info.error) - filterCompacted: breakpoint detection no longer requires finish flag on assistant summary (collapse compaction may complete without setting finish) Implementation details: - compaction-extension.ts: self-contained for easy rebasing; all debug logging uses COLLAPSE tag (grep: tail -f ~/.local/share/opencode/log/dev.log | grep COLLAPSE) - detectChains: handles mid-run user interjections in full chain detection - copyUserMessage: inserts duplicate chain anchor when mid-chain compaction splits leave orphaned assistant messages - Identifier.insertCopy / Identifier.insert: generate IDs that sort between existing messages without timestamp collisions - isOverflow 
routes to CompactionExtension for collapse/float modes (configurable trigger) - Config schema: compaction.method, extractRatio, recentRatio, summaryMaxTokens, previousSummaries, splitChain, splitChainMinThreshold, minFloat, insertTriggers - knowledge.enabled, knowledge.paths, knowledge.packs config fields --- .../opencode/src/cli/cmd/tui/context/sync.tsx | 23 +- .../src/cli/cmd/tui/routes/session/index.tsx | 17 + .../cli/cmd/tui/routes/session/sidebar.tsx | 151 +- packages/opencode/src/config/config.ts | 114 +- packages/opencode/src/id/id.ts | 78 +- .../opencode/src/server/routes/session.ts | 221 ++ .../src/session/compaction-extension.ts | 1919 +++++++++++++++++ packages/opencode/src/session/compaction.ts | 48 +- packages/opencode/src/session/index.ts | 103 + .../opencode/src/session/knowledge-pack.ts | 381 ++++ packages/opencode/src/session/message-v2.ts | 100 +- packages/opencode/src/session/prompt.ts | 137 +- packages/sdk/js/src/v2/gen/sdk.gen.ts | 160 ++ packages/sdk/js/src/v2/gen/types.gen.ts | 258 +++ packages/sdk/openapi.json | 453 ++++ 15 files changed, 4134 insertions(+), 29 deletions(-) create mode 100644 packages/opencode/src/session/compaction-extension.ts create mode 100644 packages/opencode/src/session/knowledge-pack.ts diff --git a/packages/opencode/src/cli/cmd/tui/context/sync.tsx b/packages/opencode/src/cli/cmd/tui/context/sync.tsx index 269ed7ae0bd1..79829d02284a 100644 --- a/packages/opencode/src/cli/cmd/tui/context/sync.tsx +++ b/packages/opencode/src/cli/cmd/tui/context/sync.tsx @@ -103,6 +103,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ }) const sdk = useSDK() + const fullSyncedSessions = new Set() sdk.event.listen((e) => { const event = e.details @@ -194,6 +195,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ break case "session.deleted": { + if (!store.session) break const result = Binary.search(store.session, event.properties.info.id, (s) => s.id) if (result.found) { 
setStore( @@ -206,6 +208,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ break } case "session.updated": { + if (!store.session) break const result = Binary.search(store.session, event.properties.info.id, (s) => s.id) if (result.found) { setStore("session", result.index, reconcile(event.properties.info)) @@ -225,6 +228,23 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ break } + case "session.compacted": { + // Compaction modified messages, invalidate cache and reload + const sessionID = event.properties.sessionID + fullSyncedSessions.delete(sessionID) + sdk.client.session.messages({ sessionID, limit: 100 }).then((messages) => { + setStore( + produce((draft) => { + draft.message[sessionID] = messages.data!.map((x) => x.info) + for (const message of messages.data!) { + draft.part[message.info.id] = message.parts + } + }), + ) + }) + break + } + case "message.updated": { const messages = store.message[event.properties.info.sessionID] if (!messages) { @@ -266,6 +286,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ } case "message.removed": { const messages = store.message[event.properties.sessionID] + if (!messages) break const result = Binary.search(messages, event.properties.messageID, (m) => m.id) if (result.found) { setStore( @@ -319,6 +340,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ case "message.part.removed": { const parts = store.part[event.properties.messageID] + if (!parts) break const result = Binary.search(parts, event.properties.partID, (p) => p.id) if (result.found) setStore( @@ -431,7 +453,6 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ bootstrap() }) - const fullSyncedSessions = new Set() const result = { data: store, set: setStore, diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index 
d3a4ff81e015..90631ffe0cc0 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -159,6 +159,10 @@ export function Session() { const [diffWrapMode] = kv.signal<"word" | "none">("diff_wrap_mode", "word") const [animationsEnabled, setAnimationsEnabled] = kv.signal("animations_enabled", true) const [showGenericToolOutput, setShowGenericToolOutput] = kv.signal("generic_tool_output_visibility", false) + const [compactionMethod, setCompactionMethod] = kv.signal<"standard" | "collapse" | "float">( + "compaction_method", + sync.data.config.compaction?.method ?? "standard", + ) const wide = createMemo(() => dimensions().width > 120) const sidebarVisible = createMemo(() => { @@ -465,6 +469,19 @@ export function Session() { dialog.clear() }, }, + { + title: `Compaction: ${compactionMethod()} -> ${compactionMethod() === "standard" ? "collapse" : compactionMethod() === "collapse" ? "float" : "standard"}`, + value: "session.toggle.compaction_method", + category: "Session", + onSelect: (dialog) => { + setCompactionMethod((prev) => { + if (prev === "standard") return "collapse" + if (prev === "collapse") return "float" + return "standard" + }) + dialog.clear() + }, + }, { title: "Unshare session", value: "session.unshare", diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx index 42ac5fbe080a..9ddadadf0638 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx @@ -1,5 +1,5 @@ import { useSync } from "@tui/context/sync" -import { createMemo, For, Show, Switch, Match } from "solid-js" +import { createMemo, createResource, createSignal, For, Show, Switch, Match } from "solid-js" import { createStore } from "solid-js/store" import { useTheme } from "../../context/theme" import { Locale } from "@/util/locale" @@ -11,9 +11,12 @@ 
import { useKeybind } from "../../context/keybind" import { useDirectory } from "../../context/directory" import { useKV } from "../../context/kv" import { TodoItem } from "../../component/todo-item" +import { useSDK } from "@tui/context/sdk" +import { usePromptRef } from "../../context/prompt" export function Sidebar(props: { sessionID: string; overlay?: boolean }) { const sync = useSync() + const sdk = useSDK() const { theme } = useTheme() const session = createMemo(() => sync.session.get(props.sessionID)!) const diff = createMemo(() => sync.data.session_diff[props.sessionID] ?? []) @@ -60,6 +63,77 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { } }) + type KPEntry = { id?: string; name: string; displayName: string; version: string; enabled: boolean } + + // Whether the KP section is expanded to show all available packs + // Default true: new sessions show the full library so users can add packs immediately + const [kpExpanded, setKpExpanded] = createSignal(true) + + // sdk transport helper — routes through Unix socket, not bare fetch + const sdkGet = (url: string, path: Record) => (sdk.client as any).client.get({ url, path }) + const sdkPost = (url: string, path: Record) => (sdk.client as any).client.post({ url, path }) + const sdkDelete = (url: string, path: Record) => (sdk.client as any).client.delete({ url, path }) + + // Count of knowledge-pack messages in the sync store — changes whenever the server + // injects or removes a KP (message.updated / message.removed events), driving a refetch. + const kpMessageCount = createMemo( + () => (sync.data.message[props.sessionID] ?? 
[]).filter((m) => (m as any).flux === "knowledge").length, + ) + + // When collapsed: fetch only active packs (fast, session-scoped) + const [activePacks, { refetch: refetchActive }] = createResource( + () => ({ sessionID: props.sessionID, kpCount: kpMessageCount() }), + async ({ sessionID }) => { + const res = await sdkGet("/session/{sessionID}/knowledge-packs", { sessionID }) + if (res.error) return [] as KPEntry[] + return (res.data as { id: string; name: string; displayName: string; version?: string }[]).map( + (p) => ({ ...p, enabled: true }) as KPEntry, + ) + }, + ) + + // When expanded: fetch all available packs with enabled flag (reads library dir). + // Depends on activePacks() so it re-fetches whenever active packs change. + const [allPacks, { refetch: refetchAll }] = createResource( + () => (kpExpanded() ? { sessionID: props.sessionID, active: activePacks() } : null), + async ({ sessionID }) => { + const res = await sdkGet("/session/{sessionID}/knowledge-packs/available", { sessionID }) + if (res.error) return [] as KPEntry[] + return res.data as KPEntry[] + }, + ) + + const visiblePacks = () => (kpExpanded() ? (allPacks() ?? []) : (activePacks() ?? [])) + + function togglePack(name: string, version: string, enabled: boolean) { + const sessionID = props.sessionID + // Fire-and-forget the SDK call, then refetch once the server responds. + // The refetch is deferred with setTimeout so the DOM update happens + // outside opentui's mouse event processing — avoiding the race that + // destroys renderables mid-event and corrupts focus state. + const req = enabled + ? 
sdkDelete("/session/{sessionID}/knowledge-packs/{name}/{version}", { sessionID, name, version }) + : sdkPost("/session/{sessionID}/knowledge-packs/{name}/{version}", { sessionID, name, version }) + req.then(() => setTimeout(() => refetchActive(), 1)) + } + + const promptRef = usePromptRef() + + // After any sidebar mouse interaction opentui clears currentFocusedRenderable + // because sidebar box elements are not focusable renderables. The native + // layer may also do post-processing (hover recheck, mouseUp dispatch) after + // the JS callback returns, so a synchronous focus() can be overwritten. + // Use setTimeout like the dialog system does, and schedule a second check + // to catch focus loss from async re-renders triggered by resource refetch. + function refocusPrompt() { + setTimeout(() => { + promptRef.current?.focus() + }, 1) + setTimeout(() => { + if (!promptRef.current?.focused) promptRef.current?.focus() + }, 50) + } + const directory = useDirectory() const kv = useKV() @@ -102,6 +176,13 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { Context + + compact{" "} + {sync.data.config.compaction?.auto === false + ? "disabled" + : kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard")} + + {context()?.tokens ?? 0} tokens {context()?.percentage ?? 0}% used {cost()} spent @@ -111,7 +192,10 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { mcpEntries().length > 2 && setExpanded("mcp", !expanded.mcp)} + onMouseDown={() => { + mcpEntries().length > 2 && setExpanded("mcp", !expanded.mcp) + refocusPrompt() + }} > 2}> {expanded.mcp ? "▼" : "▶"} @@ -167,11 +251,54 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { + + + + Knowledge Packs + + { + setKpExpanded(!kpExpanded()) + if (!kpExpanded()) refetchAll() + refocusPrompt() + }} + > + {kpExpanded() ? 
"−" : "+"} + + + + + {(kp) => ( + { + togglePack(kp.name, kp.version, kp.enabled) + refocusPrompt() + }} + > + + {kp.enabled ? "•" : "◦"} + + + {kp.displayName} + + {kp.version} + + + + )} + + sync.data.lsp.length > 2 && setExpanded("lsp", !expanded.lsp)} + onMouseDown={() => { + sync.data.lsp.length > 2 && setExpanded("lsp", !expanded.lsp) + refocusPrompt() + }} > 2}> {expanded.lsp ? "▼" : "▶"} @@ -215,7 +342,10 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { todo().length > 2 && setExpanded("todo", !expanded.todo)} + onMouseDown={() => { + todo().length > 2 && setExpanded("todo", !expanded.todo) + refocusPrompt() + }} > 2}> {expanded.todo ? "▼" : "▶"} @@ -234,7 +364,10 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { diff().length > 2 && setExpanded("diff", !expanded.diff)} + onMouseDown={() => { + diff().length > 2 && setExpanded("diff", !expanded.diff) + refocusPrompt() + }} > 2}> {expanded.diff ? "▼" : "▶"} @@ -288,7 +421,13 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) { Getting started - kv.set("dismissed_getting_started", true)}> + { + kv.set("dismissed_getting_started", true) + refocusPrompt() + }} + > ✕ diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 6b4242a225aa..bc6d338218b6 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1139,6 +1139,93 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), + method: z + .enum(["standard", "collapse", "float"]) + .optional() + .describe( + "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint, 'float' automatically sub-collapses oldest chains before evaluating context 
overflow (default: standard)", + ), + trigger: z + .number() + .min(0) + .max(1) + .optional() + .describe("Trigger compaction at this fraction of total context (default: 0.85 = 85%)"), + extractRatio: z + .number() + .min(0) + .max(1) + .optional() + .describe("For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)"), + recentRatio: z + .number() + .min(0) + .max(1) + .optional() + .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"), + summaryMaxTokens: z + .number() + .min(1000) + .max(50000) + .optional() + .describe("For collapse mode: target token count for the summary output (default: 10000)"), + previousSummaries: z + .number() + .min(0) + .max(10) + .optional() + .describe("For collapse mode: number of previous summaries to include for context merging (default: 3)"), + insertTriggers: z + .boolean() + .optional() + .describe( + "Whether to insert compaction trigger messages in the stream. Standard compaction needs triggers (default: true), collapse compaction does not (default: false)", + ), + splitChain: z + .boolean() + .optional() + .describe( + "For collapse mode: allow inserting breakpoints in the middle of chains (default: true). When false, breakpoints only occur at chain boundaries to preserve conversation flow.", + ), + splitChainMinThreshold: z + .number() + .min(0) + .max(1) + .optional() + .describe( + "For collapse mode with splitChain=true: minimum fraction of extractTarget that must be covered when rewinding to chain boundary before falling back to mid-chain split (default: 0.75). E.g. 
0.75 means the rewind must still extract at least 75% of the token target to be accepted.", + ), + float: z + .object({ + chainThreshold: z + .number() + .min(1) + .max(20) + .optional() + .describe("Number of chains before triggering sub-collapse on oldest chain (default: 3)"), + minFloat: z + .number() + .min(0) + .max(1) + .optional() + .describe( + "Minimum fraction of context window that must be used before sub-collapse chains are evaluated (default: 0.6 = 60%). Sub-collapse is skipped entirely when context usage is below this threshold, and stops between chains if usage drops below it.", + ), + algorithm: z + .enum(["full", "bookend", "minimal"]) + .optional() + .describe( + "Sub-collapse algorithm: 'full' includes all context, 'bookend' focuses on user request + final response + tools, 'minimal' uses only final response (default: bookend)", + ), + subCollapseSummaryMaxTokens: z + .number() + .min(500) + .max(20000) + .optional() + .describe("Target token count for sub-collapse summaries (default: 5000)"), + }) + .optional() + .describe("Float mode settings for automatic chain sub-collapse"), reserved: z .number() .int() @@ -1147,6 +1234,26 @@ export namespace Config { .describe("Token buffer for compaction. 
Leaves enough window to avoid overflow during compaction."), }) .optional(), + knowledge: z + .object({ + enabled: z.boolean().optional().describe("Enable knowledge pack injection (default: true)"), + paths: z + .array(z.string()) + .optional() + .describe("Additional directories to scan for .yaml knowledge pack files"), + packs: z + .array( + z.object({ + name: z.string().describe("Knowledge pack name"), + version: z.string().describe("Knowledge pack version"), + enabled: z.boolean().describe("Whether to enable this knowledge pack by default"), + }), + ) + .optional() + .describe("Knowledge packs to enable or disable by default"), + }) + .optional() + .describe("Knowledge pack settings"), experimental: z .object({ disable_paste_summary: z.boolean().optional(), @@ -1287,8 +1394,13 @@ export namespace Config { return global() } + /** Read only the local project config file (not merged with global). */ + export async function getProject() { + return loadFile(path.join(Instance.worktree, ".opencode", "opencode.json")) + } + export async function update(config: Info) { - const filepath = path.join(Instance.directory, "config.json") + const filepath = path.join(Instance.worktree, ".opencode", "opencode.json") const existing = await loadFile(filepath) await Filesystem.writeJson(filepath, mergeDeep(existing, config)) await Instance.dispose() diff --git a/packages/opencode/src/id/id.ts b/packages/opencode/src/id/id.ts index 6673297cbfac..af0da8f3b365 100644 --- a/packages/opencode/src/id/id.ts +++ b/packages/opencode/src/id/id.ts @@ -19,6 +19,7 @@ export namespace Identifier { } const LENGTH = 26 + const TIME_BYTES = 6 // State for monotonic ID generation let lastTimestamp = 0 @@ -66,12 +67,12 @@ export namespace Identifier { now = descending ? 
~now : now - const timeBytes = Buffer.alloc(6) - for (let i = 0; i < 6; i++) { - timeBytes[i] = Number((now >> BigInt(40 - 8 * i)) & BigInt(0xff)) + const timeBytes = Buffer.alloc(TIME_BYTES) + for (let i = 0; i < TIME_BYTES; i++) { + timeBytes[i] = Number((now >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff)) } - return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - 12) + return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2) } /** Extract timestamp from an ascending ID. Does not work with descending IDs. */ @@ -81,4 +82,73 @@ export namespace Identifier { const encoded = BigInt("0x" + hex) return Number(encoded / BigInt(0x1000)) } + + /** + * Insert an ID that sorts after afterId, and optionally before beforeId. + * + * If beforeId is provided and there's a gap, the new ID will sort between them. + * Otherwise, the new ID will sort immediately after afterId. + * + * @param afterId - The ID that the new ID must sort AFTER + * @param beforeId - Optional ID that the new ID should sort BEFORE (if gap exists) + * @param prefix - The prefix for the new ID (e.g., "message", "part") + */ + export function insert(afterId: string, beforeId: string | undefined, prefix: keyof typeof prefixes): string { + const underscoreIndex = afterId.indexOf("_") + if (underscoreIndex === -1) { + throw new Error(`Invalid afterId: ${afterId}`) + } + + const afterHex = afterId.slice(underscoreIndex + 1, underscoreIndex + 1 + TIME_BYTES * 2) + const afterValue = BigInt("0x" + afterHex) + + let newValue: bigint + + if (beforeId) { + const beforeUnderscoreIndex = beforeId.indexOf("_") + if (beforeUnderscoreIndex !== -1) { + const beforeHex = beforeId.slice(beforeUnderscoreIndex + 1, beforeUnderscoreIndex + 1 + TIME_BYTES * 2) + if (/^[0-9a-f]+$/i.test(beforeHex)) { + const beforeValue = BigInt("0x" + beforeHex) + const gap = beforeValue - afterValue + if (gap > BigInt(1)) { + // Insert in the middle of the gap + newValue 
= afterValue + gap / BigInt(2) + } else { + // Gap too small, create after afterId + newValue = afterValue + BigInt(0x1000) + BigInt(1) + } + } else { + newValue = afterValue + BigInt(0x1000) + BigInt(1) + } + } else { + newValue = afterValue + BigInt(0x1000) + BigInt(1) + } + } else { + // No beforeId, create after afterId + newValue = afterValue + BigInt(0x1000) + BigInt(1) + } + + const timeBytes = Buffer.alloc(TIME_BYTES) + for (let i = 0; i < TIME_BYTES; i++) { + timeBytes[i] = Number((newValue >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff)) + } + + return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2) + } + + /** + * Generate a pair of IDs (message + part) that both sort between afterId and beforeId. + * Used when copying a user message mid-chain: the message ID sorts between the two + * anchors, and the part ID sorts just after the message ID. + * + * @param afterId - The ID that the new IDs must sort AFTER + * @param beforeId - Optional ID that the new IDs should sort BEFORE + */ + export function insertCopy(afterId: string, beforeId: string | undefined): { messageID: string; partID: string } { + const messageID = insert(afterId, beforeId, "message") + // Part ID sorts just after the message ID, still before beforeId + const partID = insert(messageID, beforeId, "part") + return { messageID, partID } + } } diff --git a/packages/opencode/src/server/routes/session.ts b/packages/opencode/src/server/routes/session.ts index 12938aeaba04..8daa7c405c20 100644 --- a/packages/opencode/src/server/routes/session.ts +++ b/packages/opencode/src/server/routes/session.ts @@ -4,6 +4,7 @@ import { describeRoute, validator, resolver } from "hono-openapi" import z from "zod" import { Session } from "../../session" import { MessageV2 } from "../../session/message-v2" +import { KnowledgePack } from "../../session/knowledge-pack" import { SessionPrompt } from "../../session/prompt" import { SessionCompaction } from 
"../../session/compaction" import { SessionRevert } from "../../session/revert" @@ -16,9 +17,61 @@ import { Log } from "../../util/log" import { PermissionNext } from "@/permission/next" import { errors } from "../error" import { lazy } from "../../util/lazy" +import { SessionProxyMiddleware } from "../../control-plane/session-proxy-middleware" +import { Config } from "../../config/config" const log = Log.create({ service: "server" }) +type KPEntry = { name: string; version: string; enabled: boolean } + +/** + * When the user ADDS a knowledge pack via the sidebar we must ensure the + * local project config reflects the full desired pack list. + * + * opencode does not merge the `knowledge.packs` array between global and + * project configs — once the project config defines that key the global + * array is completely ignored. So before writing any local change we first + * mirror every globally-enabled pack into the project file, then apply the + * addition on top. This matches the behaviour of `--kp-add` in the + * utils/coder CLI tool. + */ +async function addProjectKnowledgePack(name: string, version: string) { + const [global, project] = await Promise.all([Config.getGlobal(), Config.getProject()]) + // Start from whatever the project file already has. + const local: KPEntry[] = project.knowledge?.packs ?? [] + const byKey = new Map(local.map((p) => [`${p.name}@${p.version}`, p])) + // Mirror globally-enabled packs that are not yet in the project file. + for (const gp of global.knowledge?.packs ?? []) { + if (!gp.enabled) continue + const key = `${gp.name}@${gp.version}` + if (!byKey.has(key)) { + byKey.set(key, { name: gp.name, version: gp.version, enabled: true }) + log.info("knowledge pack: mirroring global pack to project config", { name: gp.name, version: gp.version }) + } + } + // Add the requested pack (or re-enable if already present but disabled). 
+ const key = `${name}@${version}` + byKey.set(key, { name, version, enabled: true }) + await Config.update({ knowledge: { packs: [...byKey.values()] } }) +} + +/** + * When the user REMOVES a knowledge pack via the sidebar we only touch the + * project config file — we do NOT mirror global packs, because the user only + * asked to remove one specific pack. The entry is deleted entirely (not + * marked disabled) so it cleanly disappears from future sessions. + * + * This matches the behaviour of `--kp-remove` in the utils/coder CLI tool. + */ +async function removeProjectKnowledgePack(name: string, version: string) { + const project = await Config.getProject() + const existing = project.knowledge?.packs + if (!existing?.length) return + const packs = existing.filter((p) => !(p.name === name && p.version === version)) + if (packs.length === existing.length) return + await Config.update({ knowledge: { packs } }) +} + export const SessionRoutes = lazy(() => new Hono() .get( @@ -618,6 +671,174 @@ export const SessionRoutes = lazy(() => return c.json(message) }, ) + .get( + "/:sessionID/knowledge-packs", + describeRoute({ + summary: "List knowledge packs", + description: "Get all knowledge pack messages injected into a session.", + operationId: "session.knowledgePacks", + responses: { + 200: { + description: "Knowledge packs", + content: { + "application/json": { + schema: resolver( + z.array( + z.object({ + id: z.string(), + name: z.string(), + displayName: z.string(), + version: z.string(), + }), + ), + ), + }, + }, + }, + ...errors(400), + }, + }), + validator( + "param", + z.object({ + sessionID: z.string().meta({ description: "Session ID" }), + }), + ), + async (c) => { + const { sessionID } = c.req.valid("param") + const [msgs, available] = await Promise.all([KnowledgePack.fromSession(sessionID), KnowledgePack.available()]) + const library = new Map(available.map((p) => [p.name + "@" + p.version, p])) + const result = msgs.map((msg) => { + const user = msg.info as 
MessageV2.User + const key = user.agent.startsWith("kp:") ? user.agent.slice(3) : user.agent + const pack = library.get(key) + const [name, version] = key.split("@") + return { + id: msg.info.id, + name, + displayName: pack?.displayName ?? pack?.name ?? name, + version: pack?.version ?? version, + } + }) + return c.json(result) + }, + ) + .get( + "/:sessionID/knowledge-packs/available", + describeRoute({ + summary: "List available knowledge packs", + description: + "Get all knowledge packs available in the library directory (~/.config/opencode/llm_knowledge_packs/).", + operationId: "session.knowledgePacksAvailable", + responses: { + 200: { + description: "Available knowledge packs", + content: { + "application/json": { + schema: resolver( + z.array( + z.object({ + name: z.string(), + displayName: z.string(), + version: z.string(), + enabled: z.boolean(), + }), + ), + ), + }, + }, + }, + ...errors(400), + }, + }), + validator( + "param", + z.object({ + sessionID: z.string().meta({ description: "Session ID" }), + }), + ), + async (c) => { + const { sessionID } = c.req.valid("param") + const [available, active] = await Promise.all([KnowledgePack.available(), KnowledgePack.fromSession(sessionID)]) + const activeKeys = new Set( + active.map((msg) => { + const user = msg.info as MessageV2.User + return user.agent.startsWith("kp:") ? user.agent.slice(3) : user.agent + }), + ) + return c.json( + available.map((p) => ({ + name: p.name, + displayName: p.displayName ?? 
p.name, + version: p.version, + enabled: activeKeys.has(p.name + "@" + p.version), + })), + ) + }, + ) + .post( + "/:sessionID/knowledge-packs/:name/:version", + describeRoute({ + summary: "Add a knowledge pack to session", + description: "Inject a knowledge pack from the library into the session.", + operationId: "session.knowledgePackAdd", + responses: { + 200: { + description: "Knowledge pack added", + content: { "application/json": { schema: resolver(z.boolean()) } }, + }, + ...errors(400, 404), + }, + }), + validator( + "param", + z.object({ + sessionID: z.string().meta({ description: "Session ID" }), + name: z.string().meta({ description: "Knowledge pack name" }), + version: z.string().meta({ description: "Knowledge pack version" }), + }), + ), + async (c) => { + const { sessionID, name, version } = c.req.valid("param") + await KnowledgePack.add({ sessionID, name, version }) + // Persist the addition to the local project config so future sessions + // also start with this pack enabled. Global packs are mirrored into + // the project file first so they are not silently dropped. 
+ await addProjectKnowledgePack(name, version) + return c.json(true) + }, + ) + .delete( + "/:sessionID/knowledge-packs/:name/:version", + describeRoute({ + summary: "Remove a knowledge pack from session", + description: "Remove an injected knowledge pack from the session.", + operationId: "session.knowledgePackRemove", + responses: { + 200: { + description: "Knowledge pack removed", + content: { "application/json": { schema: resolver(z.boolean()) } }, + }, + ...errors(400, 404), + }, + }), + validator( + "param", + z.object({ + sessionID: z.string().meta({ description: "Session ID" }), + name: z.string().meta({ description: "Knowledge pack name" }), + version: z.string().meta({ description: "Knowledge pack version" }), + }), + ), + async (c) => { + const { sessionID, name, version } = c.req.valid("param") + await KnowledgePack.remove({ sessionID, name, version }) + // Persist the removal to the local project config (entry deleted + // entirely, no global mirroring — matches --kp-remove behaviour). + await removeProjectKnowledgePack(name, version) + return c.json(true) + }, + ) .delete( "/:sessionID/message/:messageID", describeRoute({ diff --git a/packages/opencode/src/session/compaction-extension.ts b/packages/opencode/src/session/compaction-extension.ts new file mode 100644 index 000000000000..a473bc9d0854 --- /dev/null +++ b/packages/opencode/src/session/compaction-extension.ts @@ -0,0 +1,1919 @@ +import { Session } from "." 
+import { Identifier } from "../id/id"
+import { Instance } from "../project/instance"
+import { Provider } from "../provider/provider"
+import { MessageV2 } from "./message-v2"
+import { Token } from "../util/token"
+import { Log } from "../util/log"
+import { SessionProcessor } from "./processor"
+import { Agent } from "@/agent/agent"
+import { Plugin } from "@/plugin"
+import { Config } from "@/config/config"
+import { Global } from "@/global"
+import { Bus } from "@/bus"
+import { SessionCompaction } from "./compaction"
+import { ProviderTransform } from "../provider/transform"
+import { KnowledgePack } from "./knowledge-pack"
+import path from "path"
+
+/**
+ * Compaction Extension Module
+ *
+ * This module implements extended compaction modes beyond the standard compaction.
+ * Currently includes "collapse" and "float" modes.
+ *
+ * Collapse mode features:
+ * - Selective compression: Only compresses OLD messages, keeps recent work intact
+ * - Historical summary merging: Merges previous summaries into new ones (no info loss)
+ * - Breakpoint insertion: Places summary at correct position in message timeline
+ * - splitChain control: When true (the default), breakpoints may land mid-chain; set false to restrict breakpoints to chain boundaries
+ *
+ * Float mode features:
+ * - Automatic chain sub-collapse before evaluating context overflow
+ * - Preserves high-fidelity summaries of individual chains
+ * - Configurable chain threshold before triggering sub-collapse
+ *
+ * This file is designed to be self-contained for easy rebasing when upstream changes.
+ * + * DEBUG: All debug logging uses "COLLAPSE" tag for easy grep filtering: + * tail -f ~/.local/share/opencode/log/dev.log | grep COLLAPSE + */ + +export namespace CompactionExtension { + const log = Log.create({ service: "session.compaction.extension" }) + + // Sub-collapse algorithm types + export type SubCollapseAlgorithm = "full" | "bookend" | "minimal" + + // Default configuration values + export const DEFAULTS = { + method: "standard" as const, + trigger: 0.85, // Trigger at 85% of usable context to leave headroom + extractRatio: 0.65, + recentRatio: 0.15, + summaryMaxTokens: 10000, // Target token count for collapse summary + previousSummaries: 3, // Number of previous summaries to include in collapse + splitChain: true, // Allow breakpoints mid-chain by default + splitChainMinThreshold: 0.75, // Min fraction of extractTarget required when rewinding to chain start; below this, fall back to mid-chain split + float: { + chainThreshold: 3, // Number of chains before sub-collapse triggers + minFloat: 0.6, // Minimum context used fraction required before sub-collapse is evaluated (60%) + algorithm: "bookend" as SubCollapseAlgorithm, + subCollapseSummaryMaxTokens: 5000, + }, + } + + /** + * Chain information for sub-collapse + */ + export interface ChainInfo { + /** Index of the user message that starts the chain */ + userMessageIndex: number + /** Indices of all assistant messages in the chain */ + assistantMessageIndices: number[] + /** All message indices in the chain */ + allMessageIndices: number[] + /** Total estimated tokens in the chain */ + chainTokens: number + /** User message ID */ + userMessageId: string + } + + // Build collapse prompt instructions (tokenTarget is optional for estimation) + function collapseInstructions(tokenTarget?: number, knowledgePacks?: { name: string; text: string }[]): string { + const targetClause = tokenTarget ? 
` (target: approximately ${tokenTarget} tokens)` : "" + + const kpSection = + knowledgePacks && knowledgePacks.length > 0 + ? `\n\nKnowledge Packs (PERSISTENT -- always injected into every conversation, never compacted away): +${knowledgePacks.map((kp) => `- ${kp.name}`).join("\n")} + +These knowledge packs are permanently present in every conversation. Do NOT summarize or repeat content that is already covered by a knowledge pack -- it wastes tokens and will always be there anyway. + +EXCEPTION: If the conversation explicitly overrides, disables, or modifies instructions from a knowledge pack, you MUST capture that override precisely -- reference the knowledge pack by name and state exactly what was changed or overridden. For example: "User overrode coder-mcp-tools: do not use coder snapshot tool for this project, use direct file reads instead."` + : "" + + return `You are creating a comprehensive context restoration document. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. + +Create a detailed summary${targetClause} with these sections: +1. Current Task State - what is being worked on, next steps, blockers +2. Resolved Code & Lessons Learned - working code verbatim, failed approaches, insights +3. User Directives - explicit preferences, style rules, things to always/never do +4. Custom Utilities & Commands - scripts, aliases, debugging commands +5. Design Decisions & Derived Requirements - architecture decisions, API contracts, patterns +6. Technical Facts - file paths, function names, config values, environment details${kpSection} + +Critical rules: +- PRESERVE working code verbatim in fenced blocks +- INCLUDE failed approaches with explanations +- Be specific with paths, line numbers, function names +- Capture the "why" behind decisions +- User directives are sacred - never omit them` + } + + /** + * Get the compaction method. 
+ * Priority: TUI toggle (kv.json) > config file > default + */ + export async function getMethod(): Promise<"standard" | "collapse" | "float"> { + const config = await Config.get() + const configMethod = config.compaction?.method + + // Check TUI toggle override + try { + const file = Bun.file(path.join(Global.Path.state, "kv.json")) + if (await file.exists()) { + const kv = await file.json() + const toggle = kv["compaction_method"] + if (toggle === "standard" || toggle === "collapse" || toggle === "float") { + log.info("COLLAPSE getMethod kv override", { method: toggle }) + return toggle + } + } + } catch { + // Ignore KV read errors + } + + log.info("COLLAPSE getMethod", { method: configMethod ?? DEFAULTS.method }) + return configMethod ?? DEFAULTS.method + } + + /** + * Check if context is overflowing based on collapse trigger threshold. + * Uses configurable trigger ratio instead of fixed context-output calculation. + */ + export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { + const config = await Config.get() + if (config.compaction?.auto === false) { + log.debug("COLLAPSE isOverflow auto=false, skipping") + return false + } + const context = input.model.limit.context + if (context === 0) { + log.debug("COLLAPSE isOverflow context=0, skipping") + return false + } + + const count = input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output + const trigger = config.compaction?.trigger ?? 
DEFAULTS.trigger + const threshold = context * trigger + const isOver = count > threshold + + log.info("COLLAPSE isOverflow", { + tokenCount: count, + contextLimit: context, + trigger, + threshold: Math.floor(threshold), + isOver, + input: input.tokens.input, + cacheRead: input.tokens.cache.read, + cacheWrite: input.tokens.cache.write, + output: input.tokens.output, + }) + + return isOver + } + + /** + * Collapse compaction: Extract oldest messages, distill with AI, insert summary at breakpoint. + * Messages before the breakpoint are filtered out by filterCompacted(). + */ + export async function process(input: { + parentID: string + messages: MessageV2.WithParts[] + sessionID: string + abort: AbortSignal + auto: boolean + }): Promise<"continue" | "stop"> { + const config = await Config.get() + const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio + const recentRatio = config.compaction?.recentRatio ?? DEFAULTS.recentRatio + const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens + const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries + const splitChain = config.compaction?.splitChain ?? DEFAULTS.splitChain + const splitChainMinThreshold = config.compaction?.splitChainMinThreshold ?? DEFAULTS.splitChainMinThreshold + + const method = await getMethod() + log.info("COLLAPSE begin", { + sessionID: input.sessionID, + method, + auto: input.auto, + splitChain, + messages: input.messages.length, + parentID: input.parentID, + }) + + // Get the user message to determine which model we'll use + const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User + const agent = await Agent.get("compaction") + const model = agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) + + // Calculate token counts and role counts + let messageTokens: number[] = [] + let totalTokens = 0 + let userCount = 0 + let assistantCount = 0 + // Track tokens saved by inline sub-collapse so the final token adjustment + // accounts for BOTH the sub-collapse savings AND the main collapse extract. + // Without this, extractedTokens only covers the (tiny) post-sub-collapse + // extract range, and isOverflow still sees high token counts on the next loop. + let subCollapseSavedTokens = 0 + for (const msg of input.messages) { + const estimate = estimateMessageTokens(msg) + messageTokens.push(estimate) + totalTokens += estimate + if (msg.info.role === "user") userCount++ + else if (msg.info.role === "assistant") assistantCount++ + } + + // Check if first message is a breakpoint (existing compaction) or new conversation + const firstMessage = input.messages[0] + const isBreakpoint = + firstMessage?.info.role === "assistant" && (firstMessage.info as MessageV2.Assistant).mode === "compaction" + + log.info("COLLAPSE context analysis", { + sessionID: input.sessionID, + messages: input.messages.length, + tokens: totalTokens, + user: userCount, + assistant: assistantCount, + firstMessageId: firstMessage?.info.id, + chainType: isBreakpoint ? "breakpoint" : "new", + splitChain, + }) + + // Calculate extraction targets + let extractTarget = Math.floor(totalTokens * extractRatio) + let recentTarget = Math.floor(totalTokens * recentRatio) + + log.debug("COLLAPSE extraction targets", { + sessionID: input.sessionID, + extractRatio, + extractTarget, + recentRatio, + recentTarget, + totalTokens, + }) + + /** + * Helper: if message at index has a parentID pointing to an earlier message, + * return the parent's index. 
Always checks regardless of splitChain — the + * caller decides what to do with the result based on splitChain and threshold. + */ + function findChainStart(index: number): number | undefined { + if (index <= 0 || index >= input.messages.length) return undefined + const msg = input.messages[index] + if (msg.info.role !== "assistant") return undefined + const parentID = (msg.info as MessageV2.Assistant).parentID + if (!parentID) return undefined + const parentIndex = input.messages.findIndex((m) => m.info.id === parentID) + if (parentIndex >= 0 && parentIndex < index) return parentIndex + return undefined + } + + /** + * Helper: if message at index has a parentID, return the parent's index. + * Respects splitChain: returns undefined when splitChain=true (allowing mid-chain splits). + * Used for the recent split boundary which does NOT have a min-threshold fallback. + */ + function findChainStartRespectingSplit(index: number): number | undefined { + if (splitChain) return undefined + return findChainStart(index) + } + + // Find split points + let extractedTokens = 0 + let extractSplitIndex = 0 + for (let i = 0; i < input.messages.length; i++) { + if (extractedTokens >= extractTarget) break + extractedTokens += messageTokens[i] + extractSplitIndex = i + 1 + } + + log.debug("COLLAPSE initial extract split", { + sessionID: input.sessionID, + extractSplitIndex, + extractedTokens, + extractTarget, + splitAtMessageId: input.messages[extractSplitIndex]?.info.id, + splitAtRole: input.messages[extractSplitIndex]?.info.role, + splitAtParentID: + input.messages[extractSplitIndex]?.info.role === "assistant" + ? (input.messages[extractSplitIndex].info as MessageV2.Assistant).parentID + : undefined, + }) + + // Ensure extract split is not in the middle of a chain (unless splitChain=true + // AND the rewind would not meet the min threshold). + // + // Algorithm: + // 1. Always check for a blocking chain at the extract boundary + // 2. 
If blocking: compute how many tokens the rewind would yield + // 3. If rewound tokens >= splitChainMinThreshold * extractTarget: accept the rewind + // 4. If rewound tokens < threshold AND splitChain=true: keep mid-chain split (Fix 2) + // 5. If rewound tokens < threshold AND splitChain=false: attempt sub-collapse + const originalExtractSplitIndex = extractSplitIndex + const extractChainStart = findChainStart(extractSplitIndex) + + // Run chain detection here so we can log the full chain landscape regardless + // of whether splitChain is true or false. This helps diagnose mid-chain splits. + const allChains = detectChains(input.messages) + log.debug("COLLAPSE chain landscape at extract boundary", { + sessionID: input.sessionID, + splitChain, + splitChainMinThreshold, + extractSplitIndex, + extractChainStart: extractChainStart ?? "(none - no chain at boundary)", + totalChains: allChains.length, + chains: allChains.map((c) => ({ + userIndex: c.userMessageIndex, + userId: c.userMessageId, + assistantCount: c.assistantMessageIndices.length, + firstAssistantIdx: c.assistantMessageIndices[0], + lastAssistantIdx: c.assistantMessageIndices[c.assistantMessageIndices.length - 1], + tokens: c.chainTokens, + containsExtractBoundary: + c.userMessageIndex <= extractSplitIndex && + (c.assistantMessageIndices[c.assistantMessageIndices.length - 1] ?? 
c.userMessageIndex) >= extractSplitIndex, + })), + }) + if (extractChainStart !== undefined) { + // Compute tokens that the rewind-to-chain-start would yield + let rewoundTokens = 0 + for (let i = 0; i < extractChainStart; i++) rewoundTokens += messageTokens[i] + const minRequired = splitChainMinThreshold * extractTarget + const rewindMeetsThreshold = rewoundTokens >= minRequired + + log.info("COLLAPSE extract split lands in chain, evaluating options", { + sessionID: input.sessionID, + originalIndex: extractSplitIndex, + chainStart: extractChainStart, + extractedTokens, + rewoundTokens, + minRequired, + rewindMeetsThreshold, + splitChain, + }) + + if (rewindMeetsThreshold) { + // Rewind is good enough — accept chain boundary, behave like splitChain=false + log.info("COLLAPSE rewinding to chain boundary (meets threshold)", { + sessionID: input.sessionID, + extractSplitIndex, + chainStart: extractChainStart, + rewoundTokens, + minRequired, + }) + for (let i = extractChainStart; i < extractSplitIndex; i++) { + extractedTokens -= messageTokens[i] + } + extractSplitIndex = extractChainStart + } else if (!splitChain) { + // Rewind doesn't meet threshold AND splitChain=false: attempt sub-collapse + const chains = detectChains(input.messages) + const blockingChain = chains.find( + (c) => c.userMessageIndex === extractChainStart || c.allMessageIndices.includes(extractChainStart), + ) + + if (blockingChain && blockingChain.assistantMessageIndices.length >= 2) { + log.info("COLLAPSE sub-collapsing blocking chain before extract", { + sessionID: input.sessionID, + chainUserIndex: blockingChain.userMessageIndex, + chainUserMessageId: blockingChain.userMessageId, + assistantCount: blockingChain.assistantMessageIndices.length, + chainTokens: blockingChain.chainTokens, + }) + + const subResult = await executeSubCollapse({ + sessionID: input.sessionID, + messages: input.messages, + chain: blockingChain, + abort: input.abort, + }) + + if (subResult.status === "success") { + 
log.info("COLLAPSE blocking chain sub-collapsed, reloading and fixing extract range", { + sessionID: input.sessionID, + summaryMessageId: subResult.summaryMessageId, + chainUserMessageId: subResult.chainUserMessageId, + summaryTokens: subResult.summaryTokens, + }) + + subCollapseSavedTokens = blockingChain.chainTokens - (subResult.summaryTokens ?? 0) + log.info("COLLAPSE sub-collapse saved tokens", { + sessionID: input.sessionID, + chainTokens: blockingChain.chainTokens, + summaryTokens: subResult.summaryTokens, + savedTokens: subCollapseSavedTokens, + }) + + const filteredMessages = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID)) + const summaryIdx = filteredMessages.findIndex( + (m: MessageV2.WithParts) => m.info.id === subResult.summaryMessageId, + ) + + if (summaryIdx >= 0) { + input.messages = filteredMessages + extractSplitIndex = summaryIdx + 1 + messageTokens = input.messages.map((m) => estimateMessageTokens(m)) + totalTokens = messageTokens.reduce((a, b) => a + b, 0) + extractedTokens = 0 + for (let i = 0; i < extractSplitIndex; i++) extractedTokens += messageTokens[i] + extractTarget = Math.floor(totalTokens * extractRatio) + recentTarget = Math.floor(totalTokens * recentRatio) + log.info("COLLAPSE extract range fixed after sub-collapse", { + sessionID: input.sessionID, + extractSplitIndex, + extractedTokens, + totalMessages: input.messages.length, + totalTokens, + }) + } else { + log.warn("COLLAPSE could not find sub-collapse summary in reloaded messages, returning continue", { + sessionID: input.sessionID, + summaryMessageId: subResult.summaryMessageId, + }) + return "continue" + } + } else { + log.error("COLLAPSE blocking chain sub-collapse failed, falling back to rewind", { + sessionID: input.sessionID, + }) + for (let i = extractChainStart; i < extractSplitIndex; i++) extractedTokens -= messageTokens[i] + extractSplitIndex = extractChainStart + } + } else { + // No suitable chain for sub-collapse, rewind anyway + for (let i = 
extractChainStart; i < extractSplitIndex; i++) extractedTokens -= messageTokens[i] + extractSplitIndex = extractChainStart + } + } else { + // splitChain=true and rewind doesn't meet threshold: keep mid-chain split (Fix 2) + log.info("COLLAPSE keeping mid-chain split (rewind below threshold, splitChain=true)", { + sessionID: input.sessionID, + extractSplitIndex, + chainStart: extractChainStart, + rewoundTokens, + minRequired, + }) + // splitChain mid-chain split: beforeId will be set below in the splitChain block + } + } + + let recentTokens = 0 + let recentSplitIndex = input.messages.length + for (let i = input.messages.length - 1; i >= 0; i--) { + if (recentTokens >= recentTarget) break + recentTokens += messageTokens[i] + recentSplitIndex = i + } + + log.debug("COLLAPSE initial recent split", { + sessionID: input.sessionID, + recentSplitIndex, + recentTokens, + recentTarget, + }) + + // Ensure recent split is not in the middle of a chain (unless splitChain=true) + const recentChainStart = findChainStartRespectingSplit(recentSplitIndex) + if (recentChainStart !== undefined) { + log.info("COLLAPSE adjusting recent split for chain boundary", { + sessionID: input.sessionID, + originalIndex: recentSplitIndex, + adjustedIndex: recentChainStart, + }) + for (let i = recentChainStart; i < recentSplitIndex; i++) { + recentTokens += messageTokens[i] + } + recentSplitIndex = recentChainStart + } + + // Ensure recent split doesn't overlap with extract + if (recentSplitIndex <= extractSplitIndex) { + log.debug("COLLAPSE recent/extract overlap, adjusting", { + sessionID: input.sessionID, + recentSplitIndex, + extractSplitIndex, + }) + recentSplitIndex = extractSplitIndex + } + + const extractedMessages = input.messages.slice(0, extractSplitIndex) + const middleMessages = input.messages.slice(extractSplitIndex, recentSplitIndex) + const recentReferenceMessages = input.messages.slice(recentSplitIndex) + + // Calculate middle section tokens + let middleTokens = 0 + for (let i = 
extractSplitIndex; i < recentSplitIndex; i++) { + middleTokens += messageTokens[i] + } + + log.info("COLLAPSE split result", { + sessionID: input.sessionID, + total: { messages: input.messages.length, tokens: totalTokens }, + extract: { + messages: extractedMessages.length, + tokens: extractedTokens, + range: `[0..${extractSplitIndex - 1}]`, + lastMsgId: extractedMessages[extractedMessages.length - 1]?.info.id, + lastMsgRole: extractedMessages[extractedMessages.length - 1]?.info.role, + }, + middle: { + messages: middleMessages.length, + tokens: middleTokens, + range: `[${extractSplitIndex}..${recentSplitIndex - 1}]`, + }, + recent: { + messages: recentReferenceMessages.length, + tokens: recentTokens, + range: `[${recentSplitIndex}..${input.messages.length - 1}]`, + }, + splitChain, + midChainSplit: + extractedMessages.length > 0 && + extractedMessages[extractedMessages.length - 1].info.role === "assistant" && + middleMessages.length > 0 && + middleMessages[0].info.role === "assistant" && + (middleMessages[0].info as MessageV2.Assistant).parentID === + (extractedMessages[extractedMessages.length - 1].info as MessageV2.Assistant).parentID, + }) + + if (extractedMessages.length === 0) { + // Chain rewind eliminated the entire extract range and sub-collapse either + // was not applicable or already failed above. Stop to prevent infinite loop. 
+ log.info("COLLAPSE skipped - no messages to extract after chain handling", { + sessionID: input.sessionID, + }) + return "stop" + } + + // Convert extracted messages to markdown for distillation + const markdownContent = messagesToMarkdown(extractedMessages) + const recentContext = messagesToMarkdown(recentReferenceMessages) + + // Build base prompt (without previous summaries) to calculate token budget + const markdownTokens = Token.estimate(markdownContent) + const recentTokensEstimate = Token.estimate(recentContext) + const templateTokens = Token.estimate(collapseInstructions()) + const basePromptTokens = markdownTokens + recentTokensEstimate + templateTokens + const contextLimit = model.limit.context + const outputReserve = ProviderTransform.maxOutputTokens(model) + const previousSummaryBudget = Math.max(0, contextLimit - outputReserve - basePromptTokens) + + // Fetch previous summaries that fit within budget + const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit, previousSummaryBudget) + + // Get the last extracted message to determine breakpoint position + const lastExtractedMessage = extractedMessages[extractedMessages.length - 1] + let afterId = lastExtractedMessage.info.id + let beforeId: string | undefined + let breakpointTimestamp = lastExtractedMessage.info.time.created + 1 + + log.debug("COLLAPSE breakpoint initial position", { + sessionID: input.sessionID, + lastExtractedId: lastExtractedMessage.info.id, + afterId, + breakpointTimestamp, + }) + + // When splitChain is false, check if any message after the split has a parentID + // (is part of a chain). If so, the compaction must sort BEFORE that parent to + // keep the chain together. + // + // When splitChain is true, the breakpoint stays where the token walk placed it + // (mid-chain). The next message after the split becomes the beforeId anchor so + // Identifier.insert produces an ID that sorts correctly between the two messages. 
+ if (splitChain) { + // Mid-chain split: anchor the breakpoint between lastExtractedMessage and + // the first message remaining in context + const firstRemaining = input.messages[extractSplitIndex] + if (firstRemaining) { + beforeId = firstRemaining.info.id + } + log.info("COLLAPSE splitChain=true, breakpoint stays mid-chain", { + sessionID: input.sessionID, + afterId, + beforeId: beforeId ?? "(none)", + breakpointTimestamp, + }) + } else { + const messagesAfterSplit = input.messages.slice(extractSplitIndex) + for (const msg of messagesAfterSplit) { + if (msg.info.role === "assistant") { + const parentID = (msg.info as MessageV2.Assistant).parentID + if (parentID) { + // Find the message that sorts just before the parent + // Use direct string comparison (not localeCompare) for consistent case-sensitive ordering + const sortedMessages = [...input.messages].sort((a, b) => + a.info.id < b.info.id ? -1 : a.info.id > b.info.id ? 1 : 0, + ) + const parentIndex = sortedMessages.findIndex((m) => m.info.id === parentID) + + if (parentIndex > 0) { + afterId = sortedMessages[parentIndex - 1].info.id + beforeId = parentID + + const parent = input.messages.find((m) => m.info.id === parentID) + if (parent) { + breakpointTimestamp = parent.info.time.created - 1 + } + + log.info("COLLAPSE breakpoint adjusted for chain protection", { + sessionID: input.sessionID, + chainMessageId: msg.info.id, + parentID, + afterId, + beforeId, + newTimestamp: breakpointTimestamp, + }) + } + break + } + } + } + } + + // Create compaction user message - sorts after afterId, and before beforeId if possible + const compactionUserId = Identifier.insert(afterId, beforeId, "message") + const compactionUserTimestamp = breakpointTimestamp + + log.info("COLLAPSE inserting breakpoint", { + sessionID: input.sessionID, + splitChain, + afterId, + afterIdRole: input.messages.find((m) => m.info.id === afterId)?.info.role, + afterIdIndex: input.messages.findIndex((m) => m.info.id === afterId), + beforeId: 
beforeId ?? "(none)", + beforeIdRole: beforeId ? input.messages.find((m) => m.info.id === beforeId)?.info.role : undefined, + beforeIdIndex: beforeId ? input.messages.findIndex((m) => m.info.id === beforeId) : undefined, + breakpointId: compactionUserId, + breakpointTimestamp: compactionUserTimestamp, + extractSplitIndex, + extractedTokens, + totalMessages: input.messages.length, + }) + + const compactionUserMsg = await Session.updateMessage({ + id: compactionUserId, + role: "user", + model: originalUserMessage.model, + sessionID: input.sessionID, + agent: originalUserMessage.agent, + time: { + created: compactionUserTimestamp, + }, + }) + await Session.updatePart({ + id: Identifier.insert(compactionUserId, undefined, "part"), + messageID: compactionUserMsg.id, + sessionID: input.sessionID, + type: "compaction", + auto: input.auto, + }) + + // Create assistant summary message - sorts after compaction user, before beforeId if possible + const compactionAssistantId = Identifier.insert(compactionUserId, beforeId, "message") + const compactionAssistantTimestamp = compactionUserTimestamp + 1 + + const msg = (await Session.updateMessage({ + id: compactionAssistantId, + role: "assistant", + parentID: compactionUserMsg.id, + sessionID: input.sessionID, + mode: "compaction", + agent: "compaction", + summary: true, + path: { + cwd: Instance.directory, + root: Instance.worktree, + }, + cost: 0, + tokens: { + output: 0, + input: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + modelID: model.id, + providerID: model.providerID, + time: { + created: compactionAssistantTimestamp, + }, + })) as MessageV2.Assistant + + const processor = SessionProcessor.create({ + assistantMessage: msg, + sessionID: input.sessionID, + model, + abort: input.abort, + }) + + // Allow plugins to inject context + const compacting = await Plugin.trigger( + "experimental.session.compacting", + { sessionID: input.sessionID }, + { context: [], prompt: undefined }, + ) + + // Build prompt sections - 
only include what we have + const sections: string[] = [] + + // Load knowledge packs from session for compaction context + const knowledgePacks = await KnowledgePack.loadFromSession(input.sessionID) + + // Instructions + sections.push(collapseInstructions(summaryMaxTokens, knowledgePacks)) + + // Previous summaries + if (previousSummaries.length > 0) { + sections.push(` +IMPORTANT: Merge all information from these previous summaries into your new summary. Do not lose any historical context. + +${previousSummaries.map((summary, i) => `--- Summary ${i + 1} ---\n${summary}`).join("\n\n")} +`) + } + + // Extracted content + sections.push(` +The following conversation content needs to be distilled into the summary: + +${markdownContent} +`) + + // Recent context + sections.push(` +The following is recent context for reference (shows current state): + +${recentContext} +`) + + // Additional plugin context + if (compacting.context.length > 0) { + sections.push(` +${compacting.context.join("\n\n")} +`) + } + + sections.push("Generate the context restoration document now.") + + const collapsePrompt = sections.join("\n\n") + + const result = await processor.process({ + user: originalUserMessage, + agent, + abort: input.abort, + sessionID: input.sessionID, + tools: {}, + system: [], + messages: [ + { + role: "user", + content: [{ type: "text", text: collapsePrompt }], + }, + ], + model, + }) + + // NOTE: We intentionally do NOT add a "Continue if you have next steps" message + // for collapse mode. The collapse summary is just context restoration - the loop + // should exit after the summary is generated so the user can continue naturally. 
+ + if (processor.message.error) { + log.error("COLLAPSE processor error", { sessionID: input.sessionID, error: processor.message.error }) + return "stop" + } + + log.info("COLLAPSE summary generated", { + sessionID: input.sessionID, + summaryTokens: processor.message.tokens.output, + summaryInputTokens: processor.message.tokens.input, + }) + + // Fix 1: When splitChain=true and the extract boundary landed mid-chain, + // assistant messages after the split still have parentID pointing to the + // original chain's user message (now behind the compaction wall). + // detectChains cannot find them as a chain because their user parent is gone. + // + // Solution: insert a duplicate of the original chain's user message just + // before the orphaned tail (between compactionAssistantId and firstRemaining), + // then re-parent all orphaned assistants to this new duplicate user message. + // detectChains will then start from the duplicate user message and walk the + // full orphaned tail as a proper chain, making it eligible for float sub-collapse. 
+ if (splitChain && extractSplitIndex < input.messages.length) { + const firstRemaining = input.messages[extractSplitIndex] + if (firstRemaining && firstRemaining.info.role === "assistant") { + const firstRemainingInfo = firstRemaining.info as MessageV2.Assistant + // Only act if the orphaned tail's parent is in the extracted range + const originalUserMsg = extractedMessages.find((m) => m.info.id === firstRemainingInfo.parentID) + if (originalUserMsg && originalUserMsg.info.role === "user") { + // Insert duplicate user message between compaction summary and first orphaned assistant + const duplicateUserMsgId = await Session.copyUserMessage({ + sessionID: input.sessionID, + source: originalUserMsg, + afterId: compactionAssistantId, + beforeId: firstRemaining.info.id, + }) + // Re-parent all orphaned assistants (those after the compaction breakpoint + // that still point to the original chain user message) to the new duplicate + const breakpointTimestamp = input.messages[extractSplitIndex - 1]?.info.time.created ?? 0 + await Session.reparentChain({ + sessionID: input.sessionID, + oldParentID: originalUserMsg.info.id, + newParentID: duplicateUserMsgId, + afterTimestamp: breakpointTimestamp, + }) + log.info("COLLAPSE mid-chain split: inserted duplicate user anchor and re-parented orphaned tail", { + sessionID: input.sessionID, + originalUserMsgId: originalUserMsg.info.id, + duplicateUserMsgId, + firstRemainingId: firstRemaining.info.id, + breakpointTimestamp, + }) + } + } + } + + // Update token count on the chronologically last assistant message + // so isOverflow() sees the correct post-collapse state. 
+ const allMessages = await Session.messages({ sessionID: input.sessionID }) + const lastAssistant = allMessages + .filter( + (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => + m.info.role === "assistant" && m.info.id !== msg.id, + ) + .sort((a, b) => b.info.time.created - a.info.time.created)[0] + + if (lastAssistant) { + const collapseSummaryTokens = processor.message.tokens.output + + const currentTotal = + lastAssistant.info.tokens.input + + lastAssistant.info.tokens.cache.read + + lastAssistant.info.tokens.cache.write + + lastAssistant.info.tokens.output + + // extractedTokens covers the main collapse extract range. When a sub-collapse + // ran inline before the main collapse, subCollapseSavedTokens captures the + // additional tokens removed by deleting the chain's assistant messages. + // Both must be subtracted from currentTotal so isOverflow sees the true + // post-compaction token count on the next loop iteration. + const totalExtracted = extractedTokens + subCollapseSavedTokens + const newTotal = Math.max(0, currentTotal - totalExtracted + collapseSummaryTokens) + + log.info("COLLAPSE token adjustment", { + sessionID: input.sessionID, + lastAssistantId: lastAssistant.info.id, + extractedTokens, + subCollapseSavedTokens, + totalExtracted, + summaryTokens: collapseSummaryTokens, + previousTotal: currentTotal, + newTotal, + reduction: currentTotal - newTotal, + }) + + lastAssistant.info.tokens = { + input: 0, + output: lastAssistant.info.tokens.output, + reasoning: lastAssistant.info.tokens.reasoning, + cache: { + read: Math.max(0, newTotal - lastAssistant.info.tokens.output), + write: 0, + }, + } + await Session.updateMessage(lastAssistant.info) + } + + // Count messages in the compacted chain (after compaction) + const remainingMessages = input.messages.length - extractedMessages.length + 2 // +2 for compaction user/assistant + const remainingUser = userCount - extractedMessages.filter((m) => m.info.role === "user").length + 1 + const 
remainingAssistant = assistantCount - extractedMessages.filter((m) => m.info.role === "assistant").length + 1 + + log.info("COLLAPSE complete", { + sessionID: input.sessionID, + method, + auto: input.auto, + splitChain, + midChainSplit: + extractSplitIndex > 0 && + extractedMessages.length > 0 && + extractedMessages[extractedMessages.length - 1].info.role === "assistant" && + (input.messages[extractSplitIndex]?.info as MessageV2.Assistant | undefined)?.parentID === + (extractedMessages[extractedMessages.length - 1].info as MessageV2.Assistant).parentID, + extracted: { messages: extractedMessages.length, tokens: extractedTokens }, + summary: { tokens: processor.message.tokens.output }, + subCollapseSavedTokens, + tokenReduction: extractedTokens + subCollapseSavedTokens - processor.message.tokens.output, + remaining: { messages: remainingMessages, user: remainingUser, assistant: remainingAssistant }, + breakpointId: compactionUserMsg.id, + result: input.auto ? "continue" : "stop", + }) + + // Delete the original trigger message (created by create()) to prevent + // the loop from picking it up again as a pending compaction task. + // The trigger is the message at input.parentID - we've created a new + // compaction user message at the breakpoint position. + // IMPORTANT: Only delete if parentID is actually a compaction trigger (has compaction part) + // In insertTriggers=false mode (collapse), parentID is the real user message! 
+ if (input.parentID !== compactionUserMsg.id) { + const parentMsg = input.messages.find((m) => m.info.id === input.parentID) + const isCompactionTrigger = parentMsg?.parts.some((p) => p.type === "compaction") + + if (isCompactionTrigger) { + log.info("COLLAPSE cleanup trigger message", { sessionID: input.sessionID, id: input.parentID }) + // Delete parts first + if (parentMsg) { + for (const part of parentMsg.parts) { + await Session.removePart({ + sessionID: input.sessionID, + messageID: input.parentID, + partID: part.id, + }) + } + } + await Session.removeMessage({ + sessionID: input.sessionID, + messageID: input.parentID, + }) + } else { + log.debug("COLLAPSE skipping cleanup - parentID is real user message", { + sessionID: input.sessionID, + id: input.parentID, + }) + } + } + + // Convergence guard: if the collapse summary is at least as large as what was + // extracted, compaction made no progress. Returning "continue" would re-trigger + // the same overflow, creating an infinite loop. Return "stop" instead. + const collapseSummaryTokens = processor.message.tokens.output + log.debug("COLLAPSE convergence check", { + sessionID: input.sessionID, + collapseSummaryTokens, + extractedTokens, + subCollapseSavedTokens, + totalExtracted: extractedTokens + subCollapseSavedTokens, + netReduction: extractedTokens - collapseSummaryTokens, + converging: collapseSummaryTokens < extractedTokens, + splitChain, + extractSplitIndex, + totalMessages: input.messages.length, + }) + if (collapseSummaryTokens >= extractedTokens) { + log.warn("COLLAPSE summary larger than extracted content, stopping to prevent loop", { + sessionID: input.sessionID, + collapseSummaryTokens, + extractedTokens, + splitChain, + }) + return "stop" + } + + // For auto-compaction: return "continue" so the loop continues processing. + // - If parentID was a trigger (insertTriggers=true), it's now deleted and the loop + // will find the real user message and respond to it. 
+ // - If parentID was the real user message (insertTriggers=false), the loop will + // continue with the updated context after compaction. + // For manual compaction: return "stop" - user explicitly requested compaction only. + + if (input.auto) { + return "continue" + } + return "stop" + } + + /** + * Estimate tokens for a message (respects compaction state) + */ + function estimateMessageTokens(msg: MessageV2.WithParts): number { + let tokens = 0 + for (const part of msg.parts) { + if (part.type === "text") { + tokens += Token.estimate(part.text) + } else if (part.type === "tool" && part.state.status === "completed") { + // Skip compacted tool outputs + if (part.state.time.compacted) continue + tokens += Token.estimate(JSON.stringify(part.state.input)) + tokens += Token.estimate(part.state.output) + } + } + return tokens + } + + /** + * Convert messages to markdown format for distillation + */ + function messagesToMarkdown(messages: MessageV2.WithParts[]): string { + const lines: string[] = [] + + for (const msg of messages) { + const role = msg.info.role === "user" ? "User" : "Assistant" + lines.push(`### ${role}`) + lines.push("") + + for (const part of msg.parts) { + if (part.type === "text" && part.text) { + // Skip synthetic parts like "Continue if you have next steps" + if (part.synthetic) continue + lines.push(part.text) + lines.push("") + } else if (part.type === "tool" && part.state.status === "completed") { + // Skip compacted tool outputs + if (part.state.time.compacted) continue + lines.push(`**Tool: ${part.tool}**`) + lines.push("```json") + lines.push(JSON.stringify(part.state.input, null, 2)) + lines.push("```") + if (part.state.output) { + lines.push("Output:") + lines.push("```") + lines.push(part.state.output.slice(0, 1000)) + if (part.state.output.length > 1000) lines.push("... 
(truncated)") + lines.push("```") + } + lines.push("") + } + } + } + + return lines.join("\n") + } + + /** + * Extract summary text from a compaction summary message's parts + */ + function extractSummaryText(msg: MessageV2.WithParts): string { + return msg.parts + .filter((p): p is MessageV2.TextPart => p.type === "text" && !p.synthetic) + .map((p) => p.text) + .join("\n") + } + + /** + * Fetch previous compaction summaries from the session. + * Only returns summaries that are true compaction breakpoint summaries + * (parent message has a compaction part), not sub-collapse summaries. + * Respects token budget to avoid overflowing context window. + */ + async function getPreviousSummaries(sessionID: string, limit: number, tokenBudget: number): Promise { + const allMessages = await Session.messages({ sessionID }) + + // Build a set of message IDs that have compaction parts (are breakpoints) + const breakpointMessageIds = new Set() + for (const msg of allMessages) { + if (msg.parts.some((p) => p.type === "compaction")) { + breakpointMessageIds.add(msg.info.id) + } + } + + log.debug("COLLAPSE getPreviousSummaries breakpoints found", { + sessionID, + breakpointCount: breakpointMessageIds.size, + breakpointIds: Array.from(breakpointMessageIds), + }) + + // Filter to assistant summaries whose parent is a compaction breakpoint + const summaryMessages = allMessages + .filter( + (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => + m.info.role === "assistant" && + (m.info as MessageV2.Assistant).summary === true && + (m.info as MessageV2.Assistant).finish !== undefined && + // Parent must be a compaction breakpoint (has compaction part) + breakpointMessageIds.has((m.info as MessageV2.Assistant).parentID), + ) + .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first + .slice(-limit) // take the N most recent + + log.debug("COLLAPSE getPreviousSummaries filtered", { + sessionID, + totalMessages: allMessages.length, + summaryCount: 
summaryMessages.length, + summaryIds: summaryMessages.map((m) => m.info.id), + }) + + // Include summaries only if they fit within token budget + // Start from most recent (end of array) since those are most relevant + const result: string[] = [] + let tokensUsed = 0 + + for (let i = summaryMessages.length - 1; i >= 0; i--) { + const text = extractSummaryText(summaryMessages[i]) + if (!text.trim()) continue + + const estimate = Token.estimate(text) + if (tokensUsed + estimate > tokenBudget) break + + result.unshift(text) // prepend to maintain chronological order + tokensUsed += estimate + } + + return result + } + + // =========================================================================== + // FLOAT MODE: Sub-collapse implementation + // =========================================================================== + + /** + * Detect all chains in the message list. + * A chain is a user message followed by 2+ consecutive assistant messages + * that reference back to the user message via parentID. + * + * Single user + single assistant pairs are NOT considered chains (simple Q&A). + * Only groups with 2+ assistant messages are worth sub-collapsing. + */ + export function detectChains(messages: MessageV2.WithParts[]): ChainInfo[] { + const chains: ChainInfo[] = [] + let i = 0 + + while (i < messages.length) { + const msg = messages[i] + + // Look for user messages (start of potential chains) + if (msg.info.role === "user") { + // Skip compaction trigger messages + const isCompactionTrigger = msg.parts.some((p) => p.type === "compaction") + if (isCompactionTrigger) { + i++ + continue + } + + log.info("COLLAPSE detectChains chain start", { + userIdx: i, + userId: msg.info.id, + }) + + const chain: ChainInfo = { + userMessageIndex: i, + assistantMessageIndices: [], + allMessageIndices: [i], + chainTokens: estimateMessageTokens(msg), + userMessageId: msg.info.id, + } + + // Walk forward looking for assistant messages that belong to this chain. 
+ // Track all user message IDs that are part of this chain so assistant messages + // parented to mid-run user interjections are still recognized as belonging here. + const chainUserIds = new Set([msg.info.id]) + + for (let j = i + 1; j < messages.length; j++) { + const next = messages[j] + if (next.info.role === "assistant") { + const nextInfo = next.info as MessageV2.Assistant + + // Skip messages that are already sub-collapse summaries (summary: true) or + // already soft-deleted (flux: "compacted"). These must not be included in + // assistantMessageIndices — the soft-delete loop in executeSubCollapse + // would otherwise re-mark already-processed summaries as flux="compacted" + // on every subsequent sub-collapse run. + if (nextInfo.summary || nextInfo.flux) { + // Still part of this chain's ID range (same parent) but should not be + // included in allMessageIndices for the sub-collapse scope — include + // only in the chain walk so we don't break the chain traversal. + const parentID = nextInfo.parentID + if ( + parentID && + (chainUserIds.has(parentID) || + chain.assistantMessageIndices.some((idx) => messages[idx].info.id === parentID)) + ) { + // Part of this chain but already processed — skip adding to indices + continue + } else { + break + } + } + + // Check if this assistant message belongs to the chain + // (has parentID pointing to any user message in the chain or previous assistant) + const parentID = nextInfo.parentID + + if ( + parentID && + (chainUserIds.has(parentID) || + chain.assistantMessageIndices.some((idx) => messages[idx].info.id === parentID)) + ) { + chain.assistantMessageIndices.push(j) + chain.allMessageIndices.push(j) + chain.chainTokens += estimateMessageTokens(next) + } else { + // Assistant message with different parent, not part of this chain + break + } + } else if (next.info.role === "user") { + // A compaction trigger user message ends the chain + if (next.parts.some((p) => p.type === "compaction")) break + + // Only treat as 
a mid-run user interjection if the immediately preceding + // message is an assistant still in a tool-calls sequence. If the prior + // message is a stop/end-turn assistant, a summary, or another user message, + // this is a new independent turn — end the chain. + const prev = messages[j - 1] + const prevInfo = prev?.info.role === "assistant" ? (prev.info as MessageV2.Assistant) : null + const isInterjection = !!prevInfo && prevInfo.finish === "tool-calls" + log.info("COLLAPSE detectChains user boundary", { + userIdx: j, + userId: next.info.id, + prevRole: prev?.info.role, + prevFinish: prevInfo?.finish, + isInterjection, + }) + if (!isInterjection) break + + chainUserIds.add(next.info.id) + chain.allMessageIndices.push(j) + chain.chainTokens += estimateMessageTokens(next) + } + } + + // Only count as a chain if there are 2+ assistant responses + // Single user + single assistant is just a simple Q&A, not a chain worth collapsing + log.info("COLLAPSE detectChains chain end", { + userIdx: i, + userId: chain.userMessageId, + assistants: chain.assistantMessageIndices.length, + valid: chain.assistantMessageIndices.length >= 2, + }) + if (chain.assistantMessageIndices.length >= 2) { + chains.push(chain) + } + + // Move past the chain (or single Q&A pair) + const lastIdx = + chain.assistantMessageIndices.length > 0 + ? chain.allMessageIndices[chain.allMessageIndices.length - 1] + 1 + : i + 1 + i = lastIdx + } else { + i++ + } + } + + return chains + } + + /** + * Check if float mode should trigger sub-collapse. + * Returns the oldest chain that should be sub-collapsed, or null if none. + */ + export async function shouldFloatSubCollapse( + messages: MessageV2.WithParts[], + sessionID: string, + ): Promise { + const config = await Config.get() + const floatConfig = config.compaction?.float + const chainThreshold = floatConfig?.chainThreshold ?? 
DEFAULTS.float.chainThreshold + + const chains = detectChains(messages) + + log.info("COLLAPSE float mode check", { + sessionID, + chainCount: chains.length, + chainThreshold, + shouldSubCollapse: chains.length > chainThreshold, + chains: chains.map((c, i) => ({ + index: i, + userIdx: c.userMessageIndex, + userId: c.userMessageId, + assistants: c.assistantMessageIndices.length, + firstAssistantIdx: c.assistantMessageIndices[0], + lastAssistantIdx: c.assistantMessageIndices[c.assistantMessageIndices.length - 1], + tokens: c.chainTokens, + })), + }) + + if (chains.length > chainThreshold) { + // Return the oldest chain (first in the list) for sub-collapse + const oldestChain = chains[0] + log.info("COLLAPSE float mode triggering sub-collapse on oldest chain", { + sessionID, + chainIndex: 0, + userMessageIndex: oldestChain.userMessageIndex, + userMessageId: oldestChain.userMessageId, + assistantCount: oldestChain.assistantMessageIndices.length, + chainTokens: oldestChain.chainTokens, + }) + return oldestChain + } + + return null + } + + /** + * Build the sub-collapse prompt for a specific chain. + * Uses the bookend algorithm approach from FluxCapacitor. 
+ */ + function buildSubCollapsePrompt( + messages: MessageV2.WithParts[], + chain: ChainInfo, + previousSummaries: string[], + algorithm: SubCollapseAlgorithm, + tokenTarget: number, + knowledgePacks?: { name: string; text: string }[], + ): string { + // Get the user message + const userMsg = messages[chain.userMessageIndex] + const userContent = messagesToMarkdown([userMsg]) + + // Get the final assistant message (contains conclusions) + const lastAssistantIdx = chain.assistantMessageIndices[chain.assistantMessageIndices.length - 1] + const lastAssistantMsg = messages[lastAssistantIdx] + const finalAssistantText = extractTextOnly(lastAssistantMsg) + + // Gather tool outputs with timing + const toolOutputs = gatherToolOutputsForChain(messages, chain) + + const sections: string[] = [] + + // Template based on algorithm + if (algorithm === "bookend" || algorithm === "full") { + sections.push(`You are producing a settled, conflict-free record of what was accomplished in a multi-turn assistant work session. + +The assistant worked through a request over multiple turns -- reading files, running commands, writing code, debugging, making decisions, and sometimes changing direction when the user gave corrections. Your job is to produce the FINAL SETTLED STATE: what is true NOW, after all corrections and reversals have been applied. + +CRITICAL CONTEXT: After this extraction, the conversation will contain: +- The user's original message (preserved as-is, not deleted) +- Any earlier breakpoint summaries (preserved as-is, not deleted) +- YOUR OUTPUT (replaces all the assistant's multi-turn work) + +Because the user message and earlier summaries remain in the conversation, your output must NOT repeat or restate their content. That information is already there. Your output captures ONLY what the assistant uniquely produced. 
+ +RESOLUTION RULE: If the work contains contradictions or reversals (the user corrected course, an approach was abandoned, a file was replaced), resolve them. Output only the final settled state as positive, direct statements. Do not mention what was tried and rejected. Do not include both sides of a reversal. If the bench script ended up as bench.py, state that -- do not also mention that bench was previously in the gb CLI. + +Target length: approximately ${tokenTarget} tokens`) + + if (previousSummaries.length > 0) { + sections.push(`## Earlier Summaries (REFERENCE ONLY -- this content is already preserved, do NOT repeat it) +${previousSummaries.join("\n\n---\n\n")}`) + } + + sections.push(`## User Request (REFERENCE ONLY -- this message is already preserved, do NOT repeat it) +${userContent}`) + + sections.push(`## Final Assistant Response +${finalAssistantText}`) + + sections.push(`## Work Timeline +${toolOutputs}`) + + const kpInstructions = + knowledgePacks && knowledgePacks.length > 0 + ? `\n\nKnowledge Packs (PERSISTENT -- always present in every conversation, never compacted): +${knowledgePacks.map((kp) => `- ${kp.name}`).join("\n")} +Do NOT include content already covered by these knowledge packs -- it will always be injected and wastes summary tokens. +EXCEPTION: If this chain explicitly overrides or contradicts a knowledge pack instruction, capture that override precisely -- name the pack and state what changed.` + : "" + + sections.push(`## Extraction Instructions + +From the work timeline and final response above, produce the final settled state under these headings (omit any heading with no content): + +1. **Final artifacts** -- code that was written or modified (verbatim in fenced blocks), files created, configurations applied. Show only the final version -- do not include earlier versions that were replaced. +2. **How things work now** -- the approach that is currently in place, tools and commands to use, standing patterns. 
State these as direct instructions ("use X", "run Y", "the script lives at Z"), not as a history of decisions. +3. **Non-obvious discoveries** -- error workarounds, environment-specific behaviors, API quirks, gotchas that would be painful to rediscover. +4. **Current state** -- what is complete, what is pending, what is broken. State each item as a direct fact. + +DISCARD everything else: +- Anything that was tried and then replaced or corrected -- only show the final result +- Debugging steps and their output (unless the finding is non-obvious and critical) +- File reads and exploration that informed decisions +- Anything already present in the user request or earlier summaries above +- Narration, history, or explanation of how the work evolved +- Any mention of approaches that were abandoned${kpInstructions} + +Write the extracted content directly, as factual statements. Not as a summary, not as a narrative, not as a response to the user. Just the settled, conflict-free record of what is true now.`) + } else { + // minimal algorithm + const kpMinimal = + knowledgePacks && knowledgePacks.length > 0 + ? `\nKnowledge packs always present (do NOT summarize their content): ${knowledgePacks.map((kp) => kp.name).join(", ")}. Exception: capture any explicit overrides to KP instructions.` + : "" + + sections.push(`Produce the final settled state of this assistant work session. + +If the work contains corrections or reversals, resolve them -- output only what is true now, as positive direct statements. Do not include both sides of any reversal. + +The user message and any earlier summaries remain in conversation context -- do NOT repeat them. + +Target length: approximately ${tokenTarget} tokens + +## User Request (REFERENCE ONLY -- already preserved) +${userContent} + +## Final Response +${finalAssistantText} + +## Extraction Instructions + +Extract only the final settled state: +1. 
**Final artifacts** -- code verbatim in fenced blocks, files created, configurations applied (final version only) +2. **How things work now** -- current approach, tools and commands to use, standing patterns (state as direct facts) +3. **Non-obvious discoveries** -- error workarounds, environment quirks, API behaviors that would be painful to rediscover +4. **Current state** -- what is complete, what is pending, what is broken + +DISCARD: anything tried and then replaced, intermediate work, debugging steps, file exploration, anything already in the user request, history of how decisions evolved.${kpMinimal} + +Write extracted content directly as factual statements. Settled, conflict-free, positive.`) + } + + return sections.join("\n\n") + } + + /** + * Extract only text content from an assistant message (no tool calls) + */ + function extractTextOnly(msg: MessageV2.WithParts): string { + const textParts: string[] = [] + for (const part of msg.parts) { + if (part.type === "text" && !part.synthetic && part.text) { + textParts.push(part.text) + } + } + return textParts.join("\n\n") + } + + /** + * Gather tool outputs for a chain with timing information + */ + function gatherToolOutputsForChain(messages: MessageV2.WithParts[], chain: ChainInfo): string { + const outputLines: string[] = [] + const userMsg = messages[chain.userMessageIndex] + const chainStartTime = userMsg.info.time.created + + let stepNumber = 0 + + for (const idx of chain.assistantMessageIndices) { + const msg = messages[idx] + + for (const part of msg.parts) { + if (part.type !== "tool") continue + if (part.state.status !== "completed") continue + if (part.state.time.compacted) continue + + stepNumber++ + const toolName = part.tool + const toolTime = part.state.time + + // Build timing info + let timingInfo = "" + if (toolTime.start) { + const relTime = formatRelativeTime(toolTime.start, chainStartTime) + if (toolTime.end) { + const duration = formatDuration(toolTime.start, toolTime.end) + timingInfo 
= ` [${relTime}, ${duration}]` + } else { + timingInfo = ` [${relTime}]` + } + } + + outputLines.push(`### Step ${stepNumber}: ${toolName}${timingInfo}`) + outputLines.push("") + + // Tool input + if (part.state.input) { + const input = JSON.stringify(part.state.input, null, 2) + const truncatedInput = input.length > 2000 ? input.slice(0, 2000) + "\n... (truncated)" : input + outputLines.push("**Parameters:**") + outputLines.push("```json") + outputLines.push(truncatedInput) + outputLines.push("```") + outputLines.push("") + } + + // Tool output + if (part.state.output) { + const output = part.state.output + const truncatedOutput = output.length > 3000 ? output.slice(0, 3000) + "\n... (truncated)" : output + outputLines.push("**Result:**") + outputLines.push("```") + outputLines.push(truncatedOutput) + outputLines.push("```") + outputLines.push("") + } + } + } + + return outputLines.join("\n") + } + + function formatRelativeTime(timestamp: number, chainStart: number): string { + const deltaMs = timestamp - chainStart + const deltaSec = Math.floor(deltaMs / 1000) + if (deltaSec < 60) return `+${deltaSec}s` + const deltaMin = Math.floor(deltaSec / 60) + const remainSec = deltaSec % 60 + return `+${deltaMin}m${remainSec}s` + } + + function formatDuration(startMs: number, endMs: number): string { + const durationMs = endMs - startMs + if (durationMs < 1000) return `${durationMs}ms` + const durationSec = (durationMs / 1000).toFixed(1) + return `${durationSec}s` + } + + /** + * Execute sub-collapse on a specific chain. + * This replaces the chain's assistant messages with a condensed summary. 
+ */ + export interface SubCollapseResult { + status: "success" | "error" + /** The summary message ID that replaced the chain's assistant messages */ + summaryMessageId?: string + /** The user message ID at the start of the collapsed chain */ + chainUserMessageId?: string + /** Index of the last assistant message that was in the original chain */ + originalLastAssistantIndex?: number + /** Output tokens of the generated summary */ + summaryTokens?: number + } + + export async function executeSubCollapse(input: { + sessionID: string + messages: MessageV2.WithParts[] + chain: ChainInfo + abort: AbortSignal + }): Promise { + const config = await Config.get() + const floatConfig = config.compaction?.float + const algorithm = (floatConfig?.algorithm ?? DEFAULTS.float.algorithm) as SubCollapseAlgorithm + const summaryMaxTokens = floatConfig?.subCollapseSummaryMaxTokens ?? DEFAULTS.float.subCollapseSummaryMaxTokens + const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries + + log.info("COLLAPSE sub-collapse begin", { + sessionID: input.sessionID, + algorithm, + chain: { + userMessageIndex: input.chain.userMessageIndex, + userMessageId: input.chain.userMessageId, + assistantCount: input.chain.assistantMessageIndices.length, + firstAssistantIndex: input.chain.assistantMessageIndices[0], + lastAssistantIndex: input.chain.assistantMessageIndices[input.chain.assistantMessageIndices.length - 1], + tokens: input.chain.chainTokens, + range: `[${input.chain.userMessageIndex}..${input.chain.assistantMessageIndices[input.chain.assistantMessageIndices.length - 1]}]`, + }, + }) + + // Get the user message for model info + const userMsg = input.messages[input.chain.userMessageIndex] + const userInfo = userMsg.info as MessageV2.User + + // Get compaction agent and model + const agent = await Agent.get("compaction") + const model = agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(userInfo.model.providerID, userInfo.model.modelID) + + // Get previous summaries + const allSessionMessages = await Session.messages({ sessionID: input.sessionID }) + const previousSummaries = await getPreviousSummaries( + input.sessionID, + previousSummariesLimit, + model.limit.context - ProviderTransform.maxOutputTokens(model) - 50000, // Leave room for prompt + ) + + log.debug("COLLAPSE sub-collapse context", { + sessionID: input.sessionID, + previousSummariesCount: previousSummaries.length, + modelId: model.id, + }) + + // Load knowledge packs for compaction context + const knowledgePacks = await KnowledgePack.loadFromSession(input.sessionID) + + // Build the sub-collapse prompt + const prompt = buildSubCollapsePrompt( + input.messages, + input.chain, + previousSummaries, + algorithm, + summaryMaxTokens, + knowledgePacks, + ) + + log.debug("COLLAPSE sub-collapse prompt built", { + sessionID: input.sessionID, + promptLength: prompt.length, + promptTokensEstimate: Token.estimate(prompt), + }) + + // Create a new assistant message for the sub-collapse summary + // It should replace the chain's assistant messages + const lastAssistantIdx = input.chain.assistantMessageIndices[input.chain.assistantMessageIndices.length - 1] + const lastAssistantMsg = input.messages[lastAssistantIdx] + + // Use Identifier.insert to place the summary message right after the user message + // and before any subsequent content + const summaryMessageId = Identifier.insert(input.chain.userMessageId, lastAssistantMsg.info.id, "message") + + log.debug("COLLAPSE sub-collapse summary ID placement", { + sessionID: input.sessionID, + afterId: input.chain.userMessageId, + beforeId: lastAssistantMsg.info.id, + summaryMessageId, + lastAssistantIdx, + lastAssistantMsgId: lastAssistantMsg.info.id, + chainAssistantCount: input.chain.assistantMessageIndices.length, + idSortOrder: [input.chain.userMessageId, 
summaryMessageId, lastAssistantMsg.info.id].join(" < "), + }) + + const summaryMsg = (await Session.updateMessage({ + id: summaryMessageId, + role: "assistant", + parentID: input.chain.userMessageId, + sessionID: input.sessionID, + mode: "subcompaction", // Mark as sub-collapse (NOT "compaction" which creates breakpoint) + agent: "compaction", + // summary: true is required to prevent the prompt loop from re-triggering compaction. + // prompt.ts:530 checks `lastFinished.summary !== true` before calling isOverflow(). + // Without this flag, the loop sees the sub-collapse result as a normal assistant + // message, evaluates isOverflow() against its token counts, and immediately + // re-triggers compaction — causing the looping behavior. + // + // This does NOT create a compaction breakpoint. filterCompacted() only breaks on + // USER messages that have a `compaction` part (message-v2.ts:670). summary: true + // on an assistant message is purely a prompt-loop guard — it has no effect on + // filterCompacted's breakpoint detection. 
+ summary: true, + path: { + cwd: Instance.directory, + root: Instance.worktree, + }, + cost: 0, + tokens: { + output: 0, + input: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + modelID: model.id, + providerID: model.providerID, + time: { + created: userMsg.info.time.created + 1, // Right after user message + }, + })) as MessageV2.Assistant + + const processor = SessionProcessor.create({ + assistantMessage: summaryMsg, + sessionID: input.sessionID, + model, + abort: input.abort, + }) + + // Process the sub-collapse summary + await processor.process({ + user: userInfo, + agent, + abort: input.abort, + sessionID: input.sessionID, + tools: {}, + system: [], + messages: [ + { + role: "user", + content: [{ type: "text", text: prompt }], + }, + ], + model, + }) + + if (processor.message.error) { + log.error("COLLAPSE sub-collapse processor error, cleaning up placeholder", { + sessionID: input.sessionID, + error: processor.message.error, + summaryMessageId, + }) + // In SQLite, every message in the table is visible to stream() regardless + // of parent-child relationships. A failed placeholder mid-conversation with + // summary: true but no finish and zero tokens becomes a zombie that corrupts + // the session. Delete it so the original chain remains intact. + await Session.removeMessage({ + sessionID: input.sessionID, + messageID: summaryMessageId, + }) + return { status: "error" } + } + + log.info("COLLAPSE sub-collapse summary generated", { + sessionID: input.sessionID, + summaryTokens: processor.message.tokens.output, + summaryInputTokens: processor.message.tokens.input, + }) + + // Soft-delete the original assistant messages by marking them flux: "compacted". + // They remain in SQLite (queryable and restorable via fluxcapacitor) but are + // invisible to the LLM — toModelMessages skips any message with flux set. 
  for (const idx of input.chain.assistantMessageIndices) {
    const msg = input.messages[idx]
    const info = msg.info as MessageV2.Assistant
    await Session.updateMessage({
      ...info,
      flux: "compacted",
    })
  }

  // Calculate token savings
  // NOTE(review): chainTokens is a Token.estimate()-based heuristic while
  // summaryTokens is model-reported output — tokensSaved is approximate.
  const summaryTokens = processor.message.tokens.output
  const tokensSaved = input.chain.chainTokens - summaryTokens

  log.info("COLLAPSE sub-collapse complete", {
    sessionID: input.sessionID,
    chain: {
      range: `[${input.chain.userMessageIndex}..${input.chain.assistantMessageIndices[input.chain.assistantMessageIndices.length - 1]}]`,
      userMessageId: input.chain.userMessageId,
      assistantsDeleted: input.chain.assistantMessageIndices.length,
      tokensBefore: input.chain.chainTokens,
    },
    summary: { tokens: summaryTokens, messageId: summaryMessageId },
    tokensSaved,
  })

  // Publish event so TUI reloads messages
  Bus.publish(SessionCompaction.Event.Compacted, { sessionID: input.sessionID })

  return {
    status: "success",
    summaryMessageId: summaryMessageId,
    chainUserMessageId: input.chain.userMessageId,
    originalLastAssistantIndex: lastAssistantIdx,
    summaryTokens,
  }
}

/**
 * Float mode pre-check: Run before isOverflow to sub-collapse oldest chains.
 * This is called from the main loop before evaluating token counts.
 *
 * Returns subCollapsed=false with the input messages unchanged when float mode
 * is not active or context usage is below the minFloat threshold.
 */
export async function floatModePreCheck(input: {
  sessionID: string
  messages: MessageV2.WithParts[]
  abort: AbortSignal
  tokens: MessageV2.Assistant["tokens"]
  contextLimit: number
}): Promise<{ subCollapsed: boolean; messages: MessageV2.WithParts[] }> {
  const method = await getMethod()

  // Only active when the configured compaction method is "float"
  if (method !== "float") return { subCollapsed: false, messages: input.messages }

  const config = await Config.get()
  const floatConfig = config.compaction?.float
  const minFloat = floatConfig?.minFloat ?? DEFAULTS.float.minFloat

  // Log message analysis to debug filterCompacted behavior
  const firstMsg = input.messages[0]
  const lastMsg = input.messages[input.messages.length - 1]

  // Find any breakpoint markers in the messages we received
  const breakpoints = input.messages
    .map((m, idx) => ({
      idx,
      id: m.info.id,
      role: m.info.role,
      hasCompactionPart: m.parts.some((p) => p.type === "compaction"),
    }))
    .filter((m) => m.hasCompactionPart)

  // Find any summary assistant messages
  const summaries = input.messages
    .map((m, idx) => ({
      idx,
      id: m.info.id,
      role: m.info.role,
      summary: m.info.role === "assistant" ? (m.info as MessageV2.Assistant).summary : undefined,
      finish: m.info.role === "assistant" ? (m.info as MessageV2.Assistant).finish : undefined,
    }))
    .filter((m) => m.summary === true)

  // Compute initial context usage fraction from actual token counts
  const initialTokenCount =
    input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output
  const initialUsedFraction = input.contextLimit > 0 ? initialTokenCount / input.contextLimit : 0

  log.info("COLLAPSE float mode begin", {
    sessionID: input.sessionID,
    messages: input.messages.length,
    breakpoints: breakpoints.length,
    summaries: summaries.length,
    oldestMsgId: firstMsg?.info.id,
    newestMsgId: lastMsg?.info.id,
    minFloat,
    initialTokenCount,
    contextLimit: input.contextLimit,
    initialUsedFraction: initialUsedFraction.toFixed(3),
    minFloatCheck: initialUsedFraction >= minFloat ? "pass" : "skip",
  })

  // If context usage is below minFloat threshold, skip sub-collapse evaluation entirely
  if (initialUsedFraction < minFloat) {
    log.info("COLLAPSE float mode skipped: context usage below minFloat", {
      sessionID: input.sessionID,
      usedFraction: initialUsedFraction.toFixed(3),
      minFloat,
    })
    return { subCollapsed: false, messages: input.messages }
  }

  // Collapse one chain at a time, re-checking minFloat after each.
+ // Returns the final message list after all collapses, or null if none occurred. + async function collapseNext( + messages: MessageV2.WithParts[], + tokenCount: number, + ): Promise { + const used = input.contextLimit > 0 ? tokenCount / input.contextLimit : 0 + if (used < minFloat) { + log.info("COLLAPSE float mode stopping: context usage dropped below minFloat", { + sessionID: input.sessionID, + tokenCount, + contextLimit: input.contextLimit, + used: used.toFixed(3), + minFloat, + }) + return null + } + + const chain = await shouldFloatSubCollapse(messages, input.sessionID) + if (!chain) return null + + const result = await executeSubCollapse({ + sessionID: input.sessionID, + messages, + chain, + abort: input.abort, + }) + + if (result.status === "error") { + log.error("COLLAPSE float mode sub-collapse failed") + return null + } + + const summaryTokens = result.summaryTokens ?? 0 + const nextTokenCount = tokenCount - chain.chainTokens + summaryTokens + + // Mirror what process() does at lines 839-888: find the chronologically last + // real assistant message (excluding the new sub-collapse summary) and patch its + // stored token counts to reflect the reduction. Without this, lastFinished.tokens + // in the prompt loop still holds pre-collapse values from the database, so the + // minFloat gate in the next collapseNext iteration (and isOverflow on the next + // loop pass) would see stale high token counts and never stop collapsing. 
+ const allMessages = await Session.messages({ sessionID: input.sessionID }) + const lastReal = allMessages + .filter( + (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => + m.info.role === "assistant" && + m.info.id !== result.summaryMessageId && + (m.info as MessageV2.Assistant).finish !== undefined, + ) + .sort((a, b) => b.info.time.created - a.info.time.created)[0] + + if (lastReal) { + const currentTotal = + lastReal.info.tokens.input + + lastReal.info.tokens.cache.read + + lastReal.info.tokens.cache.write + + lastReal.info.tokens.output + const newTotal = Math.max(0, currentTotal - chain.chainTokens + summaryTokens) + lastReal.info.tokens = { + input: 0, + output: lastReal.info.tokens.output, + reasoning: lastReal.info.tokens.reasoning, + cache: { + read: Math.max(0, newTotal - lastReal.info.tokens.output), + write: 0, + }, + } + await Session.updateMessage(lastReal.info) + log.info("COLLAPSE float mode token adjustment", { + sessionID: input.sessionID, + lastRealId: lastReal.info.id, + chainTokensRemoved: chain.chainTokens, + summaryTokensAdded: summaryTokens, + previousTotal: currentTotal, + newTotal, + nextTokenCount, + usedFractionAfter: input.contextLimit > 0 ? (nextTokenCount / input.contextLimit).toFixed(3) : "n/a", + minFloat, + }) + } + + // Reload messages so chain detection sees the updated conversation state. + const next = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID)) + + return (await collapseNext(next, nextTokenCount)) ?? 
next + } + + const final = await collapseNext(input.messages, initialTokenCount) + if (!final) return { subCollapsed: false, messages: input.messages } + + log.info("COLLAPSE float mode complete", { + sessionID: input.sessionID, + subCollapsed: true, + messages: final.length, + }) + + // Return subCollapsed: true to signal the main loop should reload and re-filter messages + return { subCollapsed: true, messages: final } + } +} diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 79884d641ea0..da641fb02cbd 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -13,6 +13,7 @@ import { fn } from "@/util/fn" import { Agent } from "@/agent/agent" import { Plugin } from "@/plugin" import { Config } from "@/config/config" +import { CompactionExtension } from "./compaction-extension" import { ProviderTransform } from "@/provider/transform" export namespace SessionCompaction { @@ -30,6 +31,13 @@ export namespace SessionCompaction { const COMPACTION_BUFFER = 20_000 export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { + // Use collapse/float overflow check if method is collapse or float (uses configurable trigger) + const method = await CompactionExtension.getMethod() + if (method === "collapse" || method === "float") { + return CompactionExtension.isOverflow(input) + } + + // Standard overflow check const config = await Config.get() if (config.compaction?.auto === false) return false const context = input.model.limit.context @@ -105,7 +113,45 @@ export namespace SessionCompaction { abort: AbortSignal auto: boolean overflow?: boolean - }) { + }): Promise<"continue" | "stop"> { + // Route to collapse/float compaction if configured + const method = await CompactionExtension.getMethod() + log.info("COLLAPSE compacting", { method, sessionID: input.sessionID }) + + // For float mode, we use the collapse compaction 
but with prior sub-collapse + // The sub-collapse is handled in prompt.ts before isOverflow is called + if (method === "collapse" || method === "float") { + const result = await CompactionExtension.process(input) + Bus.publish(Event.Compacted, { sessionID: input.sessionID }) + // For overflow-triggered compaction in collapse/float mode, inject the + // overflow explanation message so the user knows their media was too large. + if (result === "continue" && input.auto && input.overflow) { + const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User + const continueMsg = await Session.updateMessage({ + id: Identifier.ascending("message"), + role: "user", + sessionID: input.sessionID, + time: { created: Date.now() }, + agent: userMessage.agent, + model: userMessage.model, + }) + await Session.updatePart({ + id: Identifier.ascending("part"), + messageID: continueMsg.id, + sessionID: input.sessionID, + type: "text", + synthetic: true, + text: "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context. 
If the user was asking about attached images or files, explain that the attachments were too large to process and suggest they try again with smaller or fewer files.\n\nContinue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.", + time: { + start: Date.now(), + end: Date.now(), + }, + }) + } + return result + } + + // Standard compaction const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User let messages = input.messages diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index b117632051f7..f3109ef1f3ad 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -743,6 +743,109 @@ export namespace Session { }, ) + /** + * Copy a user message to a new position between afterId and beforeId. + * + * Used when a mid-chain compaction split leaves orphaned assistant messages: + * a duplicate of the original chain's user message is inserted just before + * the orphaned tail so that detectChains can find it as a proper chain start. + * + * All parts from the source message are copied with new IDs that sort between + * the same anchors. + */ + export const copyUserMessage = fn( + z.object({ + sessionID: Identifier.schema("session"), + source: MessageV2.WithParts, + afterId: z.string(), + beforeId: z.string().optional(), + }), + async (input) => { + const { messageID, partID: firstPartID } = Identifier.insertCopy(input.afterId, input.beforeId) + + const newInfo: MessageV2.User = { + ...(input.source.info as MessageV2.User), + id: messageID, + sessionID: input.sessionID, + time: { created: Identifier.timestamp(messageID) }, + } + + await updateMessage(newInfo) + + // Copy all parts with new IDs sorted after the new message ID + let prevPartId = messageID + for (let i = 0; i < input.source.parts.length; i++) { + const srcPart = input.source.parts[i] + const newPartId = i === 0 ? 
firstPartID : Identifier.insert(prevPartId, input.beforeId, "part") + const newPart: MessageV2.Part = { + ...srcPart, + id: newPartId, + messageID, + sessionID: input.sessionID, + } + await updatePart(newPart) + prevPartId = newPartId + } + + log.info("COLLAPSE copyUserMessage inserted duplicate chain anchor", { + sessionID: input.sessionID, + sourceId: input.source.info.id, + newId: messageID, + afterId: input.afterId, + beforeId: input.beforeId ?? "(none)", + partsCopied: input.source.parts.length, + }) + + return messageID + }, + ) + + /** + * Re-parent a chain of orphaned assistant messages to a new parent. + * + * When a mid-chain compaction split is performed, assistant messages that + * were children of a now-extracted user message need to be re-parented to + * a new duplicate user message. This updates the parentID field on all + * assistant messages in the session that point to oldParentID and were + * created after afterTimestamp. + */ + export const reparentChain = fn( + z.object({ + sessionID: Identifier.schema("session"), + oldParentID: z.string(), + newParentID: z.string(), + afterTimestamp: z.number(), + }), + async (input) => { + // Load all messages in session and find orphaned ones matching criteria + const msgs = await messages({ sessionID: input.sessionID }) + const orphans = msgs.filter( + (m) => + m.info.role === "assistant" && + (m.info as MessageV2.Assistant).parentID === input.oldParentID && + m.info.time.created > input.afterTimestamp, + ) + + for (const orphan of orphans) { + const updated: MessageV2.Assistant = { + ...(orphan.info as MessageV2.Assistant), + parentID: input.newParentID, + } + await updateMessage(updated) + } + + log.info("COLLAPSE reparentChain updated orphaned messages", { + sessionID: input.sessionID, + oldParentID: input.oldParentID, + newParentID: input.newParentID, + afterTimestamp: input.afterTimestamp, + count: orphans.length, + }) + + return orphans.length + }, + ) + const UpdatePartInput = MessageV2.Part export 
const updatePart = fn(UpdatePartInput, async (part) => { diff --git a/packages/opencode/src/session/knowledge-pack.ts b/packages/opencode/src/session/knowledge-pack.ts new file mode 100644 index 000000000000..26afb8511093 --- /dev/null +++ b/packages/opencode/src/session/knowledge-pack.ts @@ -0,0 +1,381 @@ +import fs from "fs/promises" +import path from "path" +import { Global } from "@/global" +import { Identifier } from "@/id/id" +import { Log } from "@/util/log" +import { Session } from "./index" +import { MessageV2 } from "./message-v2" + +const log = Log.create({ service: "knowledge-pack" }) + +const KP_AGENT_PREFIX = "kp:" + +function agentKey(name: string, version: string) { + return KP_AGENT_PREFIX + name + "@" + version +} + +type KPFile = { + name: string + version: string + display_name?: string + content: string + // Optional per-agent system prompt overrides. Keys are agent names (e.g. "explore"), + // values are prompt strings. When a pack with agent overrides is active in a session, + // the matching agent will use the KP-supplied prompt instead of its built-in prompt. + agent?: Record + [key: string]: unknown +} + +export namespace KnowledgePack { + export type Pack = { + name: string + displayName?: string + version: string + content: string + file: string + // Per-agent system prompt overrides parsed from the YAML `agent` field. + agent?: Record + } + + /** + * Render the full text stored in the session message for a knowledge pack. + * Wraps the content with a clear header so the LLM always knows: + * - this is a persistent knowledge pack (always present, never compacted away) + * - the pack name and version (for precise override references in compaction) + * - the raw content follows immediately after the header + */ + export function render(pack: Pick): string { + const label = pack.displayName ?? pack.name + const version = pack.version ? 
` v${pack.version}` : "" + return `[KNOWLEDGE PACK: ${label}${version} | id: ${pack.name} | persistent: always injected, never compacted] + +${pack.content} + +--- +` + } + + /** + * Load all knowledge pack messages from a session (flux:"knowledge" messages). + * Returns their rendered text for use in compaction prompts. + */ + export async function loadFromSession(sessionID: string): Promise<{ name: string; text: string }[]> { + const msgs = await Session.messages({ sessionID }) + const result: { name: string; text: string }[] = [] + for (const msg of msgs) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux !== "knowledge") continue + const name = user.agent.startsWith(KP_AGENT_PREFIX) ? user.agent.slice(KP_AGENT_PREFIX.length) : user.agent + const textPart = msg.parts.find((p) => p.type === "text") as MessageV2.TextPart | undefined + if (textPart?.text) result.push({ name, text: textPart.text }) + } + return result + } + + /** + * Load all knowledge pack messages from a session as full WithParts objects. + * Used by prompt.ts to prepend KP messages to sessionMessages before toModelMessages, + * bypassing filterCompacted which stops at the compaction breakpoint before reaching + * KP messages (which have time_created=1,2,...). + */ + export async function fromSession(sessionID: string): Promise { + const msgs = await Session.messages({ sessionID }) + return msgs.filter((msg) => { + if (msg.info.role !== "user") return false + const user = msg.info as MessageV2.User + return user.flux === "knowledge" + }) + } + + /** + * Return a merged map of agent-name → prompt string from all knowledge packs + * currently active in the session that declare an `agent..prompt` field. + * + * Later packs in the list win over earlier ones if multiple packs override the + * same agent. The result is used in prompt.ts to override agent.prompt at + * runtime without mutating the global Agent registry. 
+ */ + export async function agentPrompts(sessionID: string): Promise> { + // Collect the agent keys of packs active in this session + const msgs = await Session.messages({ sessionID }) + const activeKeys = new Set() + for (const msg of msgs) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux !== "knowledge") continue + if (user.agent.startsWith(KP_AGENT_PREFIX)) activeKeys.add(user.agent.slice(KP_AGENT_PREFIX.length)) + } + if (activeKeys.size === 0) return {} + + // Load all pack files from both dirs so we can read their agent overrides + const packs = await load([defaultDir(), libraryDir()]) + const result: Record = {} + for (const pack of packs) { + if (!activeKeys.has(`${pack.name}@${pack.version}`)) continue + if (!pack.agent) continue + for (const [agentName, overrides] of Object.entries(pack.agent)) { + if (overrides.prompt) result[agentName] = overrides.prompt + } + } + return result + } + + async function load(dirs: string[]): Promise { + const packs: Pack[] = [] + for (const dir of dirs) { + let entries: string[] + try { + entries = await fs.readdir(dir) + } catch { + continue + } + for (const entry of entries.sort()) { + if (!entry.endsWith(".yaml") && !entry.endsWith(".yml")) continue + const file = path.join(dir, entry) + try { + const kp = Bun.YAML.parse(await Bun.file(file).text()) as KPFile + if (!kp.content) { + log.debug("knowledge pack has no content field, skipping", { file }) + continue + } + packs.push({ + name: kp.name ?? entry.replace(/\.ya?ml$/, ""), + displayName: kp.display_name, + version: kp.version, + content: kp.content.trimEnd(), + file, + agent: kp.agent, + }) + } catch (e) { + log.warn("failed to read knowledge pack", { file, error: e }) + } + } + } + return packs + } + + /** + * Ensure knowledge packs exist as flux:knowledge user messages at the very + * beginning of the session (time.created = i+1 so they sort before all real + * messages). 
Idempotent: existing packs matched by agent name are skipped or + * updated if content changed. + */ + export async function inject(input: { sessionID: string; dirs: string[] }) { + const packs = await load(input.dirs) + if (packs.length === 0) return + + const existing = await Session.messages({ sessionID: input.sessionID }) + const existingByName = new Map() + for (const msg of existing) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux !== "knowledge") continue + if (user.agent.startsWith(KP_AGENT_PREFIX)) existingByName.set(user.agent.slice(KP_AGENT_PREFIX.length), msg) + } + + for (let i = 0; i < packs.length; i++) { + const pack = packs[i] + const key = agentKey(pack.name, pack.version) + const found = existingByName.get(key.slice(KP_AGENT_PREFIX.length)) + const rendered = render(pack) + + if (found) { + const textPart = found.parts.find((p) => p.type === "text") as MessageV2.TextPart | undefined + if (textPart?.text === rendered) { + log.debug("knowledge pack already injected, skipping", { name: pack.name }) + continue + } + await Session.updatePart({ ...textPart!, text: rendered }) + log.info("knowledge pack content updated", { name: pack.name, sessionID: input.sessionID }) + continue + } + + const msgId = Identifier.create("message", false, i + 1) + const partId = Identifier.create("part", false, i + 1) + + await Session.updateMessage({ + id: msgId, + sessionID: input.sessionID, + role: "user", + flux: "knowledge", + time: { created: i + 1 }, + agent: key, + model: { + providerID: "flux", + modelID: "knowledge-pack", + name: pack.displayName ?? 
pack.name, + version: pack.version, + }, + } as MessageV2.User) + + await Session.updatePart({ + id: partId, + messageID: msgId, + sessionID: input.sessionID, + type: "text", + text: rendered, + } as MessageV2.TextPart) + + log.info("knowledge pack injected", { name: pack.name, sessionID: input.sessionID }) + } + } + + export function defaultDir(): string { + return path.join(Global.Path.config, "kp") + } + + /** + * The directory scanned for available knowledge packs in the sidebar. + * Named `llm_knowledge_packs` inside the opencode config dir. + */ + export function libraryDir(): string { + return path.join(Global.Path.config, "llm_knowledge_packs") + } + + /** + * List all knowledge packs available in the library directory. + * Returns Pack objects without injecting them into any session. + */ + export async function available(): Promise { + return load([libraryDir()]) + } + + /** + * Inject a single knowledge pack by name into a session. + * Finds the pack in the library directory and injects it. + * If already injected and content matches, does nothing. 
+ */ + export async function add(input: { sessionID: string; name: string; version: string }): Promise { + const packs = await available() + const pack = packs.find((p) => p.name === input.name && p.version === input.version) + if (!pack) throw new Error(`Knowledge pack not found: ${input.name}@${input.version}`) + + const key = agentKey(pack.name, pack.version) + const existing = await Session.messages({ sessionID: input.sessionID }) + let existingMsg: MessageV2.WithParts | undefined + let count = 0 + for (const msg of existing) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux !== "knowledge") continue + count++ + if (user.agent === key) existingMsg = msg + } + + const rendered = render(pack) + + if (existingMsg) { + const textPart = existingMsg.parts.find((p) => p.type === "text") as MessageV2.TextPart | undefined + if (textPart?.text === rendered) return + await Session.updatePart({ ...textPart!, text: rendered }) + log.info("knowledge pack content updated", { name: input.name, sessionID: input.sessionID }) + return + } + + const idx = count + 1 + const msgId = Identifier.create("message", false, idx) + const partId = Identifier.create("part", false, idx) + + await Session.updateMessage({ + id: msgId, + sessionID: input.sessionID, + role: "user", + flux: "knowledge", + time: { created: idx }, + agent: key, + model: { + providerID: "flux", + modelID: "knowledge-pack", + name: pack.displayName ?? pack.name, + version: pack.version, + }, + } as MessageV2.User) + + await Session.updatePart({ + id: partId, + messageID: msgId, + sessionID: input.sessionID, + type: "text", + text: rendered, + } as MessageV2.TextPart) + + log.info("knowledge pack added", { name: input.name, sessionID: input.sessionID }) + } + + /** + * Copy all active knowledge pack messages from a parent session into a child session. 
+ * Used when a subagent (Task tool) creates a child session so it inherits the parent's + * manually-enabled knowledge packs. Idempotent: packs already present in the child + * are skipped (matched by agent key). + */ + export async function copyFromParent(input: { parentSessionID: string; sessionID: string }): Promise { + const parentKPs = await fromSession(input.parentSessionID) + if (parentKPs.length === 0) return + + const childMsgs = await Session.messages({ sessionID: input.sessionID }) + const childKeys = new Set() + for (const msg of childMsgs) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux === "knowledge") childKeys.add(user.agent) + } + + const toAdd = parentKPs.filter((msg) => { + const user = msg.info as MessageV2.User + return !childKeys.has(user.agent) + }) + if (toAdd.length === 0) return + + const offset = childKeys.size + for (let i = 0; i < toAdd.length; i++) { + const src = toAdd[i] + const user = src.info as MessageV2.User + const textPart = src.parts.find((p) => p.type === "text") as MessageV2.TextPart | undefined + if (!textPart?.text) continue + + const idx = offset + i + 1 + const msgId = Identifier.create("message", false, idx) + const partId = Identifier.create("part", false, idx) + + await Session.updateMessage({ + id: msgId, + sessionID: input.sessionID, + role: "user", + flux: "knowledge", + time: { created: idx }, + agent: user.agent, + model: user.model, + } as MessageV2.User) + + await Session.updatePart({ + id: partId, + messageID: msgId, + sessionID: input.sessionID, + type: "text", + text: textPart.text, + } as MessageV2.TextPart) + + log.info("knowledge pack copied from parent", { agent: user.agent, sessionID: input.sessionID }) + } + } + + /** + * Remove a knowledge pack from a session by name. + * Deletes the flux:knowledge message (CASCADE removes its parts). 
+ */ + export async function remove(input: { sessionID: string; name: string; version: string }): Promise { + const key = agentKey(input.name, input.version) + const msgs = await Session.messages({ sessionID: input.sessionID }) + for (const msg of msgs) { + if (msg.info.role !== "user") continue + const user = msg.info as MessageV2.User + if (user.flux !== "knowledge") continue + if (user.agent !== key) continue + await Session.removeMessage({ sessionID: input.sessionID, messageID: msg.info.id }) + log.info("knowledge pack removed", { name: input.name, sessionID: input.sessionID }) + return + } + throw new Error(`Knowledge pack not active in session: ${input.name}@${input.version}`) + } +} diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 5b4e7bdbc044..bdcacbf3804e 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -14,6 +14,7 @@ import { Storage } from "@/storage/storage" import { ProviderError } from "@/provider/error" import { iife } from "@/util/iife" import { type SystemError } from "bun" +import { Log } from "../util/log" import type { Provider } from "@/provider/provider" export namespace MessageV2 { @@ -21,6 +22,8 @@ export namespace MessageV2 { return mime.startsWith("image/") || mime === "application/pdf" } + const log = Log.create({ service: "message-v2" }) + export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({})) export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() })) export const StructuredOutputError = NamedError.create( @@ -368,6 +371,7 @@ export namespace MessageV2 { system: z.string().optional(), tools: z.record(z.string(), z.boolean()).optional(), variant: z.string().optional(), + flux: z.string().optional(), }).meta({ ref: "UserMessage", }) @@ -437,6 +441,7 @@ export namespace MessageV2 { structured: z.any().optional(), variant: 
z.string().optional(), finish: z.string().optional(), + flux: z.string().optional(), }).meta({ ref: "AssistantMessage", }) @@ -554,8 +559,51 @@ export namespace MessageV2 { return { type: "json", value: output as never } } + // Prepend knowledge pack messages as the first user messages the LLM sees. + // They are stored with flux:"knowledge" and would otherwise be skipped below. + // KP messages are loaded separately by prompt.ts (fromSession) and prepended + // to sessionMessages before this call, since filterCompacted stops at the + // compaction breakpoint before reaching KP messages (time_created=1,2,...). + const kpCount = input.filter((m) => m.info.flux === "knowledge").length + log.debug("KNOWLEDGE PACK toModelMessages", { + totalInput: input.length, + kpMessages: kpCount, + kpIds: input.filter((m) => m.info.flux === "knowledge").map((m) => m.info.id), + }) + let kpPushed = 0 + for (const msg of input) { + if (msg.info.flux !== "knowledge") continue + if (msg.parts.length === 0) continue + const textParts = msg.parts.filter((p) => p.type === "text") as TextPart[] + if (textParts.length === 0) continue + log.debug("KNOWLEDGE PACK prepending to model messages", { + id: msg.info.id, + agent: (msg.info as User).agent, + textLen: textParts[0]?.text?.length ?? 0, + }) + result.push({ + id: msg.info.id, + role: "user", + parts: textParts.map((p) => ({ type: "text" as const, text: p.text })), + }) + kpPushed++ + } + // Append a user message delimiter after all knowledge pack messages. + // Because providers like Anthropic merge consecutive user messages into one, + // all KP text parts land in a single user message. This marker signals the + // start of real user content, making the boundary between injected KP content + // and the first real user message unambiguous, regardless of model or tokenizer. 
+ if (kpPushed > 0) { + result.push({ + id: Identifier.ascending("message"), + role: "user", + parts: [{ type: "text" as const, text: "=== USER MESSAGE ===\n" }], + }) + } + for (const msg of input) { if (msg.parts.length === 0) continue + if (msg.info.flux) continue if (msg.info.role === "user") { const userMessage: UIMessage = { @@ -809,18 +857,54 @@ export namespace MessageV2 { export async function filterCompacted(stream: AsyncIterable) { const result = [] as MessageV2.WithParts[] const completed = new Set() + for await (const msg of stream) { + // Knowledge pack messages (flux:"knowledge") are always prepended explicitly by + // prompt.ts via KnowledgePack.fromSession(). Never include them here — doing so + // causes duplicates in the [...kpMsgs, ...msgs] merge at prompt.ts:704. + if ((msg.info as User).flux === "knowledge") continue + + const hasCompactionPart = msg.parts.some((part) => part.type === "compaction") + // Recognize assistant summary for breakpoint detection - finish is not required + // (collapse compaction may not set finish, but summary: true is sufficient) + const isAssistantSummary = msg.info.role === "assistant" && (msg.info as Assistant).summary === true + result.push(msg) - if ( - msg.info.role === "user" && - completed.has(msg.info.id) && - msg.parts.some((part) => part.type === "compaction") - ) - break - if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish && !msg.info.error) - completed.add(msg.info.parentID) + + // Debug: log potential breakpoint candidates + // Upstream guard: do not mark errored summaries as completed breakpoints. + // Collapse compaction may not set finish, but summary: true is sufficient; + // however an errored summary must not be treated as a valid breakpoint. 
+ if (isAssistantSummary && !msg.info.error) { + const parentID = (msg.info as Assistant).parentID + log.debug("COLLAPSE filterCompacted found summary", { + msgId: msg.info.id, + parentID, + completedBefore: Array.from(completed), + }) + completed.add(parentID) + } + + // Check if this is a compaction breakpoint + if (msg.info.role === "user" && hasCompactionPart) { + log.debug("COLLAPSE filterCompacted user with compaction part", { + msgId: msg.info.id, + inCompleted: completed.has(msg.info.id), + completedSet: Array.from(completed), + }) + if (completed.has(msg.info.id)) { + log.debug("COLLAPSE filterCompacted BREAKPOINT", { id: msg.info.id }) + break + } + } } + result.reverse() + log.debug("COLLAPSE filterCompacted result", { + count: result.length, + firstId: result[0]?.info.id, + lastId: result[result.length - 1]?.info.id, + }) return result } diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index 4f77920cc987..b64dc0502f34 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -12,6 +12,7 @@ import { Agent } from "../agent/agent" import { Provider } from "../provider/provider" import { type Tool as AITool, tool, jsonSchema, type ToolCallOptions, asSchema } from "ai" import { SessionCompaction } from "./compaction" +import { Config } from "../config/config" import { Instance } from "../project/instance" import { Bus } from "../bus" import { ProviderTransform } from "../provider/transform" @@ -22,6 +23,7 @@ import PROMPT_PLAN from "../session/prompt/plan.txt" import BUILD_SWITCH from "../session/prompt/build-switch.txt" import MAX_STEPS from "../session/prompt/max-steps.txt" import { defer } from "../util/defer" +import { clone } from "remeda" import { ToolRegistry } from "../tool/registry" import { MCP } from "../mcp" import { LSP } from "../lsp" @@ -45,6 +47,7 @@ import { LLM } from "./llm" import { iife } from "@/util/iife" import { Shell } from "@/shell/shell" 
import { Truncate } from "@/tool/truncation" +import { KnowledgePack } from "./knowledge-pack" // @ts-ignore globalThis.AI_SDK_LOG_WARNINGS = false @@ -291,6 +294,31 @@ export namespace SessionPrompt { let step = 0 const session = await Session.get(sessionID) + + // Inject knowledge packs as flux:knowledge messages at the beginning of the session + const cfg = await Config.get() + if (cfg.knowledge?.enabled !== false) { + const dirs = [KnowledgePack.defaultDir(), ...(cfg.knowledge?.paths ?? [])] + await KnowledgePack.inject({ sessionID, dirs }) + } + + // Auto-enable any knowledge packs declared in config with enabled: true + const configPacks = cfg.knowledge?.packs?.filter((p) => p.enabled) ?? [] + if (configPacks.length > 0) { + const active = await KnowledgePack.fromSession(sessionID) + const activeKeys = new Set(active.map((msg) => (msg.info as MessageV2.User).agent)) + await Promise.all( + configPacks + .filter((p) => !activeKeys.has(`kp:${p.name}@${p.version}`)) + .map((p) => KnowledgePack.add({ sessionID, name: p.name, version: p.version })), + ) + } + + // Propagate manually-enabled knowledge packs from parent session into this subagent session + if (session.parentID) { + await KnowledgePack.copyFromParent({ parentSessionID: session.parentID, sessionID }) + } + while (true) { SessionStatus.set(sessionID, { type: "busy" }) log.info("loop", { step, sessionID }) @@ -320,6 +348,26 @@ export namespace SessionPrompt { !["tool-calls", "unknown"].includes(lastAssistant.finish) && lastUser.id < lastAssistant.id ) { + // Run float pre-check before exiting so sub-collapse fires on complete chains (stop finish) + // even when we are not about to make another LLM call. 
+ if (lastFinished && lastFinished.summary !== true) { + const { CompactionExtension } = await import("./compaction-extension") + const method = await CompactionExtension.getMethod() + if (method === "float") { + const stopModel = await Provider.getModel(lastUser.model.providerID, lastUser.model.modelID).catch( + () => null, + ) + if (stopModel) { + await CompactionExtension.floatModePreCheck({ + sessionID, + messages: msgs, + abort, + tokens: lastFinished.tokens, + contextLimit: stopModel.limit.context, + }) + } + } + } log.info("exiting loop", { sessionID }) break } @@ -539,23 +587,76 @@ export namespace SessionPrompt { continue } + // Float mode pre-check: sub-collapse oldest chains before evaluating overflow + // This runs before isOverflow to reduce token count via high-fidelity chain summaries + const { CompactionExtension } = await import("./compaction-extension") + const method = await CompactionExtension.getMethod() + log.info("COLLAPSE prompt float check", { + sessionID, + method, + hasLastFinished: !!lastFinished, + lastFinishedSummary: lastFinished?.summary, + willRunPreCheck: method === "float" && lastFinished && lastFinished.summary !== true, + }) + if (method === "float" && lastFinished && lastFinished.summary !== true) { + const floatResult = await CompactionExtension.floatModePreCheck({ + sessionID, + messages: msgs, + abort, + tokens: lastFinished.tokens, + contextLimit: model.limit.context, + }) + if (floatResult.subCollapsed) { + // Reload and re-filter messages after sub-collapse, then continue loop + // This ensures proper filtering is applied via filterCompacted() + log.info("COLLAPSE float mode sub-collapsed, restarting loop iteration", { sessionID }) + continue + } + } + // context overflow, needs compaction + const config = await Config.get() if ( lastFinished && lastFinished.summary !== true && (await SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model })) ) { - await SessionCompaction.create({ - sessionID, - agent: 
lastUser.agent, - model: lastUser.model, - auto: true, - }) - continue + const insertTriggers = config.compaction?.insertTriggers ?? method === "standard" + + if (insertTriggers) { + // Standard compaction: create trigger message, loop will process it + await SessionCompaction.create({ + sessionID, + agent: lastUser.agent, + model: lastUser.model, + auto: true, + }) + continue + } else { + // Collapse/Float compaction: directly call process without trigger + const result = await SessionCompaction.process({ + messages: msgs, + parentID: lastUser.id, + abort, + sessionID, + auto: true, + }) + if (result === "stop") break + continue + } } // normal processing - const agent = await Agent.get(lastUser.agent) + const agentBase = await Agent.get(lastUser.agent) + // Apply any agent prompt overrides from active knowledge packs. + // If a KP in this session declares `agent..prompt`, it replaces + // the agent's built-in system prompt for this loop iteration only — the + // global Agent registry is never mutated. + const kpAgentPrompts = await KnowledgePack.agentPrompts(sessionID) + const agent = + kpAgentPrompts[agentBase.name] !== undefined + ? { ...agentBase, prompt: kpAgentPrompts[agentBase.name] } + : agentBase const maxSteps = agent.steps ?? Infinity const isLastStep = step >= maxSteps msgs = await insertReminders({ @@ -627,6 +728,26 @@ export namespace SessionPrompt { }) } + // Load knowledge pack messages separately — they sit at time_created=1,2,... + // which is BEFORE any compaction breakpoint, so filterCompacted never returns them. + // We must load them from the full unfiltered message list and prepend explicitly. 
+ const kpMsgs = await KnowledgePack.fromSession(sessionID) + log.debug("KNOWLEDGE PACK session messages", { + sessionID, + count: kpMsgs.length, + ids: kpMsgs.map((m: MessageV2.WithParts) => m.info.id), + names: kpMsgs.map((m: MessageV2.WithParts) => (m.info as MessageV2.User).agent), + }) + + // Prepend knowledge-pack messages into the working array so they flow + // through the plugin transform hook and into toModelMessages. + // kpMsgs sit at time_created=1,2,... which is before any compaction + // breakpoint, so filterCompacted never returns them — we must inject + // them explicitly here. + if (kpMsgs.length > 0) { + msgs.unshift(...clone(kpMsgs)) + } + // Ephemerally wrap queued user messages with a reminder to stay on track if (step > 1 && lastFinished) { for (const msg of msgs) { diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index 22dcfec3553a..5c7f23222172 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -122,6 +122,14 @@ import type { SessionGetResponses, SessionInitErrors, SessionInitResponses, + SessionKnowledgePackAddErrors, + SessionKnowledgePackAddResponses, + SessionKnowledgePackRemoveErrors, + SessionKnowledgePackRemoveResponses, + SessionKnowledgePacksAvailableErrors, + SessionKnowledgePacksAvailableResponses, + SessionKnowledgePacksErrors, + SessionKnowledgePacksResponses, SessionListResponses, SessionMessageErrors, SessionMessageResponses, @@ -1946,6 +1954,158 @@ export class Session2 extends HeyApiClient { }) } + /** + * List knowledge packs + * + * Get all knowledge pack messages injected into a session. 
+ */ + public knowledgePacks( + parameters: { + sessionID: string + directory?: string + workspace?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "sessionID" }, + { in: "query", key: "directory" }, + { in: "query", key: "workspace" }, + ], + }, + ], + ) + return (options?.client ?? this.client).get< + SessionKnowledgePacksResponses, + SessionKnowledgePacksErrors, + ThrowOnError + >({ + url: "/session/{sessionID}/knowledge-packs", + ...options, + ...params, + }) + } + + /** + * List available knowledge packs + * + * Get all knowledge packs available in the library directory (~/.config/opencode/llm_knowledge_packs/). + */ + public knowledgePacksAvailable( + parameters: { + sessionID: string + directory?: string + workspace?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "sessionID" }, + { in: "query", key: "directory" }, + { in: "query", key: "workspace" }, + ], + }, + ], + ) + return (options?.client ?? this.client).get< + SessionKnowledgePacksAvailableResponses, + SessionKnowledgePacksAvailableErrors, + ThrowOnError + >({ + url: "/session/{sessionID}/knowledge-packs/available", + ...options, + ...params, + }) + } + + /** + * Remove a knowledge pack from session + * + * Remove an injected knowledge pack from the session. + */ + public knowledgePackRemove( + parameters: { + sessionID: string + name: string + version: string + directory?: string + workspace?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "sessionID" }, + { in: "path", key: "name" }, + { in: "path", key: "version" }, + { in: "query", key: "directory" }, + { in: "query", key: "workspace" }, + ], + }, + ], + ) + return (options?.client ?? 
this.client).delete< + SessionKnowledgePackRemoveResponses, + SessionKnowledgePackRemoveErrors, + ThrowOnError + >({ + url: "/session/{sessionID}/knowledge-packs/{name}/{version}", + ...options, + ...params, + }) + } + + /** + * Add a knowledge pack to session + * + * Inject a knowledge pack from the library into the session. + */ + public knowledgePackAdd( + parameters: { + sessionID: string + name: string + version: string + directory?: string + workspace?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "sessionID" }, + { in: "path", key: "name" }, + { in: "path", key: "version" }, + { in: "query", key: "directory" }, + { in: "query", key: "workspace" }, + ], + }, + ], + ) + return (options?.client ?? this.client).post< + SessionKnowledgePackAddResponses, + SessionKnowledgePackAddErrors, + ThrowOnError + >({ + url: "/session/{sessionID}/knowledge-packs/{name}/{version}", + ...options, + ...params, + }) + } + /** * Send async message * diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 71e075b39169..bbf4f33ae7e2 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -138,6 +138,7 @@ export type UserMessage = { [key: string]: boolean } variant?: string + flux?: string } export type ProviderAuthError = { @@ -241,6 +242,7 @@ export type AssistantMessage = { structured?: unknown variant?: string finish?: string + flux?: string } export type Message = UserMessage | AssistantMessage @@ -1469,11 +1471,98 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean + /** + * Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint, 'float' automatically sub-collapses oldest chains before evaluating context overflow (default: standard) + */ + method?: "standard" | "collapse" | "float" + 
/** + * Trigger compaction at this fraction of total context (default: 0.85 = 85%) + */ + trigger?: number + /** + * For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65) + */ + extractRatio?: number + /** + * For collapse mode: fraction of newest tokens to use as reference context (default: 0.15) + */ + recentRatio?: number + /** + * For collapse mode: target token count for the summary output (default: 10000) + */ + summaryMaxTokens?: number + /** + * For collapse mode: number of previous summaries to include for context merging (default: 3) + */ + previousSummaries?: number + /** + * Whether to insert compaction trigger messages in the stream. Standard compaction needs triggers (default: true), collapse compaction does not (default: false) + */ + insertTriggers?: boolean + /** + * For collapse mode: allow inserting breakpoints in the middle of chains (default: true). When false, breakpoints only occur at chain boundaries to preserve conversation flow. + */ + splitChain?: boolean + /** + * For collapse mode with splitChain=true: minimum fraction of extractTarget that must be covered when rewinding to chain boundary before falling back to mid-chain split (default: 0.75). E.g. 0.75 means the rewind must still extract at least 75% of the token target to be accepted. + */ + splitChainMinThreshold?: number + /** + * Float mode settings for automatic chain sub-collapse + */ + float?: { + /** + * Number of chains before triggering sub-collapse on oldest chain (default: 3) + */ + chainThreshold?: number + /** + * Minimum fraction of context window that must be used before sub-collapse chains are evaluated (default: 0.6 = 60%). Sub-collapse is skipped entirely when context usage is below this threshold, and stops between chains if usage drops below it. 
+ */ + minFloat?: number + /** + * Sub-collapse algorithm: 'full' includes all context, 'bookend' focuses on user request + final response + tools, 'minimal' uses only final response (default: bookend) + */ + algorithm?: "full" | "bookend" | "minimal" + /** + * Target token count for sub-collapse summaries (default: 5000) + */ + subCollapseSummaryMaxTokens?: number + } /** * Token buffer for compaction. Leaves enough window to avoid overflow during compaction. */ reserved?: number } + /** + * Knowledge pack settings + */ + knowledge?: { + /** + * Enable knowledge pack injection (default: true) + */ + enabled?: boolean + /** + * Additional directories to scan for .yaml knowledge pack files + */ + paths?: Array + /** + * Knowledge packs to enable or disable by default + */ + packs?: Array<{ + /** + * Knowledge pack name + */ + name: string + /** + * Knowledge pack version + */ + version: string + /** + * Whether to enable this knowledge pack by default + */ + enabled: boolean + }> + } experimental?: { disable_paste_summary?: boolean /** @@ -3412,6 +3501,175 @@ export type SessionMessageResponses = { export type SessionMessageResponse = SessionMessageResponses[keyof SessionMessageResponses] +export type SessionKnowledgePacksData = { + body?: never + path: { + /** + * Session ID + */ + sessionID: string + } + query?: { + directory?: string + workspace?: string + } + url: "/session/{sessionID}/knowledge-packs" +} + +export type SessionKnowledgePacksErrors = { + /** + * Bad request + */ + 400: BadRequestError +} + +export type SessionKnowledgePacksError = SessionKnowledgePacksErrors[keyof SessionKnowledgePacksErrors] + +export type SessionKnowledgePacksResponses = { + /** + * Knowledge packs + */ + 200: Array<{ + id: string + name: string + displayName: string + version: string + }> +} + +export type SessionKnowledgePacksResponse = SessionKnowledgePacksResponses[keyof SessionKnowledgePacksResponses] + +export type SessionKnowledgePacksAvailableData = { + body?: never + 
path: { + /** + * Session ID + */ + sessionID: string + } + query?: { + directory?: string + workspace?: string + } + url: "/session/{sessionID}/knowledge-packs/available" +} + +export type SessionKnowledgePacksAvailableErrors = { + /** + * Bad request + */ + 400: BadRequestError +} + +export type SessionKnowledgePacksAvailableError = + SessionKnowledgePacksAvailableErrors[keyof SessionKnowledgePacksAvailableErrors] + +export type SessionKnowledgePacksAvailableResponses = { + /** + * Available knowledge packs + */ + 200: Array<{ + name: string + displayName: string + version: string + enabled: boolean + }> +} + +export type SessionKnowledgePacksAvailableResponse = + SessionKnowledgePacksAvailableResponses[keyof SessionKnowledgePacksAvailableResponses] + +export type SessionKnowledgePackRemoveData = { + body?: never + path: { + /** + * Session ID + */ + sessionID: string + /** + * Knowledge pack name + */ + name: string + /** + * Knowledge pack version + */ + version: string + } + query?: { + directory?: string + workspace?: string + } + url: "/session/{sessionID}/knowledge-packs/{name}/{version}" +} + +export type SessionKnowledgePackRemoveErrors = { + /** + * Bad request + */ + 400: BadRequestError + /** + * Not found + */ + 404: NotFoundError +} + +export type SessionKnowledgePackRemoveError = SessionKnowledgePackRemoveErrors[keyof SessionKnowledgePackRemoveErrors] + +export type SessionKnowledgePackRemoveResponses = { + /** + * Knowledge pack removed + */ + 200: boolean +} + +export type SessionKnowledgePackRemoveResponse = + SessionKnowledgePackRemoveResponses[keyof SessionKnowledgePackRemoveResponses] + +export type SessionKnowledgePackAddData = { + body?: never + path: { + /** + * Session ID + */ + sessionID: string + /** + * Knowledge pack name + */ + name: string + /** + * Knowledge pack version + */ + version: string + } + query?: { + directory?: string + workspace?: string + } + url: "/session/{sessionID}/knowledge-packs/{name}/{version}" +} + +export 
type SessionKnowledgePackAddErrors = { + /** + * Bad request + */ + 400: BadRequestError + /** + * Not found + */ + 404: NotFoundError +} + +export type SessionKnowledgePackAddError = SessionKnowledgePackAddErrors[keyof SessionKnowledgePackAddErrors] + +export type SessionKnowledgePackAddResponses = { + /** + * Knowledge pack added + */ + 200: boolean +} + +export type SessionKnowledgePackAddResponse = SessionKnowledgePackAddResponses[keyof SessionKnowledgePackAddResponses] + export type PartDeleteData = { body?: never path: { diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index d1198c11dd53..0c72bd6d137f 100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -3206,6 +3206,336 @@ ] } }, + "/session/{sessionID}/knowledge-packs": { + "get": { + "operationId": "session.knowledgePacks", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "workspace", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "sessionID", + "schema": { + "type": "string" + }, + "required": true, + "description": "Session ID" + } + ], + "summary": "List knowledge packs", + "description": "Get all knowledge pack messages injected into a session.", + "responses": { + "200": { + "description": "Knowledge packs", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "displayName": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "required": ["id", "name", "displayName", "version"] + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BadRequestError" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = 
createOpencodeClient()\nawait client.session.knowledgePacks({\n ...\n})" + } + ] + } + }, + "/session/{sessionID}/knowledge-packs/available": { + "get": { + "operationId": "session.knowledgePacksAvailable", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "workspace", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "sessionID", + "schema": { + "type": "string" + }, + "required": true, + "description": "Session ID" + } + ], + "summary": "List available knowledge packs", + "description": "Get all knowledge packs available in the library directory (~/.config/opencode/llm_knowledge_packs/).", + "responses": { + "200": { + "description": "Available knowledge packs", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "displayName": { + "type": "string" + }, + "version": { + "type": "string" + }, + "enabled": { + "type": "boolean" + } + }, + "required": ["name", "displayName", "version", "enabled"] + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BadRequestError" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.session.knowledgePacksAvailable({\n ...\n})" + } + ] + } + }, + "/session/{sessionID}/knowledge-packs/{name}/{version}": { + "post": { + "operationId": "session.knowledgePackAdd", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "workspace", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "sessionID", + "schema": { + "type": "string" + }, + "required": true, + "description": "Session ID" + }, + { + "in": 
"path", + "name": "name", + "schema": { + "type": "string" + }, + "required": true, + "description": "Knowledge pack name" + }, + { + "in": "path", + "name": "version", + "schema": { + "type": "string" + }, + "required": true, + "description": "Knowledge pack version" + } + ], + "summary": "Add a knowledge pack to session", + "description": "Inject a knowledge pack from the library into the session.", + "responses": { + "200": { + "description": "Knowledge pack added", + "content": { + "application/json": { + "schema": { + "type": "boolean" + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BadRequestError" + } + } + } + }, + "404": { + "description": "Not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundError" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.session.knowledgePackAdd({\n ...\n})" + } + ] + }, + "delete": { + "operationId": "session.knowledgePackRemove", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "workspace", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "sessionID", + "schema": { + "type": "string" + }, + "required": true, + "description": "Session ID" + }, + { + "in": "path", + "name": "name", + "schema": { + "type": "string" + }, + "required": true, + "description": "Knowledge pack name" + }, + { + "in": "path", + "name": "version", + "schema": { + "type": "string" + }, + "required": true, + "description": "Knowledge pack version" + } + ], + "summary": "Remove a knowledge pack from session", + "description": "Remove an injected knowledge pack from the session.", + "responses": { + "200": { + "description": "Knowledge pack removed", + "content": { + 
"application/json": { + "schema": { + "type": "boolean" + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BadRequestError" + } + } + } + }, + "404": { + "description": "Not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundError" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.session.knowledgePackRemove({\n ...\n})" + } + ] + } + }, "/session/{sessionID}/message/{messageID}/part/{partID}": { "delete": { "operationId": "part.delete", @@ -7259,6 +7589,9 @@ }, "variant": { "type": "string" + }, + "flux": { + "type": "string" } }, "required": ["id", "sessionID", "role", "time", "agent", "model"] @@ -7546,6 +7879,9 @@ }, "finish": { "type": "string" + }, + "flux": { + "type": "string" } }, "required": [ @@ -10593,6 +10929,84 @@ "description": "Enable pruning of old tool outputs (default: true)", "type": "boolean" }, + "method": { + "description": "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint, 'float' automatically sub-collapses oldest chains before evaluating context overflow (default: standard)", + "type": "string", + "enum": ["standard", "collapse", "float"] + }, + "trigger": { + "description": "Trigger compaction at this fraction of total context (default: 0.85 = 85%)", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "extractRatio": { + "description": "For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "recentRatio": { + "description": "For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + 
"summaryMaxTokens": { + "description": "For collapse mode: target token count for the summary output (default: 10000)", + "type": "number", + "minimum": 1000, + "maximum": 50000 + }, + "previousSummaries": { + "description": "For collapse mode: number of previous summaries to include for context merging (default: 3)", + "type": "number", + "minimum": 0, + "maximum": 10 + }, + "insertTriggers": { + "description": "Whether to insert compaction trigger messages in the stream. Standard compaction needs triggers (default: true), collapse compaction does not (default: false)", + "type": "boolean" + }, + "splitChain": { + "description": "For collapse mode: allow inserting breakpoints in the middle of chains (default: true). When false, breakpoints only occur at chain boundaries to preserve conversation flow.", + "type": "boolean" + }, + "splitChainMinThreshold": { + "description": "For collapse mode with splitChain=true: minimum fraction of extractTarget that must be covered when rewinding to chain boundary before falling back to mid-chain split (default: 0.75). E.g. 0.75 means the rewind must still extract at least 75% of the token target to be accepted.", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "float": { + "description": "Float mode settings for automatic chain sub-collapse", + "type": "object", + "properties": { + "chainThreshold": { + "description": "Number of chains before triggering sub-collapse on oldest chain (default: 3)", + "type": "number", + "minimum": 1, + "maximum": 20 + }, + "minFloat": { + "description": "Minimum fraction of context window that must be used before sub-collapse chains are evaluated (default: 0.6 = 60%). 
Sub-collapse is skipped entirely when context usage is below this threshold, and stops between chains if usage drops below it.", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "algorithm": { + "description": "Sub-collapse algorithm: 'full' includes all context, 'bookend' focuses on user request + final response + tools, 'minimal' uses only final response (default: bookend)", + "type": "string", + "enum": ["full", "bookend", "minimal"] + }, + "subCollapseSummaryMaxTokens": { + "description": "Target token count for sub-collapse summaries (default: 5000)", + "type": "number", + "minimum": 500, + "maximum": 20000 + } + } + }, "reserved": { "description": "Token buffer for compaction. Leaves enough window to avoid overflow during compaction.", "type": "integer", @@ -10601,6 +11015,45 @@ } } }, + "knowledge": { + "description": "Knowledge pack settings", + "type": "object", + "properties": { + "enabled": { + "description": "Enable knowledge pack injection (default: true)", + "type": "boolean" + }, + "paths": { + "description": "Additional directories to scan for .yaml knowledge pack files", + "type": "array", + "items": { + "type": "string" + } + }, + "packs": { + "description": "Knowledge packs to enable or disable by default", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "description": "Knowledge pack name", + "type": "string" + }, + "version": { + "description": "Knowledge pack version", + "type": "string" + }, + "enabled": { + "description": "Whether to enable this knowledge pack by default", + "type": "boolean" + } + }, + "required": ["name", "version", "enabled"] + } + } + } + }, "experimental": { "type": "object", "properties": { From 8756834518b9c6465802d071f247231eb04b0a0b Mon Sep 17 00:00:00 2001 From: ryan Date: Sat, 28 Feb 2026 20:01:17 -0700 Subject: [PATCH 2/2] feat: add compaction model override for session compaction operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Adds a dedicated compaction model selector so users can run one model for chat and a different model for summarization (e.g. Claude Sonnet for interactive coding, Zen Big Pickle free tier for zero-cost compaction). Model resolution priority: TUI selection > agent.compaction.model config > session model. When no TUI selection is set, behavior is identical to upstream. Changes: - DialogModel gains target="compaction" prop — no duplicate component - SessionCompaction.process() accepts optional compactionModel override - CompactionPart schema extended with optional compactionModel field - compaction_model_list keybind added (default: none) - /compaction-models slash command and command menu entry - local.model.compaction context backed by kv.signal('compaction_model') - Prompt footer shows active compaction model when set - SDK regenerated via ./script/generate.ts --- packages/opencode/src/cli/cmd/tui/app.tsx | 14 +++++ .../cli/cmd/tui/component/dialog-model.tsx | 53 ++++++++++++++----- .../cli/cmd/tui/component/prompt/index.tsx | 4 ++ .../src/cli/cmd/tui/context/local.tsx | 40 ++++++++++++++ .../src/cli/cmd/tui/routes/session/index.tsx | 2 + packages/opencode/src/config/config.ts | 1 + .../opencode/src/server/routes/session.ts | 9 +++- .../src/session/compaction-extension.ts | 11 ++-- packages/opencode/src/session/compaction.ts | 16 ++++-- packages/opencode/src/session/message-v2.ts | 8 ++- packages/opencode/src/session/prompt.ts | 1 + packages/sdk/js/src/v2/gen/sdk.gen.ts | 5 ++ packages/sdk/js/src/v2/gen/types.gen.ts | 8 +++ packages/sdk/openapi.json | 24 +++++++++ 14 files changed, 175 insertions(+), 21 deletions(-) diff --git a/packages/opencode/src/cli/cmd/tui/app.tsx b/packages/opencode/src/cli/cmd/tui/app.tsx index 3304d6be6a6d..5dd1e982291e 100644 --- a/packages/opencode/src/cli/cmd/tui/app.tsx +++ b/packages/opencode/src/cli/cmd/tui/app.tsx @@ -445,6 +445,20 @@ function App() { local.model.cycleFavorite(-1) }, }, + { + title: 
"Switch compaction model", + value: "compaction_model.list", + keybind: "compaction_model_list", + category: "Agent", + slash: { + name: "compaction-models", + aliases: ["compaction-model"], + }, + onSelect: () => { + dialog.replace(() => ) + }, + }, + { title: "Switch agent", value: "agent.list", diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-model.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-model.tsx index c30b8d12a933..ab3c5ed2d022 100644 --- a/packages/opencode/src/cli/cmd/tui/component/dialog-model.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-model.tsx @@ -15,7 +15,7 @@ export function useConnected() { ) } -export function DialogModel(props: { providerID?: string }) { +export function DialogModel(props: { providerID?: string; target?: "session" | "compaction" }) { const local = useLocal() const sync = useSync() const dialog = useDialog() @@ -25,14 +25,41 @@ export function DialogModel(props: { providerID?: string }) { const connected = useConnected() const providers = createDialogProviderOptions() + const isCompaction = props.target === "compaction" + const showExtra = createMemo(() => connected() && !props.providerID) + function onModelSelect(model: { providerID: string; modelID: string }) { + dialog.clear() + if (isCompaction) { + local.model.compaction.set(model) + return + } + local.model.set(model, { recent: true }) + } + const options = createMemo(() => { const needle = query().trim() const showSections = showExtra() && needle.length === 0 const favorites = connected() ? local.model.favorite() : [] const recents = local.model.recent() + // "Use session model (default)" option only shown in compaction mode + const defaultOption = isCompaction + ? [ + { + value: { providerID: "", modelID: "" }, + title: "Use session model (default)", + description: "Compaction will use the same model as the session", + category: showSections ? 
"Default" : undefined, + onSelect: () => { + dialog.clear() + local.model.compaction.clear() + }, + }, + ] + : [] + function toOptions(items: typeof favorites, category: string) { if (!showSections) return [] return items.flatMap((item) => { @@ -49,10 +76,7 @@ export function DialogModel(props: { providerID?: string }) { category, disabled: provider.id === "opencode" && model.id.includes("-nano"), footer: model.cost?.input === 0 && provider.id === "opencode" ? "Free" : undefined, - onSelect: () => { - dialog.clear() - local.model.set({ providerID: provider.id, modelID: model.id }, { recent: true }) - }, + onSelect: () => onModelSelect({ providerID: provider.id, modelID: model.id }), }, ] }) @@ -87,10 +111,7 @@ export function DialogModel(props: { providerID?: string }) { category: connected() ? provider.name : undefined, disabled: provider.id === "opencode" && model.includes("-nano"), footer: info.cost?.input === 0 && provider.id === "opencode" ? "Free" : undefined, - onSelect() { - dialog.clear() - local.model.set({ providerID: provider.id, modelID: model }, { recent: true }) - }, + onSelect: () => onModelSelect({ providerID: provider.id, modelID: model }), })), filter((x) => { if (!showSections) return true @@ -121,19 +142,22 @@ export function DialogModel(props: { providerID?: string }) { if (needle) { return [ + ...defaultOption, ...fuzzysort.go(needle, providerOptions, { keys: ["title", "category"] }).map((x) => x.obj), ...fuzzysort.go(needle, popularProviders, { keys: ["title"] }).map((x) => x.obj), ] } - return [...favoriteOptions, ...recentOptions, ...providerOptions, ...popularProviders] + return [...defaultOption, ...favoriteOptions, ...recentOptions, ...providerOptions, ...popularProviders] }) const provider = createMemo(() => props.providerID ? sync.data.provider.find((x) => x.id === props.providerID) : null, ) - const title = createMemo(() => provider()?.name ?? "Select model") + const title = createMemo(() => (isCompaction ? 
"Select compaction model" : (provider()?.name ?? "Select model"))) + + const current = createMemo(() => (isCompaction ? local.model.compaction.current() : local.model.current())) return ( [number]["value"]> @@ -151,7 +175,10 @@ export function DialogModel(props: { providerID?: string }) { title: "Favorite", disabled: !connected(), onTrigger: (option) => { - local.model.toggleFavorite(option.value as { providerID: string; modelID: string }) + const val = option.value as { providerID: string; modelID: string } + if (val.providerID && val.modelID) { + local.model.toggleFavorite(val) + } }, }, ]} @@ -159,7 +186,7 @@ export function DialogModel(props: { providerID?: string }) { flat={true} skipFilter={true} title={title()} - current={local.model.current()} + current={current()} /> ) } diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index d63c248fb83e..d3a7d4f279c4 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -1012,6 +1012,10 @@ export function Prompt(props: PromptProps) { {local.model.variant.current()} + + · + compact: {local.model.compaction.parsed().model} + diff --git a/packages/opencode/src/cli/cmd/tui/context/local.tsx b/packages/opencode/src/cli/cmd/tui/context/local.tsx index d93079f12a42..77197135ea57 100644 --- a/packages/opencode/src/cli/cmd/tui/context/local.tsx +++ b/packages/opencode/src/cli/cmd/tui/context/local.tsx @@ -13,6 +13,7 @@ import { useArgs } from "./args" import { useSDK } from "./sdk" import { RGBA } from "@opentui/core" import { Filesystem } from "@/util/filesystem" +import { useKV } from "./kv" export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ name: "Local", @@ -20,6 +21,7 @@ export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ const sync = useSync() const sdk = useSDK() const toast = useToast() + 
const kv = useKV() function isModelValid(model: { providerID: string; modelID: string }) { const provider = sync.data.provider.find((x) => x.id === model.providerID) @@ -320,6 +322,44 @@ export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ save() }) }, + compaction: iife(() => { + const key = "compaction_model" + const [get] = kv.signal<{ providerID: string; modelID: string } | undefined>(key, undefined) + return { + current() { + return get() as { providerID: string; modelID: string } | undefined + }, + parsed: createMemo(() => { + const value = get() as { providerID: string; modelID: string } | undefined + if (!value) { + return { + provider: undefined, + model: "Using session model", + } + } + const provider = sync.data.provider.find((x) => x.id === value.providerID) + const info = provider?.models[value.modelID] + return { + provider: provider?.name ?? value.providerID, + model: info?.name ?? value.modelID, + } + }), + set(model: { providerID: string; modelID: string }) { + if (!isModelValid(model)) { + toast.show({ + message: `Model ${model.providerID}/${model.modelID} is not valid`, + variant: "warning", + duration: 3000, + }) + return + } + kv.set(key, { ...model }) + }, + clear() { + kv.set(key, undefined) + }, + } + }), variant: { current() { const m = currentModel() diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index 90631ffe0cc0..845cdab29cd5 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -461,10 +461,12 @@ export function Session() { }) return } + const compactionModel = local.model.compaction.current() sdk.client.session.summarize({ sessionID: route.sessionID, modelID: selectedModel.modelID, providerID: selectedModel.providerID, + compactionModel: compactionModel ?? 
undefined, }) dialog.clear() }, diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index bc6d338218b6..3bad4627f77e 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -809,6 +809,7 @@ export namespace Config { model_cycle_recent_reverse: z.string().optional().default("shift+f2").describe("Previous recently used model"), model_cycle_favorite: z.string().optional().default("none").describe("Next favorite model"), model_cycle_favorite_reverse: z.string().optional().default("none").describe("Previous favorite model"), + compaction_model_list: z.string().optional().default("none").describe("List available compaction models"), command_list: z.string().optional().default("ctrl+p").describe("List available commands"), agent_list: z.string().optional().default("a").describe("List agents"), agent_cycle: z.string().optional().default("tab").describe("Next agent"), diff --git a/packages/opencode/src/server/routes/session.ts b/packages/opencode/src/server/routes/session.ts index 8daa7c405c20..c0947a7b285d 100644 --- a/packages/opencode/src/server/routes/session.ts +++ b/packages/opencode/src/server/routes/session.ts @@ -17,7 +17,7 @@ import { Log } from "../../util/log" import { PermissionNext } from "@/permission/next" import { errors } from "../error" import { lazy } from "../../util/lazy" -import { SessionProxyMiddleware } from "../../control-plane/session-proxy-middleware" + import { Config } from "../../config/config" const log = Log.create({ service: "server" }) @@ -564,6 +564,12 @@ export const SessionRoutes = lazy(() => providerID: z.string(), modelID: z.string(), auto: z.boolean().optional().default(false), + compactionModel: z + .object({ + providerID: z.string(), + modelID: z.string(), + }) + .optional(), }), ), async (c) => { @@ -588,6 +594,7 @@ export const SessionRoutes = lazy(() => modelID: body.modelID, }, auto: body.auto, + compactionModel: body.compactionModel, }) await 
SessionPrompt.loop({ sessionID }) return c.json(true) diff --git a/packages/opencode/src/session/compaction-extension.ts b/packages/opencode/src/session/compaction-extension.ts index a473bc9d0854..868849928358 100644 --- a/packages/opencode/src/session/compaction-extension.ts +++ b/packages/opencode/src/session/compaction-extension.ts @@ -184,6 +184,8 @@ Critical rules: sessionID: string abort: AbortSignal auto: boolean + compactionModel?: { providerID: string; modelID: string } + overflow?: boolean }): Promise<"continue" | "stop"> { const config = await Config.get() const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio @@ -206,9 +208,12 @@ Critical rules: // Get the user message to determine which model we'll use const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User const agent = await Agent.get("compaction") - const model = agent.model - ? await Provider.getModel(agent.model.providerID, agent.model.modelID) - : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) + // Model resolution priority: TUI compactionModel override > agent.compaction.model config > session model + const model = input.compactionModel + ? await Provider.getModel(input.compactionModel.providerID, input.compactionModel.modelID) + : agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) // Calculate token counts and role counts let messageTokens: number[] = [] diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index da641fb02cbd..febea16be193 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -108,6 +108,7 @@ export namespace SessionCompaction { export async function process(input: { parentID: string + compactionModel?: { providerID: string; modelID: string } messages: MessageV2.WithParts[] sessionID: string abort: AbortSignal @@ -175,9 +176,11 @@ export namespace SessionCompaction { } const agent = await Agent.get("compaction") - const model = agent.model - ? await Provider.getModel(agent.model.providerID, agent.model.modelID) - : await Provider.getModel(userMessage.model.providerID, userMessage.model.modelID) + const model = input.compactionModel + ? await Provider.getModel(input.compactionModel.providerID, input.compactionModel.modelID) + : agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(userMessage.model.providerID, userMessage.model.modelID) const msg = (await Session.updateMessage({ id: Identifier.ascending("message"), role: "assistant", @@ -349,6 +352,12 @@ When constructing the summary, try to stick to this template: }), auto: z.boolean(), overflow: z.boolean().optional(), + compactionModel: z + .object({ + providerID: z.string(), + modelID: z.string(), + }) + .optional(), }), async (input) => { const msg = await Session.updateMessage({ @@ -368,6 +377,7 @@ When constructing the summary, try to stick to this template: type: "compaction", auto: input.auto, overflow: input.overflow, + compactionModel: input.compactionModel, }) }, ) diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index bdcacbf3804e..cc9d55a9ca1a 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -204,6 +204,12 @@ export namespace MessageV2 { type: z.literal("compaction"), auto: z.boolean(), overflow: z.boolean().optional(), + compactionModel: z + .object({ + providerID: z.string(), + modelID: z.string(), + }) + .optional(), }).meta({ ref: "CompactionPart", }) @@ -875,7 +881,7 @@ export namespace MessageV2 { // Upstream guard: do not mark errored summaries as completed breakpoints. // Collapse compaction may not set finish, but summary: true is sufficient; // however an errored summary must not be treated as a valid breakpoint. 
- if (isAssistantSummary && !msg.info.error) { + if (isAssistantSummary && !(msg.info as Assistant).error) { const parentID = (msg.info as Assistant).parentID log.debug("COLLAPSE filterCompacted found summary", { msgId: msg.info.id, diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index b64dc0502f34..21ab14d3f94c 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -582,6 +582,7 @@ export namespace SessionPrompt { sessionID, auto: task.auto, overflow: task.overflow, + compactionModel: task.compactionModel, }) if (result === "stop") break continue diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index 5c7f23222172..9b80459eb082 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -1758,6 +1758,10 @@ export class Session2 extends HeyApiClient { providerID?: string modelID?: string auto?: boolean + compactionModel?: { + providerID: string + modelID: string + } }, options?: Options, ) { @@ -1772,6 +1776,7 @@ export class Session2 extends HeyApiClient { { in: "body", key: "providerID" }, { in: "body", key: "modelID" }, { in: "body", key: "auto" }, + { in: "body", key: "compactionModel" }, ], }, ], diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index bbf4f33ae7e2..1d0f5a3e8485 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -508,6 +508,10 @@ export type CompactionPart = { type: "compaction" auto: boolean overflow?: boolean + compactionModel?: { + providerID: string + modelID: string + } } export type Part = @@ -3281,6 +3285,10 @@ export type SessionSummarizeData = { providerID: string modelID: string auto?: boolean + compactionModel?: { + providerID: string + modelID: string + } } path: { /** diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index 0c72bd6d137f..0a48340d2f05 
100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -2779,6 +2779,18 @@ "auto": { "default": false, "type": "boolean" + }, + "compactionModel": { + "type": "object", + "properties": { + "providerID": { + "type": "string" + }, + "modelID": { + "type": "string" + } + }, + "required": ["providerID", "modelID"] } }, "required": ["providerID", "modelID"] @@ -8659,6 +8671,18 @@ }, "overflow": { "type": "boolean" + }, + "compactionModel": { + "type": "object", + "properties": { + "providerID": { + "type": "string" + }, + "modelID": { + "type": "string" + } + }, + "required": ["providerID", "modelID"] } }, "required": ["id", "sessionID", "messageID", "type", "auto"]