From 8c917d832f12b4bcb2742c21d34b398c96f10c14 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Sat, 22 Nov 2025 19:48:14 +0000
Subject: [PATCH 1/5] feat: add token count display to session view

Enable token visibility to help users understand API costs and context
window utilization. Display estimated tokens for user messages, and
actual output tokens plus the cumulative context percentage for
assistant messages. Include a command palette entry to toggle the token
display.
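A note on the arithmetic: the context figure is cumulative input-side
usage (input + cache reads + cache writes) over the model's context
window, with a 200000-token fallback when the model entry is missing.
A small TypeScript sketch with illustrative numbers, mirroring the
contextLimit/percentage memos in the diff below:

    // numbers are made up for illustration
    const tokens = { input: 12_000, cache: { read: 38_000, write: 2_000 } }
    const cumulative = tokens.input + tokens.cache.read + tokens.cache.write // 52_000
    const percentage = Math.round((cumulative / 200_000) * 100) // 26%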
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 58 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 0169c68e617c..f85aa10ec6e8 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -28,6 +28,7 @@ import { Prompt, type PromptRef } from "@tui/component/prompt"
 import type { AssistantMessage, Part, ToolPart, UserMessage, TextPart, ReasoningPart } from "@opencode-ai/sdk"
 import { useLocal } from "@tui/context/local"
 import { Locale } from "@/util/locale"
+import { Token } from "@/util/token"
 import type { Tool } from "@/tool/tool"
 import type { ReadTool } from "@/tool/read"
 import type { WriteTool } from "@/tool/write"
@@ -80,6 +81,7 @@ const context = createContext<{
   conceal: () => boolean
   showThinking: () => boolean
   showTimestamps: () => boolean
+  showTokens: () => boolean
 }>()

 function use() {
@@ -106,11 +108,20 @@ export function Session() {
     return messages().findLast((x) => x.role === "assistant")
   })

+  const local = useLocal()
+
+  const contextLimit = createMemo(() => {
+    const c = local.model.current()
+    const provider = sync.data.provider.find((p) => p.id === c.providerID)
+    return provider?.models[c.modelID]?.limit.context ?? 200000
+  })
+
   const dimensions = useTerminalDimensions()
   const [sidebar, setSidebar] = createSignal<"show" | "hide" | "auto">(kv.get("sidebar", "auto"))
   const [conceal, setConceal] = createSignal(true)
   const [showThinking, setShowThinking] = createSignal(true)
   const [showTimestamps, setShowTimestamps] = createSignal(kv.get("timestamps", "hide") === "show")
+  const [showTokens, setShowTokens] = createSignal(kv.get("tokens", "hide") === "show")

   const wide = createMemo(() => dimensions().width > 120)
   const sidebarVisible = createMemo(() => sidebar() === "show" || (sidebar() === "auto" && wide()))
@@ -204,8 +215,6 @@ export function Session() {
     }, 50)
   }

-  const local = useLocal()
-
   function moveChild(direction: number) {
     const parentID = session()?.parentID ?? session()?.id
     let children = sync.data.session
@@ -428,6 +437,19 @@ export function Session() {
           dialog.clear()
         },
       },
+      {
+        title: "Toggle tokens",
+        value: "session.toggle.tokens",
+        category: "Session",
+        onSelect: (dialog) => {
+          setShowTokens((prev) => {
+            const next = !prev
+            kv.set("tokens", next ? "show" : "hide")
+            return next
+          })
+          dialog.clear()
+        },
+      },
       {
         title: "Page up",
         value: "session.page.up",
@@ -729,6 +751,7 @@ export function Session() {
         conceal,
         showThinking,
         showTimestamps,
+        showTokens,
       }}
     >
@@ -864,6 +887,7 @@ export function Session() {
                       last={lastAssistant()?.id === message.id}
                       message={message as AssistantMessage}
                       parts={sync.data.part[message.id] ?? []}
+                      contextLimit={contextLimit()}
                     />
@@ -917,6 +941,13 @@ function UserMessage(props: {
   const queued = createMemo(() => props.pending && props.message.id > props.pending)
   const color = createMemo(() => (queued() ? theme.accent : theme.secondary))

+  const individualTokens = createMemo(() => {
+    return props.parts.reduce((sum, part) => {
+      if (part.type === "text") return sum + Token.estimate(part.text)
+      return sum
+    }, 0)
+  })
+
   const compaction = createMemo(() => props.parts.find((x) => x.type === "compaction"))

   return (
@@ -977,6 +1008,9 @@ function UserMessage(props: {
            >
              QUEUED
            </text>
+            <Show when={ctx.showTokens() && individualTokens() > 0}>
+              <text fg={theme.secondary}>⬝~{individualTokens().toLocaleString()} tok</text>
+            </Show>
@@ -994,7 +1028,8 @@ function UserMessage(props: {
   )
 }

-function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean }) {
+function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean; contextLimit: number }) {
+  const ctx = use()
   const local = useLocal()
   const { theme } = useTheme()
   const sync = useSync()
@@ -1012,6 +1047,16 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.time.completed - user.time.created
   })

+  const individualTokens = createMemo(() => props.message.tokens.output)
+  const cumulativeTokens = createMemo(
+    () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
+  )
+
+  const percentage = createMemo(() => {
+    if (!props.contextLimit) return 0
+    return Math.round((cumulativeTokens() / props.contextLimit) * 100)
+  })
+
   return (
     <>
@@ -1053,6 +1098,13 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
+        <Show when={ctx.showTokens() && individualTokens() > 0}>
+          <text fg={theme.secondary}>
+            {" "}
+            ⬝{individualTokens().toLocaleString()} tok · {cumulativeTokens().toLocaleString()} context (
+            {percentage()}%)
+          </text>
+        </Show>

From 9fabaf53a2f302af4b0a15a42146b68305fa7a46 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 13:59:39 +0000
Subject: [PATCH 2/5] feat: add streaming token estimation display

Display estimated output tokens (~X tok) during assistant message
streaming, using character-based estimation (4 chars = 1 token). Replace
the estimate with the actual count on completion, with change detection
to minimise message broadcasts.
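A standalone sketch of why the change detection matters (hypothetical
deltas; the real logic lives in processor.ts below): the rounded
estimate only moves every ~4 characters, so comparing against the
previous value suppresses most per-delta broadcasts.

    let previous = 0
    let text = ""
    for (const delta of ["Hel", "lo", ", wor", "ld!"]) {
      text += delta
      const estimate = Math.round(text.length / 4) // same rule as Token.estimate
      if (estimate !== previous) {
        previous = estimate
        console.log(`broadcast: ~${estimate} tok`) // fires twice for four deltas
      }
    }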
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 15 ++++++++++++---
 packages/opencode/src/session/message-v2.ts  |  1 +
 packages/opencode/src/session/processor.ts   | 10 +++++++++-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index f85aa10ec6e8..baaa08bfb05d 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1048,6 +1048,12 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
   })

   const individualTokens = createMemo(() => props.message.tokens.output)
+  const outputEstimate = createMemo(() => props.message.outputEstimate)
+  const outputDisplay = createMemo(() => {
+    const estimate = outputEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    return individualTokens().toLocaleString()
+  })
   const cumulativeTokens = createMemo(
     () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
   )
@@ -1098,11 +1104,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && individualTokens() > 0}>
+        <Show when={ctx.showTokens() && (individualTokens() > 0 || outputEstimate() !== undefined)}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝{individualTokens().toLocaleString()} tok · {cumulativeTokens().toLocaleString()} context (
-            {percentage()}%)
+            ⬝{outputDisplay()} tok
+            <Show when={cumulativeTokens() > 0}>
+              {" "}
+              · {cumulativeTokens().toLocaleString()} context ({percentage()}%)
+            </Show>
           </text>
         </Show>
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index c451ae2b38d5..a7fe2f8438a5 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -360,6 +360,7 @@ export namespace MessageV2 {
         write: z.number(),
       }),
    }),
+    outputEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 6d1125c66b32..7c5349ca81f0 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -248,6 +249,7 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
+            input.assistantMessage.outputEstimate = undefined
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -297,11 +299,17 @@ export namespace SessionProcessor {
             if (currentText) {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
-              if (currentText.text)
+              if (currentText.text) {
+                const estimate = Token.estimate(currentText.text)
+                if (input.assistantMessage.outputEstimate !== estimate) {
+                  input.assistantMessage.outputEstimate = estimate
+                  await Session.updateMessage(input.assistantMessage)
+                }
                 await Session.updatePart({
                   part: currentText,
                   delta: value.text,
                 })
+              }
             }
             break

From 09aaec4f3344595228c4f0316995b94d6a0999fa Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 17:03:41 +0000
Subject: [PATCH 3/5] feat: stream token estimates with accumulation

Add real-time token estimation during streaming, with separate tracking
for output (~tok) and reasoning (~think) tokens. Accumulate estimates
across multiple text/reasoning blocks within a step and across multiple
steps (tool calls). Include change detection to minimise message
broadcasts and a clean transition from estimates to actual counts on
completion.
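The accumulation scheme in processor.ts, sketched standalone
(hypothetical shape, not the actual stream-part types): completed
blocks contribute a frozen character total, blocks still streaming
contribute their current length, and the sum is converted to tokens
once per delta.

    let completedChars = 0                   // frozen at reasoning-end / text-end
    const active = new Map<string, string>() // block id -> text still streaming

    function onDelta(id: string, delta: string): number {
      active.set(id, (active.get(id) ?? "") + delta)
      let liveChars = 0
      for (const text of active.values()) liveChars += text.length
      return Math.round((completedChars + liveChars) / 4) // running ~token estimate
    }

    function onEnd(id: string): void {
      completedChars += active.get(id)?.length ?? 0
      active.delete(id)
    }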
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 38 +++++++++++++++----
 packages/opencode/src/session/message-v2.ts  |  1 +
 packages/opencode/src/session/processor.ts   | 19 ++++++++--
 packages/opencode/src/session/prompt.ts      |  2 +
 4 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index baaa08bfb05d..954c0b958aee 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1047,12 +1047,30 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.time.completed - user.time.created
   })

-  const individualTokens = createMemo(() => props.message.tokens.output)
+  // Output tokens
+  const outputTokens = createMemo(() => props.message.tokens.output)
   const outputEstimate = createMemo(() => props.message.outputEstimate)
+
+  // Reasoning tokens (must be defined BEFORE outputDisplay)
+  const reasoningTokens = createMemo(() => props.message.tokens.reasoning)
+  const reasoningEstimate = createMemo(() => props.message.reasoningEstimate)
+
   const outputDisplay = createMemo(() => {
     const estimate = outputEstimate()
     if (estimate !== undefined) return "~" + estimate.toLocaleString()
-    return individualTokens().toLocaleString()
+    const tokens = outputTokens()
+    if (tokens > 0) return tokens.toLocaleString()
+    // Show ~0 during streaming when we have reasoning but no output yet
+    if (reasoningEstimate() !== undefined || reasoningTokens() > 0) return "~0"
+    return undefined
+  })
+
+  const reasoningDisplay = createMemo(() => {
+    const estimate = reasoningEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    const tokens = reasoningTokens()
+    if (tokens > 0) return tokens.toLocaleString()
+    return undefined
   })
   const cumulativeTokens = createMemo(
     () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
@@ -1104,13 +1122,19 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && (individualTokens() > 0 || outputEstimate() !== undefined)}>
+        <Show when={ctx.showTokens() && outputDisplay() !== undefined}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝{outputDisplay()} tok
-            <Show when={cumulativeTokens() > 0}>
-              {" "}
-              · {cumulativeTokens().toLocaleString()} context ({percentage()}%)
+            ⬝ {outputDisplay()} tok
+            <Show when={reasoningDisplay() !== undefined}>
+              {" · "}
+              {reasoningDisplay()} think
+            </Show>
+            <Show
+              when={cumulativeTokens() > 0 || outputEstimate() !== undefined || reasoningEstimate() !== undefined}
+            >
+              {" · "}
+              {cumulativeTokens().toLocaleString()} context ({percentage()}%)
             </Show>
           </text>
         </Show>
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index a7fe2f8438a5..15e33040e601 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -361,6 +361,7 @@ export namespace MessageV2 {
      }),
    }),
    outputEstimate: z.number().optional(),
+    reasoningEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 7c5349ca81f0..42c2229b5e38 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,7 +11,6 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
-import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -41,6 +40,9 @@ export namespace SessionProcessor {
     },
     async process(fn: () => StreamTextResult<Record<string, AITool>, never>) {
       log.info("process")
+      // Initialize from existing estimates (reverse the /4 calculation) to accumulate across multiple process() calls
+      let reasoningTotal = (input.assistantMessage.reasoningEstimate ?? 0) * 4
+      let textTotal = (input.assistantMessage.outputEstimate ?? 0) * 4
       while (true) {
         try {
           let currentText: MessageV2.TextPart | undefined
@@ -76,7 +78,15 @@ export namespace SessionProcessor {
               const part = reasoningMap[value.id]
               part.text += value.text
               if (value.providerMetadata) part.metadata = value.providerMetadata
-              if (part.text) await Session.updatePart({ part, delta: value.text })
+              if (part.text) {
+                const active = Object.values(reasoningMap).reduce((sum, p) => sum + p.text.length, 0)
+                const estimate = Math.round(Math.max(0, (reasoningTotal + active) / 4))
+                if (input.assistantMessage.reasoningEstimate !== estimate) {
+                  input.assistantMessage.reasoningEstimate = estimate
+                  await Session.updateMessage(input.assistantMessage)
+                }
+                await Session.updatePart({ part, delta: value.text })
+              }
             }
             break
@@ -90,6 +100,7 @@ export namespace SessionProcessor {
                 end: Date.now(),
               }
               if (value.providerMetadata) part.metadata = value.providerMetadata
+              reasoningTotal += part.text.length
               await Session.updatePart(part)
               delete reasoningMap[value.id]
             }
@@ -249,7 +260,6 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
-            input.assistantMessage.outputEstimate = undefined
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -300,7 +310,7 @@ export namespace SessionProcessor {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
               if (currentText.text) {
-                const estimate = Token.estimate(currentText.text)
+                const estimate = Math.round(Math.max(0, (textTotal + currentText.text.length) / 4))
                 if (input.assistantMessage.outputEstimate !== estimate) {
                   input.assistantMessage.outputEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
@@ -321,6 +331,7 @@ export namespace SessionProcessor {
                 end: Date.now(),
               }
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
+              textTotal += currentText.text.length
               await Session.updatePart(currentText)
             }
             currentText = undefined
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index b3c3c4671682..f51c91fb625d 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -456,6 +456,8 @@ export namespace SessionPrompt {
             created: Date.now(),
           },
           sessionID,
+          outputEstimate: lastAssistant?.outputEstimate,
+          reasoningEstimate: lastAssistant?.reasoningEstimate,
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,

From 0f7d18ade907d7c9bdb619406f9e7fd4145e1304 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 21:19:27 +0000
Subject: [PATCH 4/5] fix: improve outbound token counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add sentEstimate/contextEstimate fields to the message schema for
  tracking tokens sent to the API
- Extract tool result token calculation into a shared helper
- Replace the magic number 4 with a semantic CHARS_PER_TOKEN constant
- Fix the accumulation logic to always include user message tokens (not
  just as a fallback)
- Ensure tool result tokens are counted when sent back to the API
- Update the TUI display to show accurate IN↓/OUT↑ token counts
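Usage sketch for the shared helper (shapes abridged, inputs
hypothetical; figures follow the 4-chars-per-token rule):

    const parts = [
      { type: "text" }, // non-tool parts are skipped
      { type: "tool", state: { status: "completed", input: { path: "a.ts" }, output: "export const x = 1", time: {} } },
      { type: "tool", state: { status: "error", input: { cmd: "ls" }, error: "ENOENT" } },
    ]
    // input JSON + outputs + error text all count toward the OUT↑ estimate
    const total = Token.calculateToolResultTokens(parts) // 4 + 5 + 3 + 2 = 14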
---
 .../src/cli/cmd/tui/routes/session/index.tsx |  57 +++++---
 packages/opencode/src/session/compaction.ts  |   7 +
 packages/opencode/src/session/message-v2.ts  |   4 +
 packages/opencode/src/session/processor.ts   |  13 +-
 packages/opencode/src/session/prompt.ts      | 130 +++++++++++-------
 packages/opencode/src/util/token.ts          |  42 ++++++
 packages/sdk/js/src/gen/types.gen.ts         |   4 +
 7 files changed, 183 insertions(+), 74 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 954c0b958aee..fbc8e6e05fef 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1039,32 +1039,52 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish)
   })

+  // Find the parent user message (reused by duration and token calculations)
+  const user = createMemo(() => messages().find((x) => x.role === "user" && x.id === props.message.parentID))
+
   const duration = createMemo(() => {
     if (!final()) return 0
     if (!props.message.time.completed) return 0
-    const user = messages().find((x) => x.role === "user" && x.id === props.message.parentID)
-    if (!user || !user.time) return 0
-    return props.message.time.completed - user.time.created
+    const u = user()
+    if (!u || !u.time) return 0
+    return props.message.time.completed - u.time.created
   })

-  // Output tokens
-  const outputTokens = createMemo(() => props.message.tokens.output)
-  const outputEstimate = createMemo(() => props.message.outputEstimate)
+  // OUT tokens (sent TO the API) - includes user text + tool results from the previous assistant
+  const outEstimate = createMemo(() => props.message.sentEstimate)
+
+  // IN tokens (from the API)
+  const inTokens = createMemo(() => props.message.tokens.output)
+  const inEstimate = createMemo(() => props.message.outputEstimate)

-  // Reasoning tokens (must be defined BEFORE outputDisplay)
+  // Reasoning tokens (must be defined BEFORE inDisplay)
   const reasoningTokens = createMemo(() => props.message.tokens.reasoning)
   const reasoningEstimate = createMemo(() => props.message.reasoningEstimate)

-  const outputDisplay = createMemo(() => {
-    const estimate = outputEstimate()
+  const outDisplay = createMemo(() => {
+    const estimate = outEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    const tokens = props.message.tokens.input
+    if (tokens > 0) return tokens.toLocaleString()
+    return "0"
+  })
+
+  const inDisplay = createMemo(() => {
+    const estimate = inEstimate()
     if (estimate !== undefined) return "~" + estimate.toLocaleString()
-    const tokens = outputTokens()
+    const tokens = inTokens()
     if (tokens > 0) return tokens.toLocaleString()
     // Show ~0 during streaming when we have reasoning but no output yet
     if (reasoningEstimate() !== undefined || reasoningTokens() > 0) return "~0"
     return undefined
   })

+  const tokensDisplay = createMemo(() => {
+    const inVal = inDisplay()
+    if (!inVal) return undefined
+    return `${inVal}↓/${outDisplay()}↑`
+  })
+
   const reasoningDisplay = createMemo(() => {
     const estimate = reasoningEstimate()
@@ -1072,9 +1092,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     if (tokens > 0) return tokens.toLocaleString()
     return undefined
   })
-  const cumulativeTokens = createMemo(
-    () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
-  )
+
+  const contextEstimate = createMemo(() => props.message.contextEstimate)
+
+  const cumulativeTokens = createMemo(() => {
+    const estimate = contextEstimate()
+    if (estimate !== undefined) return estimate
+    return props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write
+  })

   const percentage = createMemo(() => {
     if (!props.contextLimit) return 0
@@ -1122,16 +1147,16 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && outputDisplay() !== undefined}>
+        <Show when={ctx.showTokens() && tokensDisplay() !== undefined}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝ {outputDisplay()} tok
+            ⬝ {tokensDisplay()} tok
             <Show when={reasoningDisplay() !== undefined}>
               {" · "}
               {reasoningDisplay()} think
             </Show>
             <Show
-              when={cumulativeTokens() > 0 || outputEstimate() !== undefined || reasoningEstimate() !== undefined}
+              when={cumulativeTokens() > 0 || inEstimate() !== undefined || reasoningEstimate() !== undefined}
             >
               {" · "}
               {cumulativeTokens().toLocaleString()} context ({percentage()}%)
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 1255d39f0b8e..bfd935537b23 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -98,6 +98,9 @@ export namespace SessionCompaction {
   }) {
     const model = await Provider.getModel(input.model.providerID, input.model.modelID)
     const system = [...SystemPrompt.compaction(model.providerID)]
+    const lastFinished = input.messages.find((m) => m.info.role === "assistant" && m.info.finish)?.info as
+      | MessageV2.Assistant
+      | undefined
     const msg = (await Session.updateMessage({
       id: Identifier.ascending("message"),
       role: "assistant",
@@ -121,6 +124,10 @@ export namespace SessionCompaction {
       time: {
         created: Date.now(),
       },
+      outputEstimate: lastFinished?.outputEstimate,
+      reasoningEstimate: lastFinished?.reasoningEstimate,
+      contextEstimate: lastFinished?.contextEstimate,
+      sentEstimate: lastFinished?.sentEstimate,
     })) as MessageV2.Assistant
     const processor = SessionProcessor.create({
       assistantMessage: msg,
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 15e33040e601..e9e6d5e72d80 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -301,6 +301,8 @@ export namespace MessageV2 {
      }),
    system: z.string().optional(),
    tools: z.record(z.string(), z.boolean()).optional(),
+    sentEstimate: z.number().optional(),
+    contextEstimate: z.number().optional(),
  }).meta({
    ref: "UserMessage",
  })
@@ -362,6 +364,8 @@ export namespace MessageV2 {
    outputEstimate: z.number().optional(),
    reasoningEstimate: z.number().optional(),
+    contextEstimate: z.number().optional(),
+    sentEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 42c2229b5e38..570879df1b45 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -40,9 +41,9 @@ export namespace SessionProcessor {
     },
     async process(fn: () => StreamTextResult<Record<string, AITool>, never>) {
       log.info("process")
-      // Initialize from existing estimates (reverse the /4 calculation) to accumulate across multiple process() calls
-      let reasoningTotal = (input.assistantMessage.reasoningEstimate ?? 0) * 4
-      let textTotal = (input.assistantMessage.outputEstimate ?? 0) * 4
+      // Initialize from existing estimates (convert tokens to characters) to accumulate across multiple process() calls
+      let reasoningTotal = Token.toCharCount(input.assistantMessage.reasoningEstimate ?? 0)
+      let textTotal = Token.toCharCount(input.assistantMessage.outputEstimate ?? 0)
       while (true) {
         try {
           let currentText: MessageV2.TextPart | undefined
@@ -80,7 +81,7 @@ export namespace SessionProcessor {
               if (value.providerMetadata) part.metadata = value.providerMetadata
               if (part.text) {
                 const active = Object.values(reasoningMap).reduce((sum, p) => sum + p.text.length, 0)
-                const estimate = Math.round(Math.max(0, (reasoningTotal + active) / 4))
+                const estimate = Token.toTokenEstimate(Math.max(0, reasoningTotal + active))
                 if (input.assistantMessage.reasoningEstimate !== estimate) {
                   input.assistantMessage.reasoningEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
@@ -260,6 +261,8 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
+            input.assistantMessage.contextEstimate =
+              usage.tokens.input + usage.tokens.cache.read + usage.tokens.cache.write
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -310,7 +313,7 @@ export namespace SessionProcessor {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
               if (currentText.text) {
-                const estimate = Math.round(Math.max(0, (textTotal + currentText.text.length) / 4))
+                const estimate = Token.toTokenEstimate(Math.max(0, textTotal + currentText.text.length))
                 if (input.assistantMessage.outputEstimate !== estimate) {
                   input.assistantMessage.outputEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index f51c91fb625d..ef1e07a4c6eb 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -48,6 +48,7 @@ import { fn } from "@/util/fn"
 import { SessionProcessor } from "./processor"
 import { TaskTool } from "@/tool/task"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 // @ts-ignore
 globalThis.AI_SDK_LOG_WARNINGS = false
@@ -313,71 +314,50 @@ export namespace SessionPrompt {
           time: {
             created: Date.now(),
           },
+          outputEstimate: lastFinished?.outputEstimate,
+          reasoningEstimate: lastFinished?.reasoningEstimate,
+          contextEstimate: lastFinished?.contextEstimate,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0),
         })) as MessageV2.Assistant
-      let part = (await Session.updatePart({
+
+      const part: MessageV2.ToolPart = {
+        type: "tool",
         id: Identifier.ascending("part"),
         messageID: assistantMessage.id,
-        sessionID: assistantMessage.sessionID,
-        type: "tool",
+        sessionID,
+        tool: "task",
         callID: ulid(),
-        tool: TaskTool.id,
         state: {
           status: "running",
-          input: {
-            prompt: task.prompt,
-            description: task.description,
-            subagent_type: task.agent,
-          },
           time: {
             start: Date.now(),
           },
+          input: {
+            prompt: task.prompt,
+            description: task.description,
+            subagent_type: task.agent,
+          },
         },
-      })) as MessageV2.ToolPart
-      const result = await taskTool
-        .execute(
-          {
+      }
+      await Session.updatePart(part)
+
+      const result = await taskTool.execute(
+        {
           prompt: task.prompt,
           description: task.description,
           subagent_type: task.agent,
         },
-        {
-          agent: task.agent,
-          messageID: assistantMessage.id,
-          sessionID: sessionID,
-          abort,
-          async metadata(input) {
-            await Session.updatePart({
-              ...part,
-              type: "tool",
-              state: {
-                ...part.state,
-                ...input,
-              },
-            } satisfies MessageV2.ToolPart)
-          },
-        },
-      )
-        .catch(() => {})
-      assistantMessage.finish = "tool-calls"
-      assistantMessage.time.completed = Date.now()
-      await Session.updateMessage(assistantMessage)
-      if (result && part.state.status === "running") {
-        await Session.updatePart({
-          ...part,
-          state: {
-            status: "completed",
-            input: part.state.input,
-            title: result.title,
-            metadata: result.metadata,
-            output: result.output,
-            attachments: result.attachments,
-            time: {
-              ...part.state.time,
-              end: Date.now(),
-            },
-          },
-        } satisfies MessageV2.ToolPart)
-      }
+        {
+          sessionID,
+          abort,
+          agent: lastUser.agent,
+          messageID: assistantMessage.id,
+          callID: part.callID,
+          extra: { providerID: model.providerID, modelID: model.modelID },
+          metadata: async () => {},
+        },
+      )
+
       if (!result) {
         await Session.updatePart({
           ...part,
@@ -433,6 +413,17 @@ export namespace SessionPrompt {
         messages: msgs,
         agent,
       })
+
+      // Calculate tokens for tool results from the previous assistant that will be sent in this API call
+      // Reuse parts from already-loaded messages to avoid a redundant query
+      let toolResultTokens = 0
+      if (lastAssistant && step > 1) {
+        const assistantMessage = msgs.find((m) => m.info.id === lastAssistant.id)
+        if (assistantMessage) {
+          toolResultTokens = Token.calculateToolResultTokens(assistantMessage.parts)
+        }
+      }
+
       const processor = SessionProcessor.create({
         assistantMessage: (await Session.updateMessage({
           id: Identifier.ascending("message"),
@@ -456,8 +447,10 @@ export namespace SessionPrompt {
             created: Date.now(),
           },
           sessionID,
-          outputEstimate: lastAssistant?.outputEstimate,
-          reasoningEstimate: lastAssistant?.reasoningEstimate,
+          outputEstimate: lastFinished?.outputEstimate,
+          reasoningEstimate: lastFinished?.reasoningEstimate,
+          contextEstimate: lastFinished?.contextEstimate,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0) + toolResultTokens,
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,
@@ -1067,6 +1060,25 @@ export namespace SessionPrompt {
         },
       )

+      const userText = parts
+        .filter((p) => p.type === "text" && !(p as MessageV2.TextPart).synthetic)
+        .map((p) => (p as MessageV2.TextPart).text)
+        .join("")
+
+      // Calculate user message tokens
+      let sentTokens = Token.estimate(userText)
+
+      // Add tokens from tool results that will be sent with this message
+      // Tool results from the previous assistant message are included in the API request
+      const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID))
+      const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
+      if (lastAssistant) {
+        sentTokens += Token.calculateToolResultTokens(lastAssistant.parts)
+      }
+
+      info.sentEstimate = sentTokens
+      info.contextEstimate = sentTokens
+
       await Session.updateMessage(info)
       for (const part of parts) {
         await Session.updatePart(part)
@@ -1136,6 +1148,8 @@ export namespace SessionPrompt {
       providerID: model.providerID,
       modelID: model.modelID,
     },
+      sentEstimate: 0,
+      contextEstimate: 0,
    }
    await Session.updateMessage(userMsg)
    const userPart: MessageV2.Part = {
@@ -1148,6 +1162,12 @@ export namespace SessionPrompt {
    }
    await Session.updatePart(userPart)

+    const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID))
+    const lastFinished = msgs.find((m) => m.info.role === "assistant" && m.info.finish)?.info as
+      | MessageV2.Assistant
+      | undefined
+    const lastAssistant = msgs.find((m) => m.info.role === "assistant")?.info as MessageV2.Assistant | undefined
+
    const msg: MessageV2.Assistant = {
      id: Identifier.ascending("message"),
      sessionID: input.sessionID,
@@ -1170,6 +1190,10 @@ export namespace SessionPrompt {
      },
      modelID: model.modelID,
      providerID: model.providerID,
+      outputEstimate: lastFinished?.outputEstimate,
+      reasoningEstimate: lastFinished?.reasoningEstimate,
+      contextEstimate: lastFinished?.contextEstimate,
+      sentEstimate: (lastAssistant?.sentEstimate || 0) + (userMsg.sentEstimate || 0),
    }
    await Session.updateMessage(msg)
    const part: MessageV2.Part = {
diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index cee5adc37713..58b33855c975 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -4,4 +4,46 @@ export namespace Token {
   export function estimate(input: string) {
     return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN))
   }
+
+  /**
+   * Convert a token estimate to a character count
+   * Used when accumulating text across stream deltas
+   */
+  export function toCharCount(tokenEstimate: number): number {
+    return tokenEstimate * CHARS_PER_TOKEN
+  }
+
+  /**
+   * Convert a character count to a token estimate
+   * Used when converting accumulated text back to tokens
+   */
+  export function toTokenEstimate(charCount: number): number {
+    return Math.round(charCount / CHARS_PER_TOKEN)
+  }
+
+  /**
+   * Calculate tokens for tool results that will be sent to the API
+   * Includes tool input JSON, output (or the compaction message), and errors
+   */
+  export function calculateToolResultTokens(parts: Array<{ type: string; state?: any }>) {
+    let tokens = 0
+    for (const part of parts) {
+      if (part.type === "tool") {
+        // Tool input is sent in both completed and error states
+        tokens += estimate(JSON.stringify(part.state.input))
+
+        if (part.state.status === "completed") {
+          // Tool result output - check if compacted
+          const output = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output
+          tokens += estimate(output)
+        }
+
+        if (part.state.status === "error") {
+          // Tool error text is sent back to the API
+          tokens += estimate(part.state.error)
+        }
+      }
+    }
+    return tokens
+  }
 }
diff --git a/packages/sdk/js/src/gen/types.gen.ts b/packages/sdk/js/src/gen/types.gen.ts
index e2e611db13a8..c6c86adb9933 100644
--- a/packages/sdk/js/src/gen/types.gen.ts
+++ b/packages/sdk/js/src/gen/types.gen.ts
@@ -130,6 +130,10 @@ export type AssistantMessage = {
       write: number
     }
   }
+  outputEstimate?: number
+  reasoningEstimate?: number
+  contextEstimate?: number
+  sentEstimate?: number
   finish?: string
 }

From d7aefec3a4f6ffedf3702de587cb55052b943200 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Tue, 25 Nov 2025 16:22:32 +0000
Subject: [PATCH 5/5] fix: synthetic and noReply message token counting

Fix the token counting logic to properly account for synthetic and
noReply messages in session totals:

- Account for synthetic content (e.g. skill documentation) in token
  counts by checking the ignored flag instead of the synthetic flag
- Fix the sentEstimate formula to prevent double-counting tool result
  tokens
- Maintain cumulative session totals for accurate usage tracking
- Include noReply messages in session token calculations

This ensures users see accurate cumulative token usage per session,
without inflated totals from double-counting.
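The double-count, in miniature (illustrative numbers): before this fix,
tool-result tokens entered the running total twice — once inside the
user message's sentEstimate and once via the explicit toolResultTokens
term — and because each assistant message chains off the previous
total, the overstatement carried forward every turn.

    const toolTokens = 500 // ~tokens of tool output echoed back to the API
    let before = 0
    let after = 0
    for (let turn = 1; turn <= 3; turn++) {
      before += toolTokens * 2 // counted in lastUser.sentEstimate AND toolResultTokens
      after += toolTokens      // counted once
    }
    // before === 3000, after === 1500: the inflation compounds across turns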
---
 packages/opencode/src/session/prompt.ts |  4 ++--
 packages/opencode/src/util/token.ts     | 16 ++++++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index ef1e07a4c6eb..9b36b3d2632e 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -450,7 +450,7 @@ export namespace SessionPrompt {
           outputEstimate: lastFinished?.outputEstimate,
           reasoningEstimate: lastFinished?.reasoningEstimate,
           contextEstimate: lastFinished?.contextEstimate,
-          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0) + toolResultTokens,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0),
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,
@@ -1061,7 +1061,7 @@ export namespace SessionPrompt {
       )

       const userText = parts
-        .filter((p) => p.type === "text" && !(p as MessageV2.TextPart).synthetic)
+        .filter((p) => p.type === "text" && !p.ignored)
         .map((p) => (p as MessageV2.TextPart).text)
         .join("")

diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index 58b33855c975..fc47a98ae0a5 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -29,17 +29,21 @@ export namespace Token {
     let tokens = 0
     for (const part of parts) {
       if (part.type === "tool") {
-        // Tool input is sent in both completed and error states
-        tokens += estimate(JSON.stringify(part.state.input))
+        // Skip tool parts that have no state yet
+        if (!part.state) continue
+
+        // Tool input is sent in both completed and error states
+        if (part.state.input) {
+          tokens += estimate(JSON.stringify(part.state.input))
+        }

         if (part.state.status === "completed") {
-          // Tool result output - check if compacted
-          const output = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output
+          // Use optional chaining for the compacted check
+          const output = part.state.time?.compacted ? "[Old tool result content cleared]" : (part.state.output ?? "")
           tokens += estimate(output)
         }

-        if (part.state.status === "error") {
-          // Tool error text is sent back to the API
+        if (part.state.status === "error" && part.state.error) {
           tokens += estimate(part.state.error)
         }
       }