From 8c917d832f12b4bcb2742c21d34b398c96f10c14 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Sat, 22 Nov 2025 19:48:14 +0000
Subject: [PATCH 1/5] feat: add token count display to session view

Enable token visibility to help users understand API costs and context
window utilization. Display estimated tokens for user messages, and
actual output tokens plus the cumulative context percentage for
assistant messages. Include a command palette entry to toggle the token
display.
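A note on the arithmetic: the context figure is cumulative input-side
usage (input + cache reads + cache writes) over the model's context
window, with a 200000-token fallback when the model entry is missing.
A small TypeScript sketch with illustrative numbers, mirroring the
contextLimit/percentage memos in the diff below:

    // numbers are made up for illustration
    const tokens = { input: 12_000, cache: { read: 38_000, write: 2_000 } }
    const cumulative = tokens.input + tokens.cache.read + tokens.cache.write // 52_000
    const percentage = Math.round((cumulative / 200_000) * 100) // 26%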
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 58 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 0169c68e617c..f85aa10ec6e8 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -28,6 +28,7 @@ import { Prompt, type PromptRef } from "@tui/component/prompt"
 import type { AssistantMessage, Part, ToolPart, UserMessage, TextPart, ReasoningPart } from "@opencode-ai/sdk"
 import { useLocal } from "@tui/context/local"
 import { Locale } from "@/util/locale"
+import { Token } from "@/util/token"
 import type { Tool } from "@/tool/tool"
 import type { ReadTool } from "@/tool/read"
 import type { WriteTool } from "@/tool/write"
@@ -80,6 +81,7 @@ const context = createContext<{
   conceal: () => boolean
   showThinking: () => boolean
   showTimestamps: () => boolean
+  showTokens: () => boolean
 }>()

 function use() {
@@ -106,11 +108,20 @@ export function Session() {
     return messages().findLast((x) => x.role === "assistant")
   })

+  const local = useLocal()
+
+  const contextLimit = createMemo(() => {
+    const c = local.model.current()
+    const provider = sync.data.provider.find((p) => p.id === c.providerID)
+    return provider?.models[c.modelID]?.limit.context ?? 200000
+  })
+
   const dimensions = useTerminalDimensions()
   const [sidebar, setSidebar] = createSignal<"show" | "hide" | "auto">(kv.get("sidebar", "auto"))
   const [conceal, setConceal] = createSignal(true)
   const [showThinking, setShowThinking] = createSignal(true)
   const [showTimestamps, setShowTimestamps] = createSignal(kv.get("timestamps", "hide") === "show")
+  const [showTokens, setShowTokens] = createSignal(kv.get("tokens", "hide") === "show")

   const wide = createMemo(() => dimensions().width > 120)
   const sidebarVisible = createMemo(() => sidebar() === "show" || (sidebar() === "auto" && wide()))
@@ -204,8 +215,6 @@ export function Session() {
     }, 50)
   }

-  const local = useLocal()
-
   function moveChild(direction: number) {
     const parentID = session()?.parentID ?? session()?.id
     let children = sync.data.session
@@ -428,6 +437,19 @@ export function Session() {
           dialog.clear()
         },
       },
+      {
+        title: "Toggle tokens",
+        value: "session.toggle.tokens",
+        category: "Session",
+        onSelect: (dialog) => {
+          setShowTokens((prev) => {
+            const next = !prev
+            kv.set("tokens", next ? "show" : "hide")
+            return next
+          })
+          dialog.clear()
+        },
+      },
       {
         title: "Page up",
         value: "session.page.up",
@@ -729,6 +751,7 @@ export function Session() {
         conceal,
         showThinking,
         showTimestamps,
+        showTokens,
       }}
     >
@@ -864,6 +887,7 @@ export function Session() {
                       last={lastAssistant()?.id === message.id}
                       message={message as AssistantMessage}
                       parts={sync.data.part[message.id] ?? []}
+                      contextLimit={contextLimit()}
                     />
@@ -917,6 +941,13 @@ function UserMessage(props: {
   const queued = createMemo(() => props.pending && props.message.id > props.pending)
   const color = createMemo(() => (queued() ? theme.accent : theme.secondary))

+  const individualTokens = createMemo(() => {
+    return props.parts.reduce((sum, part) => {
+      if (part.type === "text") return sum + Token.estimate(part.text)
+      return sum
+    }, 0)
+  })
+
   const compaction = createMemo(() => props.parts.find((x) => x.type === "compaction"))

   return (
@@ -977,6 +1008,9 @@ function UserMessage(props: {
            >
              QUEUED
            </text>
+            <Show when={ctx.showTokens() && individualTokens() > 0}>
+              <text fg={theme.secondary}>⬝~{individualTokens().toLocaleString()} tok</text>
+            </Show>
@@ -994,7 +1028,8 @@ function UserMessage(props: {
   )
 }

-function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean }) {
+function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean; contextLimit: number }) {
+  const ctx = use()
   const local = useLocal()
   const { theme } = useTheme()
   const sync = useSync()
@@ -1012,6 +1047,16 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.time.completed - user.time.created
   })

+  const individualTokens = createMemo(() => props.message.tokens.output)
+  const cumulativeTokens = createMemo(
+    () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
+  )
+
+  const percentage = createMemo(() => {
+    if (!props.contextLimit) return 0
+    return Math.round((cumulativeTokens() / props.contextLimit) * 100)
+  })
+
   return (
     <>
@@ -1053,6 +1098,13 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
+        <Show when={ctx.showTokens() && individualTokens() > 0}>
+          <text fg={theme.secondary}>
+            {" "}
+            ⬝{individualTokens().toLocaleString()} tok · {cumulativeTokens().toLocaleString()} context (
+            {percentage()}%)
+          </text>
+        </Show>

From 9fabaf53a2f302af4b0a15a42146b68305fa7a46 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 13:59:39 +0000
Subject: [PATCH 2/5] feat: add streaming token estimation display

Display estimated output tokens (~X tok) during assistant message
streaming, using character-based estimation (4 chars = 1 token). Replace
the estimate with the actual count on completion, with change detection
to minimise message broadcasts.
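A standalone sketch of why the change detection matters (hypothetical
deltas; the real logic lives in processor.ts below): the rounded
estimate only moves every ~4 characters, so comparing against the
previous value suppresses most per-delta broadcasts.

    let previous = 0
    let text = ""
    for (const delta of ["Hel", "lo", ", wor", "ld!"]) {
      text += delta
      const estimate = Math.round(text.length / 4) // same rule as Token.estimate
      if (estimate !== previous) {
        previous = estimate
        console.log(`broadcast: ~${estimate} tok`) // fires twice for four deltas
      }
    }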
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 15 ++++++++++++---
 packages/opencode/src/session/message-v2.ts  |  1 +
 packages/opencode/src/session/processor.ts   | 10 +++++++++-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index f85aa10ec6e8..baaa08bfb05d 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1048,6 +1048,12 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
   })

   const individualTokens = createMemo(() => props.message.tokens.output)
+  const outputEstimate = createMemo(() => props.message.outputEstimate)
+  const outputDisplay = createMemo(() => {
+    const estimate = outputEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    return individualTokens().toLocaleString()
+  })
   const cumulativeTokens = createMemo(
     () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
   )
@@ -1098,11 +1104,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && individualTokens() > 0}>
+        <Show when={ctx.showTokens() && (individualTokens() > 0 || outputEstimate() !== undefined)}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝{individualTokens().toLocaleString()} tok · {cumulativeTokens().toLocaleString()} context (
-            {percentage()}%)
+            ⬝{outputDisplay()} tok
+            <Show when={cumulativeTokens() > 0}>
+              {" "}
+              · {cumulativeTokens().toLocaleString()} context ({percentage()}%)
+            </Show>
           </text>
         </Show>
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index c451ae2b38d5..a7fe2f8438a5 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -360,6 +360,7 @@ export namespace MessageV2 {
         write: z.number(),
       }),
    }),
+    outputEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 6d1125c66b32..7c5349ca81f0 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -248,6 +249,7 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
+            input.assistantMessage.outputEstimate = undefined
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -297,11 +299,17 @@ export namespace SessionProcessor {
             if (currentText) {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
-              if (currentText.text)
+              if (currentText.text) {
+                const estimate = Token.estimate(currentText.text)
+                if (input.assistantMessage.outputEstimate !== estimate) {
+                  input.assistantMessage.outputEstimate = estimate
+                  await Session.updateMessage(input.assistantMessage)
+                }
                 await Session.updatePart({
                   part: currentText,
                   delta: value.text,
                 })
+              }
             }
             break

From 09aaec4f3344595228c4f0316995b94d6a0999fa Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 17:03:41 +0000
Subject: [PATCH 3/5] feat: stream token estimates with accumulation

Add real-time token estimation during streaming, with separate tracking
for output (~tok) and reasoning (~think) tokens. Accumulate estimates
across multiple text/reasoning blocks within a step and across multiple
steps (tool calls). Include change detection to minimise message
broadcasts and a clean transition from estimates to actual counts on
completion.
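The accumulation scheme in processor.ts, sketched standalone
(hypothetical shape, not the actual stream-part types): completed
blocks contribute a frozen character total, blocks still streaming
contribute their current length, and the sum is converted to tokens
once per delta.

    let completedChars = 0                   // frozen at reasoning-end / text-end
    const active = new Map<string, string>() // block id -> text still streaming

    function onDelta(id: string, delta: string): number {
      active.set(id, (active.get(id) ?? "") + delta)
      let liveChars = 0
      for (const text of active.values()) liveChars += text.length
      return Math.round((completedChars + liveChars) / 4) // running ~token estimate
    }

    function onEnd(id: string): void {
      completedChars += active.get(id)?.length ?? 0
      active.delete(id)
    }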
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 38 +++++++++++++++----
 packages/opencode/src/session/message-v2.ts  |  1 +
 packages/opencode/src/session/processor.ts   | 19 ++++++++--
 packages/opencode/src/session/prompt.ts      |  2 +
 4 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index baaa08bfb05d..954c0b958aee 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1047,12 +1047,30 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.time.completed - user.time.created
   })

-  const individualTokens = createMemo(() => props.message.tokens.output)
+  // Output tokens
+  const outputTokens = createMemo(() => props.message.tokens.output)
   const outputEstimate = createMemo(() => props.message.outputEstimate)
+
+  // Reasoning tokens (must be defined BEFORE outputDisplay)
+  const reasoningTokens = createMemo(() => props.message.tokens.reasoning)
+  const reasoningEstimate = createMemo(() => props.message.reasoningEstimate)
+
   const outputDisplay = createMemo(() => {
     const estimate = outputEstimate()
     if (estimate !== undefined) return "~" + estimate.toLocaleString()
-    return individualTokens().toLocaleString()
+    const tokens = outputTokens()
+    if (tokens > 0) return tokens.toLocaleString()
+    // Show ~0 during streaming when we have reasoning but no output yet
+    if (reasoningEstimate() !== undefined || reasoningTokens() > 0) return "~0"
+    return undefined
+  })
+
+  const reasoningDisplay = createMemo(() => {
+    const estimate = reasoningEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    const tokens = reasoningTokens()
+    if (tokens > 0) return tokens.toLocaleString()
+    return undefined
   })
   const cumulativeTokens = createMemo(
     () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
@@ -1104,13 +1122,19 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && (individualTokens() > 0 || outputEstimate() !== undefined)}>
+        <Show when={ctx.showTokens() && outputDisplay() !== undefined}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝{outputDisplay()} tok
-            <Show when={cumulativeTokens() > 0}>
-              {" "}
-              · {cumulativeTokens().toLocaleString()} context ({percentage()}%)
+            ⬝ {outputDisplay()} tok
+            <Show when={reasoningDisplay() !== undefined}>
+              {" · "}
+              {reasoningDisplay()} think
+            </Show>
+            <Show
+              when={cumulativeTokens() > 0 || outputEstimate() !== undefined || reasoningEstimate() !== undefined}
+            >
+              {" · "}
+              {cumulativeTokens().toLocaleString()} context ({percentage()}%)
             </Show>
           </text>
         </Show>
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index a7fe2f8438a5..15e33040e601 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -361,6 +361,7 @@ export namespace MessageV2 {
      }),
    }),
    outputEstimate: z.number().optional(),
+    reasoningEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 7c5349ca81f0..42c2229b5e38 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,7 +11,6 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
-import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -41,6 +40,9 @@ export namespace SessionProcessor {
     },
     async process(fn: () => StreamTextResult<Record<string, AITool>, never>) {
       log.info("process")
+      // Initialize from existing estimates (reverse the /4 calculation) to accumulate across multiple process() calls
+      let reasoningTotal = (input.assistantMessage.reasoningEstimate ?? 0) * 4
+      let textTotal = (input.assistantMessage.outputEstimate ?? 0) * 4
       while (true) {
         try {
           let currentText: MessageV2.TextPart | undefined
@@ -76,7 +78,15 @@ export namespace SessionProcessor {
               const part = reasoningMap[value.id]
               part.text += value.text
               if (value.providerMetadata) part.metadata = value.providerMetadata
-              if (part.text) await Session.updatePart({ part, delta: value.text })
+              if (part.text) {
+                const active = Object.values(reasoningMap).reduce((sum, p) => sum + p.text.length, 0)
+                const estimate = Math.round(Math.max(0, (reasoningTotal + active) / 4))
+                if (input.assistantMessage.reasoningEstimate !== estimate) {
+                  input.assistantMessage.reasoningEstimate = estimate
+                  await Session.updateMessage(input.assistantMessage)
+                }
+                await Session.updatePart({ part, delta: value.text })
+              }
             }
             break
@@ -90,6 +100,7 @@ export namespace SessionProcessor {
                 end: Date.now(),
               }
               if (value.providerMetadata) part.metadata = value.providerMetadata
+              reasoningTotal += part.text.length
               await Session.updatePart(part)
               delete reasoningMap[value.id]
             }
@@ -249,7 +260,6 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
-            input.assistantMessage.outputEstimate = undefined
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -300,7 +310,7 @@ export namespace SessionProcessor {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
               if (currentText.text) {
-                const estimate = Token.estimate(currentText.text)
+                const estimate = Math.round(Math.max(0, (textTotal + currentText.text.length) / 4))
                 if (input.assistantMessage.outputEstimate !== estimate) {
                   input.assistantMessage.outputEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
@@ -321,6 +331,7 @@ export namespace SessionProcessor {
                 end: Date.now(),
               }
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
+              textTotal += currentText.text.length
               await Session.updatePart(currentText)
             }
             currentText = undefined
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index b3c3c4671682..f51c91fb625d 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -456,6 +456,8 @@ export namespace SessionPrompt {
             created: Date.now(),
           },
           sessionID,
+          outputEstimate: lastAssistant?.outputEstimate,
+          reasoningEstimate: lastAssistant?.reasoningEstimate,
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,

From 0f7d18ade907d7c9bdb619406f9e7fd4145e1304 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Mon, 24 Nov 2025 21:19:27 +0000
Subject: [PATCH 4/5] fix: improve outbound token counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add sentEstimate/contextEstimate fields to the message schema for
  tracking tokens sent to the API
- Extract tool result token calculation into a shared helper
- Replace the magic number 4 with a semantic CHARS_PER_TOKEN constant
- Fix the accumulation logic to always include user message tokens (not
  just as a fallback)
- Ensure tool result tokens are counted when sent back to the API
- Update the TUI display to show accurate IN↓/OUT↑ token counts
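Usage sketch for the shared helper (shapes abridged, inputs
hypothetical; figures follow the 4-chars-per-token rule):

    const parts = [
      { type: "text" }, // non-tool parts are skipped
      { type: "tool", state: { status: "completed", input: { path: "a.ts" }, output: "export const x = 1", time: {} } },
      { type: "tool", state: { status: "error", input: { cmd: "ls" }, error: "ENOENT" } },
    ]
    // input JSON + outputs + error text all count toward the OUT↑ estimate
    const total = Token.calculateToolResultTokens(parts) // 4 + 5 + 3 + 2 = 14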
---
 .../src/cli/cmd/tui/routes/session/index.tsx |  57 +++++---
 packages/opencode/src/session/compaction.ts  |   7 +
 packages/opencode/src/session/message-v2.ts  |   4 +
 packages/opencode/src/session/processor.ts   |  13 +-
 packages/opencode/src/session/prompt.ts      | 130 +++++++++++-------
 packages/opencode/src/util/token.ts          |  42 ++++++
 packages/sdk/js/src/gen/types.gen.ts         |   4 +
 7 files changed, 183 insertions(+), 74 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 954c0b958aee..fbc8e6e05fef 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1039,32 +1039,52 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish)
   })

+  // Find the parent user message (reused by duration and token calculations)
+  const user = createMemo(() => messages().find((x) => x.role === "user" && x.id === props.message.parentID))
+
   const duration = createMemo(() => {
     if (!final()) return 0
     if (!props.message.time.completed) return 0
-    const user = messages().find((x) => x.role === "user" && x.id === props.message.parentID)
-    if (!user || !user.time) return 0
-    return props.message.time.completed - user.time.created
+    const u = user()
+    if (!u || !u.time) return 0
+    return props.message.time.completed - u.time.created
   })

-  // Output tokens
-  const outputTokens = createMemo(() => props.message.tokens.output)
-  const outputEstimate = createMemo(() => props.message.outputEstimate)
+  // OUT tokens (sent TO the API) - includes user text + tool results from the previous assistant
+  const outEstimate = createMemo(() => props.message.sentEstimate)
+
+  // IN tokens (from the API)
+  const inTokens = createMemo(() => props.message.tokens.output)
+  const inEstimate = createMemo(() => props.message.outputEstimate)

-  // Reasoning tokens (must be defined BEFORE outputDisplay)
+  // Reasoning tokens (must be defined BEFORE inDisplay)
   const reasoningTokens = createMemo(() => props.message.tokens.reasoning)
   const reasoningEstimate = createMemo(() => props.message.reasoningEstimate)

-  const outputDisplay = createMemo(() => {
-    const estimate = outputEstimate()
+  const outDisplay = createMemo(() => {
+    const estimate = outEstimate()
+    if (estimate !== undefined) return "~" + estimate.toLocaleString()
+    const tokens = props.message.tokens.input
+    if (tokens > 0) return tokens.toLocaleString()
+    return "0"
+  })
+
+  const inDisplay = createMemo(() => {
+    const estimate = inEstimate()
     if (estimate !== undefined) return "~" + estimate.toLocaleString()
-    const tokens = outputTokens()
+    const tokens = inTokens()
     if (tokens > 0) return tokens.toLocaleString()
     // Show ~0 during streaming when we have reasoning but no output yet
     if (reasoningEstimate() !== undefined || reasoningTokens() > 0) return "~0"
     return undefined
   })

+  const tokensDisplay = createMemo(() => {
+    const inVal = inDisplay()
+    if (!inVal) return undefined
+    return `${inVal}↓/${outDisplay()}↑`
+  })
+
   const reasoningDisplay = createMemo(() => {
     const estimate = reasoningEstimate()
@@ -1072,9 +1092,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     if (tokens > 0) return tokens.toLocaleString()
     return undefined
   })
-  const cumulativeTokens = createMemo(
-    () => props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write,
-  )
+
+  const contextEstimate = createMemo(() => props.message.contextEstimate)
+
+  const cumulativeTokens = createMemo(() => {
+    const estimate = contextEstimate()
+    if (estimate !== undefined) return estimate
+    return props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write
+  })

   const percentage = createMemo(() => {
     if (!props.contextLimit) return 0
@@ -1122,16 +1147,16 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
             ⬝{Locale.duration(duration())}
           </text>
         </Show>
-        <Show when={ctx.showTokens() && outputDisplay() !== undefined}>
+        <Show when={ctx.showTokens() && tokensDisplay() !== undefined}>
           <text fg={theme.secondary}>
             {" "}
-            ⬝ {outputDisplay()} tok
+            ⬝ {tokensDisplay()} tok
             <Show when={reasoningDisplay() !== undefined}>
               {" · "}
               {reasoningDisplay()} think
             </Show>
             <Show
-              when={cumulativeTokens() > 0 || outputEstimate() !== undefined || reasoningEstimate() !== undefined}
+              when={cumulativeTokens() > 0 || inEstimate() !== undefined || reasoningEstimate() !== undefined}
             >
               {" · "}
               {cumulativeTokens().toLocaleString()} context ({percentage()}%)
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 1255d39f0b8e..bfd935537b23 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -98,6 +98,9 @@ export namespace SessionCompaction {
   }) {
     const model = await Provider.getModel(input.model.providerID, input.model.modelID)
     const system = [...SystemPrompt.compaction(model.providerID)]
+    const lastFinished = input.messages.find((m) => m.info.role === "assistant" && m.info.finish)?.info as
+      | MessageV2.Assistant
+      | undefined
     const msg = (await Session.updateMessage({
       id: Identifier.ascending("message"),
       role: "assistant",
@@ -121,6 +124,10 @@ export namespace SessionCompaction {
       time: {
         created: Date.now(),
       },
+      outputEstimate: lastFinished?.outputEstimate,
+      reasoningEstimate: lastFinished?.reasoningEstimate,
+      contextEstimate: lastFinished?.contextEstimate,
+      sentEstimate: lastFinished?.sentEstimate,
     })) as MessageV2.Assistant
     const processor = SessionProcessor.create({
       assistantMessage: msg,
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 15e33040e601..e9e6d5e72d80 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -301,6 +301,8 @@ export namespace MessageV2 {
      }),
    system: z.string().optional(),
    tools: z.record(z.string(), z.boolean()).optional(),
+    sentEstimate: z.number().optional(),
+    contextEstimate: z.number().optional(),
  }).meta({
    ref: "UserMessage",
  })
@@ -362,6 +364,8 @@ export namespace MessageV2 {
    outputEstimate: z.number().optional(),
    reasoningEstimate: z.number().optional(),
+    contextEstimate: z.number().optional(),
+    sentEstimate: z.number().optional(),
    finish: z.string().optional(),
  }).meta({
    ref: "AssistantMessage",
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 42c2229b5e38..570879df1b45 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -11,6 +11,7 @@ import { SessionSummary } from "./summary"
 import { Bus } from "@/bus"
 import { SessionRetry } from "./retry"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -40,9 +41,9 @@ export namespace SessionProcessor {
     },
     async process(fn: () => StreamTextResult<Record<string, AITool>, never>) {
       log.info("process")
-      // Initialize from existing estimates (reverse the /4 calculation) to accumulate across multiple process() calls
-      let reasoningTotal = (input.assistantMessage.reasoningEstimate ?? 0) * 4
-      let textTotal = (input.assistantMessage.outputEstimate ?? 0) * 4
+      // Initialize from existing estimates (convert tokens to characters) to accumulate across multiple process() calls
+      let reasoningTotal = Token.toCharCount(input.assistantMessage.reasoningEstimate ?? 0)
+      let textTotal = Token.toCharCount(input.assistantMessage.outputEstimate ?? 0)
       while (true) {
         try {
           let currentText: MessageV2.TextPart | undefined
@@ -80,7 +81,7 @@ export namespace SessionProcessor {
               if (value.providerMetadata) part.metadata = value.providerMetadata
               if (part.text) {
                 const active = Object.values(reasoningMap).reduce((sum, p) => sum + p.text.length, 0)
-                const estimate = Math.round(Math.max(0, (reasoningTotal + active) / 4))
+                const estimate = Token.toTokenEstimate(Math.max(0, reasoningTotal + active))
                 if (input.assistantMessage.reasoningEstimate !== estimate) {
                   input.assistantMessage.reasoningEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
@@ -260,6 +261,8 @@ export namespace SessionProcessor {
             input.assistantMessage.finish = value.finishReason
             input.assistantMessage.cost += usage.cost
             input.assistantMessage.tokens = usage.tokens
+            input.assistantMessage.contextEstimate =
+              usage.tokens.input + usage.tokens.cache.read + usage.tokens.cache.write
             await Session.updatePart({
               id: Identifier.ascending("part"),
               reason: value.finishReason,
@@ -310,7 +313,7 @@ export namespace SessionProcessor {
               currentText.text += value.text
               if (value.providerMetadata) currentText.metadata = value.providerMetadata
               if (currentText.text) {
-                const estimate = Math.round(Math.max(0, (textTotal + currentText.text.length) / 4))
+                const estimate = Token.toTokenEstimate(Math.max(0, textTotal + currentText.text.length))
                 if (input.assistantMessage.outputEstimate !== estimate) {
                   input.assistantMessage.outputEstimate = estimate
                   await Session.updateMessage(input.assistantMessage)
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index f51c91fb625d..ef1e07a4c6eb 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -48,6 +48,7 @@ import { fn } from "@/util/fn"
 import { SessionProcessor } from "./processor"
 import { TaskTool } from "@/tool/task"
 import { SessionStatus } from "./status"
+import { Token } from "@/util/token"

 // @ts-ignore
 globalThis.AI_SDK_LOG_WARNINGS = false
@@ -313,71 +314,50 @@ export namespace SessionPrompt {
           time: {
             created: Date.now(),
           },
+          outputEstimate: lastFinished?.outputEstimate,
+          reasoningEstimate: lastFinished?.reasoningEstimate,
+          contextEstimate: lastFinished?.contextEstimate,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0),
         })) as MessageV2.Assistant
-      let part = (await Session.updatePart({
+
+      const part: MessageV2.ToolPart = {
+        type: "tool",
         id: Identifier.ascending("part"),
         messageID: assistantMessage.id,
-        sessionID: assistantMessage.sessionID,
-        type: "tool",
+        sessionID,
+        tool: "task",
         callID: ulid(),
-        tool: TaskTool.id,
         state: {
           status: "running",
-          input: {
-            prompt: task.prompt,
-            description: task.description,
-            subagent_type: task.agent,
-          },
           time: {
             start: Date.now(),
           },
+          input: {
+            prompt: task.prompt,
+            description: task.description,
+            subagent_type: task.agent,
+          },
         },
-      })) as MessageV2.ToolPart
-      const result = await taskTool
-        .execute(
-          {
+      }
+      await Session.updatePart(part)
+
+      const result = await taskTool.execute(
+        {
           prompt: task.prompt,
           description: task.description,
           subagent_type: task.agent,
         },
-        {
-          agent: task.agent,
-          messageID: assistantMessage.id,
-          sessionID: sessionID,
-          abort,
-          async metadata(input) {
-            await Session.updatePart({
-              ...part,
-              type: "tool",
-              state: {
-                ...part.state,
-                ...input,
-              },
-            } satisfies MessageV2.ToolPart)
-          },
-        },
-      )
-        .catch(() => {})
-      assistantMessage.finish = "tool-calls"
-      assistantMessage.time.completed = Date.now()
-      await Session.updateMessage(assistantMessage)
-      if (result && part.state.status === "running") {
-        await Session.updatePart({
-          ...part,
-          state: {
-            status: "completed",
-            input: part.state.input,
-            title: result.title,
-            metadata: result.metadata,
-            output: result.output,
-            attachments: result.attachments,
-            time: {
-              ...part.state.time,
-              end: Date.now(),
-            },
-          },
-        } satisfies MessageV2.ToolPart)
-      }
+        {
+          sessionID,
+          abort,
+          agent: lastUser.agent,
+          messageID: assistantMessage.id,
+          callID: part.callID,
+          extra: { providerID: model.providerID, modelID: model.modelID },
+          metadata: async () => {},
+        },
+      )
+
       if (!result) {
         await Session.updatePart({
           ...part,
@@ -433,6 +413,17 @@ export namespace SessionPrompt {
         messages: msgs,
         agent,
       })
+
+      // Calculate tokens for tool results from the previous assistant that will be sent in this API call
+      // Reuse parts from already-loaded messages to avoid a redundant query
+      let toolResultTokens = 0
+      if (lastAssistant && step > 1) {
+        const assistantMessage = msgs.find((m) => m.info.id === lastAssistant.id)
+        if (assistantMessage) {
+          toolResultTokens = Token.calculateToolResultTokens(assistantMessage.parts)
+        }
+      }
+
       const processor = SessionProcessor.create({
         assistantMessage: (await Session.updateMessage({
           id: Identifier.ascending("message"),
@@ -456,8 +447,10 @@ export namespace SessionPrompt {
             created: Date.now(),
           },
           sessionID,
-          outputEstimate: lastAssistant?.outputEstimate,
-          reasoningEstimate: lastAssistant?.reasoningEstimate,
+          outputEstimate: lastFinished?.outputEstimate,
+          reasoningEstimate: lastFinished?.reasoningEstimate,
+          contextEstimate: lastFinished?.contextEstimate,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0) + toolResultTokens,
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,
@@ -1067,6 +1060,25 @@ export namespace SessionPrompt {
         },
       )

+      const userText = parts
+        .filter((p) => p.type === "text" && !(p as MessageV2.TextPart).synthetic)
+        .map((p) => (p as MessageV2.TextPart).text)
+        .join("")
+
+      // Calculate user message tokens
+      let sentTokens = Token.estimate(userText)
+
+      // Add tokens from tool results that will be sent with this message
+      // Tool results from the previous assistant message are included in the API request
+      const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID))
+      const lastAssistant = msgs.findLast((m) => m.info.role === "assistant")
+      if (lastAssistant) {
+        sentTokens += Token.calculateToolResultTokens(lastAssistant.parts)
+      }
+
+      info.sentEstimate = sentTokens
+      info.contextEstimate = sentTokens
+
       await Session.updateMessage(info)
       for (const part of parts) {
         await Session.updatePart(part)
@@ -1136,6 +1148,8 @@ export namespace SessionPrompt {
       providerID: model.providerID,
       modelID: model.modelID,
     },
+      sentEstimate: 0,
+      contextEstimate: 0,
    }
    await Session.updateMessage(userMsg)
    const userPart: MessageV2.Part = {
@@ -1148,6 +1162,12 @@ export namespace SessionPrompt {
    }
    await Session.updatePart(userPart)

+    const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID))
+    const lastFinished = msgs.find((m) => m.info.role === "assistant" && m.info.finish)?.info as
+      | MessageV2.Assistant
+      | undefined
+    const lastAssistant = msgs.find((m) => m.info.role === "assistant")?.info as MessageV2.Assistant | undefined
+
    const msg: MessageV2.Assistant = {
      id: Identifier.ascending("message"),
      sessionID: input.sessionID,
@@ -1170,6 +1190,10 @@ export namespace SessionPrompt {
      },
      modelID: model.modelID,
      providerID: model.providerID,
+      outputEstimate: lastFinished?.outputEstimate,
+      reasoningEstimate: lastFinished?.reasoningEstimate,
+      contextEstimate: lastFinished?.contextEstimate,
+      sentEstimate: (lastAssistant?.sentEstimate || 0) + (userMsg.sentEstimate || 0),
    }
    await Session.updateMessage(msg)
    const part: MessageV2.Part = {
diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index cee5adc37713..58b33855c975 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -4,4 +4,46 @@ export namespace Token {
   export function estimate(input: string) {
     return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN))
   }
+
+  /**
+   * Convert a token estimate to a character count
+   * Used when accumulating text across stream deltas
+   */
+  export function toCharCount(tokenEstimate: number): number {
+    return tokenEstimate * CHARS_PER_TOKEN
+  }
+
+  /**
+   * Convert a character count to a token estimate
+   * Used when converting accumulated text back to tokens
+   */
+  export function toTokenEstimate(charCount: number): number {
+    return Math.round(charCount / CHARS_PER_TOKEN)
+  }
+
+  /**
+   * Calculate tokens for tool results that will be sent to the API
+   * Includes tool input JSON, output (or the compaction message), and errors
+   */
+  export function calculateToolResultTokens(parts: Array<{ type: string; state?: any }>) {
+    let tokens = 0
+    for (const part of parts) {
+      if (part.type === "tool") {
+        // Tool input is sent in both completed and error states
+        tokens += estimate(JSON.stringify(part.state.input))
+
+        if (part.state.status === "completed") {
+          // Tool result output - check if compacted
+          const output = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output
+          tokens += estimate(output)
+        }
+
+        if (part.state.status === "error") {
+          // Tool error text is sent back to the API
+          tokens += estimate(part.state.error)
+        }
+      }
+    }
+    return tokens
+  }
 }
diff --git a/packages/sdk/js/src/gen/types.gen.ts b/packages/sdk/js/src/gen/types.gen.ts
index e2e611db13a8..c6c86adb9933 100644
--- a/packages/sdk/js/src/gen/types.gen.ts
+++ b/packages/sdk/js/src/gen/types.gen.ts
@@ -130,6 +130,10 @@ export type AssistantMessage = {
       write: number
     }
   }
+  outputEstimate?: number
+  reasoningEstimate?: number
+  contextEstimate?: number
+  sentEstimate?: number
   finish?: string
 }

From d7aefec3a4f6ffedf3702de587cb55052b943200 Mon Sep 17 00:00:00 2001
From: Arsham Shirvani
Date: Tue, 25 Nov 2025 16:22:32 +0000
Subject: [PATCH 5/5] fix: synthetic and noReply message token counting

Fix the token counting logic to properly account for synthetic and
noReply messages in session totals:

- Account for synthetic content (e.g. skill documentation) in token
  counts by checking the ignored flag instead of the synthetic flag
- Fix the sentEstimate formula to prevent double-counting tool result
  tokens
- Maintain cumulative session totals for accurate usage tracking
- Include noReply messages in session token calculations

This ensures users see accurate cumulative token usage per session,
without inflated totals from double-counting.
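The double-count, in miniature (illustrative numbers): before this fix,
tool-result tokens entered the running total twice — once inside the
user message's sentEstimate and once via the explicit toolResultTokens
term — and because each assistant message chains off the previous
total, the overstatement carried forward every turn.

    const toolTokens = 500 // ~tokens of tool output echoed back to the API
    let before = 0
    let after = 0
    for (let turn = 1; turn <= 3; turn++) {
      before += toolTokens * 2 // counted in lastUser.sentEstimate AND toolResultTokens
      after += toolTokens      // counted once
    }
    // before === 3000, after === 1500: the inflation compounds across turns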
---
 packages/opencode/src/session/prompt.ts |  4 ++--
 packages/opencode/src/util/token.ts     | 16 ++++++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index ef1e07a4c6eb..9b36b3d2632e 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -450,7 +450,7 @@ export namespace SessionPrompt {
           outputEstimate: lastFinished?.outputEstimate,
           reasoningEstimate: lastFinished?.reasoningEstimate,
           contextEstimate: lastFinished?.contextEstimate,
-          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0) + toolResultTokens,
+          sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0),
         })) as MessageV2.Assistant,
         sessionID: sessionID,
         model: model.info,
@@ -1061,7 +1061,7 @@ export namespace SessionPrompt {
       )

       const userText = parts
-        .filter((p) => p.type === "text" && !(p as MessageV2.TextPart).synthetic)
+        .filter((p) => p.type === "text" && !p.ignored)
         .map((p) => (p as MessageV2.TextPart).text)
         .join("")

diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
index 58b33855c975..fc47a98ae0a5 100644
--- a/packages/opencode/src/util/token.ts
+++ b/packages/opencode/src/util/token.ts
@@ -29,17 +29,21 @@ export namespace Token {
     let tokens = 0
     for (const part of parts) {
       if (part.type === "tool") {
-        // Tool input is sent in both completed and error states
-        tokens += estimate(JSON.stringify(part.state.input))
+        // Skip tool parts that have no state yet
+        if (!part.state) continue
+
+        // Tool input is sent in both completed and error states
+        if (part.state.input) {
+          tokens += estimate(JSON.stringify(part.state.input))
+        }

         if (part.state.status === "completed") {
-          // Tool result output - check if compacted
-          const output = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output
+          // Use optional chaining for the compacted check
+          const output = part.state.time?.compacted ? "[Old tool result content cleared]" : (part.state.output ?? "")
           tokens += estimate(output)
         }

-        if (part.state.status === "error") {
-          // Tool error text is sent back to the API
+        if (part.state.status === "error" && part.state.error) {
           tokens += estimate(part.state.error)
         }
       }