From 9667132e4b807aaa49fb9ec6f37355feb4d34309 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?alexjtxie=28=E8=B0=A2=E5=AE=B6=E6=8F=90=29?=
 <alexjtxie@tencent.com>
Date: Tue, 17 Mar 2026 22:12:33 +0800
Subject: [PATCH] feat(tui): add CJK-aware word boundary navigation using
 Intl.Segmenter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The native Zig word boundary logic in @opentui/core treats contiguous
non-whitespace as a single word. This means Option+Arrow skips over
entire CJK text blocks (e.g. "你好世界hello") in one jump, instead of
stopping at each word boundary.

This patch adds a TypeScript-level monkey-patch that replaces the four
word-movement and word-deletion methods (moveWordForward, moveWordBackward,
deleteWordForward, deleteWordBackward) on the prompt's TextareaRenderable
instance with versions powered by
Intl.Segmenter(undefined, { granularity: 'word' }).

The ICU segmenter splits a run of CJK text into separate word segments
rather than one long word, giving macOS-like Option+Arrow behavior:
  "你好世界hello" → 你好 | 世界 | hello
  "创建一个新的实例" → 创建 | 一个 | 新的 | 实例

Selection handling, event emission, and render requests are preserved
to match the original method contracts.
---
 .../cli/cmd/tui/component/prompt/index.tsx    |   2 +
 .../opencode/src/util/cjk-word-boundary.ts    | 162 ++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 packages/opencode/src/util/cjk-word-boundary.ts

diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx
index d63c248fb83e..9a11db59b57f 100644
--- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx
@@ -25,6 +25,7 @@ import type { FilePart } from "@opencode-ai/sdk/v2"
 import { TuiEvent } from "../../event"
 import { iife } from "@/util/iife"
 import { Locale } from "@/util/locale"
+import { patchCJKWordBoundary } from "@/util/cjk-word-boundary"
 import { formatDuration } from "@/util/format"
 import { createColors, createFrames } from "../../ui/spinner.ts"
 import { useDialog } from "@tui/ui/dialog"
@@ -981,6 +982,7 @@ export function Prompt(props: PromptProps) {
               }}
               ref={(r: TextareaRenderable) => {
                 input = r
+                patchCJKWordBoundary(input)
                 if (promptPartTypeId === 0) {
                   promptPartTypeId = input.extmarks.registerType("prompt-part")
                 }
diff --git a/packages/opencode/src/util/cjk-word-boundary.ts b/packages/opencode/src/util/cjk-word-boundary.ts
new file mode 100644
index 000000000000..1e1b57aa1c51
--- /dev/null
+++ b/packages/opencode/src/util/cjk-word-boundary.ts
@@ -0,0 +1,162 @@
+import type { TextareaRenderable } from "@opentui/core"
+
+// Word-granularity segmenter in the default locale. It splits runs of CJK text
+// into word segments (exact boundaries depend on the runtime's ICU data).
+const segmenter = new Intl.Segmenter(undefined, { granularity: "word" })
+
+interface Segment {
+  start: number // inclusive start index into the segmented string
+  end: number // exclusive end index (start + segment length)
+  isWordLike: boolean // segmenter's isWordLike flag (false if absent)
+}
+
+function getSegments(text: string): Segment[] {
+  const result: Segment[] = [] // one entry per segment, in text order
+  for (const seg of segmenter.segment(text)) {
+    result.push({
+      start: seg.index, // UTF-16 code-unit index of the segment within text
+      end: seg.index + seg.segment.length, // exclusive end, same unit
+      isWordLike: seg.isWordLike ?? false, // treat a missing flag as non-word
+    })
+  }
+  return result
+}
+
+/**
+ * Find the offset to jump to when pressing Option+Right (macOS-style forward word).
+ * Offsets are UTF-16 code-unit indices into `text`.
+ * Returns the end of the first word-like segment that ends after `offset`:
+ * - Cursor inside or at the start of a word: the end of that word.
+ * - Cursor on whitespace/punctuation or at a word's end: the end of the next word.
+ * - No such word (or `offset` at/after the text end): `text.length`.
+ */
+function findNextWordEnd(text: string, offset: number): number {
+  if (offset >= text.length) return text.length // already at (or past) the end
+  const segments = getSegments(text)
+
+  for (const seg of segments) {
+    // First word-like segment whose end is past the current offset wins
+    if (seg.isWordLike && seg.end > offset) {
+      return seg.end
+    }
+  }
+  return text.length // no word-like segment ends after the cursor
+}
+
+/**
+ * Find the offset to jump to when pressing Option+Left (macOS-style backward word).
+ * Offsets are UTF-16 code-unit indices into `text`.
+ * Returns the start of the last word-like segment that starts before `offset`:
+ * - Cursor inside or at the end of a word: the start of that word.
+ * - Cursor on whitespace/punctuation or at a word's start: the start of the previous word.
+ * - No such word (or `offset` <= 0): 0.
+ */
+function findPrevWordStart(text: string, offset: number): number {
+  if (offset <= 0) return 0 // already at (or before) the start
+  const segments = getSegments(text)
+
+  for (let i = segments.length - 1; i >= 0; i--) {
+    const seg = segments[i]!
+    // Scanning from the end: last word-like segment starting before the offset wins
+    if (seg.isWordLike && seg.start < offset) {
+      return seg.start
+    }
+  }
+  return 0 // no word-like segment starts before the cursor
+}
+
+/**
+ * Monkey-patch a TextareaRenderable instance to use CJK-aware word boundaries
+ * powered by Intl.Segmenter. This replaces the native Zig word boundary logic
+ * (which treats contiguous non-whitespace as a single word) with Unicode-aware
+ * segmentation that splits CJK text into separate word segments.
+ *
+ * Overrides moveWordForward, moveWordBackward, deleteWordForward and
+ * deleteWordBackward on the instance only. The overrides are meant to mirror
+ * the originals' selection handling, event emission and render requests.
+ */
+export function patchCJKWordBoundary(textarea: TextareaRenderable): void {
+  // We need access to protected/private members at runtime via (textarea as any).
+  // TypeScript visibility modifiers are compile-time only; the properties exist
+  // on the JS object at runtime.
+  const ta = textarea as any
+
+  // NOTE(review): findNextWordEnd/findPrevWordStart work in UTF-16 indices of
+  // getText(); confirm editBuffer offsets use the same unit for wide CJK text.
+
+  ta.moveWordForward = function (options?: { select?: boolean }): boolean {
+    const select = options?.select ?? false
+    ta.updateSelectionForMovement(select, true)
+
+    const text = ta.editBuffer.getText()
+    const currentOffset = ta.editBuffer.getCursorPosition().offset
+    const targetOffset = findNextWordEnd(text, currentOffset)
+    ta.editBuffer.setCursorByOffset(targetOffset)
+
+    ta.updateSelectionForMovement(select, false)
+    ta.requestRender()
+    return true
+  }
+
+  ta.moveWordBackward = function (options?: { select?: boolean }): boolean {
+    const select = options?.select ?? false
+    ta.updateSelectionForMovement(select, true)
+
+    const text = ta.editBuffer.getText()
+    const currentOffset = ta.editBuffer.getCursorPosition().offset
+    const targetOffset = findPrevWordStart(text, currentOffset)
+    ta.editBuffer.setCursorByOffset(targetOffset)
+
+    ta.updateSelectionForMovement(select, false)
+    ta.requestRender()
+    return true
+  }
+
+  ta.deleteWordForward = function (): boolean {
+    if (ta.hasSelection()) { // an active selection is deleted instead of a word
+      ta.deleteSelectedText()
+      ta.emit("input", ta.plainText)
+      return true
+    }
+
+    const text = ta.editBuffer.getText()
+    const currentCursor = ta.editBuffer.getCursorPosition()
+    const targetOffset = findNextWordEnd(text, currentCursor.offset)
+
+    if (targetOffset > currentCursor.offset) {
+      const targetPos = ta.editBuffer.offsetToPosition(targetOffset)
+      if (targetPos) {
+        ta.editBuffer.deleteRange(currentCursor.row, currentCursor.col, targetPos.row, targetPos.col)
+      }
+    }
+
+    ta._ctx.clearSelection()
+    ta.requestRender()
+    ta.emit("input", ta.plainText) // emitted even when nothing was deleted
+    return true
+  }
+
+  ta.deleteWordBackward = function (): boolean {
+    if (ta.hasSelection()) { // an active selection is deleted instead of a word
+      ta.deleteSelectedText()
+      ta.emit("input", ta.plainText)
+      return true
+    }
+
+    const text = ta.editBuffer.getText()
+    const currentCursor = ta.editBuffer.getCursorPosition()
+    const targetOffset = findPrevWordStart(text, currentCursor.offset)
+
+    if (targetOffset < currentCursor.offset) {
+      const targetPos = ta.editBuffer.offsetToPosition(targetOffset)
+      if (targetPos) {
+        ta.editBuffer.deleteRange(targetPos.row, targetPos.col, currentCursor.row, currentCursor.col)
+      }
+    }
+
+    ta._ctx.clearSelection()
+    ta.requestRender()
+    ta.emit("input", ta.plainText) // emitted even when nothing was deleted
+    return true
+  }
+}