From 9667132e4b807aaa49fb9ec6f37355feb4d34309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?alexjtxie=28=E8=B0=A2=E5=AE=B6=E6=8F=90=29?= Date: Tue, 17 Mar 2026 22:12:33 +0800 Subject: [PATCH] feat(tui): add CJK-aware word boundary navigation using Intl.Segmenter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The native Zig word boundary logic in @opentui/core treats contiguous non-whitespace as a single word. This means Option+Arrow skips over entire CJK text blocks (e.g. "你好世界hello") in one jump, instead of stopping at each word boundary. This patch adds a TypeScript-level monkey-patch that replaces the four word-movement methods (moveWordForward, moveWordBackward, deleteWordForward, deleteWordBackward) on TextareaRenderable with versions powered by Intl.Segmenter(undefined, { granularity: 'word' }). The ICU segmenter treats each CJK ideograph cluster as an independent word segment, giving macOS-like Option+Arrow behavior: "你好世界hello" → 你好 | 世界 | hello "创建一个新的实例" → 创建 | 一个 | 新的 | 实例 Selection handling, event emission, and render requests are preserved to match the original method contracts. 
--- .../cli/cmd/tui/component/prompt/index.tsx | 2 + .../opencode/src/util/cjk-word-boundary.ts | 162 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 packages/opencode/src/util/cjk-word-boundary.ts diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index d63c248fb83e..9a11db59b57f 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -25,6 +25,7 @@ import type { FilePart } from "@opencode-ai/sdk/v2" import { TuiEvent } from "../../event" import { iife } from "@/util/iife" import { Locale } from "@/util/locale" +import { patchCJKWordBoundary } from "@/util/cjk-word-boundary" import { formatDuration } from "@/util/format" import { createColors, createFrames } from "../../ui/spinner.ts" import { useDialog } from "@tui/ui/dialog" @@ -981,6 +982,7 @@ export function Prompt(props: PromptProps) { }} ref={(r: TextareaRenderable) => { input = r + patchCJKWordBoundary(input) if (promptPartTypeId === 0) { promptPartTypeId = input.extmarks.registerType("prompt-part") } diff --git a/packages/opencode/src/util/cjk-word-boundary.ts b/packages/opencode/src/util/cjk-word-boundary.ts new file mode 100644 index 000000000000..1e1b57aa1c51 --- /dev/null +++ b/packages/opencode/src/util/cjk-word-boundary.ts @@ -0,0 +1,162 @@ +import type { TextareaRenderable } from "@opentui/core" + +// Intl.Segmenter with granularity:'word' splits CJK text into short word segments +// (e.g. 你好 | 世界), which gives us macOS-like Option+Arrow behavior out of the box. 
// Shared word-granularity segmenter. Constructed once at module load because
// building an Intl.Segmenter is comparatively expensive and it is stateless.
const segmenter = new Intl.Segmenter(undefined, { granularity: "word" })

/**
 * Find the offset to jump to when pressing Option+Right (forward word).
 *
 * Behavior (intended to match macOS):
 * - If the cursor is inside or at the start of a word, jump to the end of that word.
 * - If the cursor is on whitespace/punctuation, jump to the end of the next word.
 * - If no next word exists, jump to the end of the text.
 *
 * @param text   Full buffer contents.
 * @param offset Cursor position as a UTF-16 code-unit index into `text`.
 * @returns The UTF-16 index of the end of the first word-like segment that
 *          ends after `offset`, or `text.length` when there is none.
 */
function findNextWordEnd(text: string, offset: number): number {
  if (offset >= text.length) return text.length

  // Segments are produced lazily in ascending order, so we can stop at the
  // first match instead of segmenting the whole buffer on every keystroke.
  for (const seg of segmenter.segment(text)) {
    if (!seg.isWordLike) continue
    const end = seg.index + seg.segment.length
    if (end > offset) return end
  }
  return text.length
}

/**
 * Find the offset to jump to when pressing Option+Left (backward word).
 *
 * Behavior (intended to match macOS):
 * - If the cursor is inside or at the end of a word, jump to the start of that word.
 * - If the cursor is on whitespace/punctuation, jump to the start of the previous word.
 * - If no previous word exists, jump to the start of the text.
 *
 * @param text   Full buffer contents.
 * @param offset Cursor position as a UTF-16 code-unit index into `text`.
 * @returns The UTF-16 index of the start of the last word-like segment that
 *          begins before `offset`, or `0` when there is none.
 */
function findPrevWordStart(text: string, offset: number): number {
  if (offset <= 0) return 0

  let start = 0
  for (const seg of segmenter.segment(text)) {
    // Segments arrive in ascending order; nothing at or past the cursor can
    // qualify, so stop scanning as soon as we reach it.
    if (!(seg.index < offset)) break
    if (seg.isWordLike) start = seg.index
  }
  return start
}

/**
 * Monkey-patch a TextareaRenderable instance to use CJK-aware word boundaries
 * powered by Intl.Segmenter. This replaces the native Zig word boundary logic
 * (which treats contiguous non-whitespace as a single word) with Unicode-aware
 * segmentation, so a CJK run is split into short words (e.g. 你好 | 世界 | hello)
 * instead of being skipped in one jump.
 *
 * Four instance methods are overwritten: `moveWordForward`, `moveWordBackward`,
 * `deleteWordForward` and `deleteWordBackward`. The replacements keep the
 * selection handling, "input" event emission and render requests, and only
 * substitute the boundary calculation.
 *
 * NOTE(review): the boundary helpers work in UTF-16 code units of
 * `editBuffer.getText()`. This assumes `getCursorPosition().offset`,
 * `setCursorByOffset` and `offsetToPosition` use the same unit — confirm
 * against @opentui/core, especially for double-width CJK and astral characters.
 *
 * @param textarea The instance to patch; it is mutated in place.
 */
export function patchCJKWordBoundary(textarea: TextareaRenderable): void {
  // We need protected/private members (editBuffer, _ctx, ...) at runtime.
  // TypeScript visibility modifiers are compile-time only; the properties exist
  // on the JS object, so an untyped view is the narrowest practical escape hatch.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const ta = textarea as any

  // Shared implementation for both cursor-movement directions.
  const moveWord = (
    findBoundary: (text: string, offset: number) => number,
    options?: { select?: boolean },
  ): boolean => {
    const select = options?.select ?? false
    ta.updateSelectionForMovement(select, true)

    const text = ta.editBuffer.getText()
    const currentOffset = ta.editBuffer.getCursorPosition().offset
    ta.editBuffer.setCursorByOffset(findBoundary(text, currentOffset))

    ta.updateSelectionForMovement(select, false)
    ta.requestRender()
    return true
  }

  // Shared implementation for both deletion directions.
  const deleteWord = (forward: boolean): boolean => {
    // An active selection takes precedence: delete it instead of a word.
    if (ta.hasSelection()) {
      ta.deleteSelectedText()
      ta.emit("input", ta.plainText)
      return true
    }

    const text = ta.editBuffer.getText()
    const cursor = ta.editBuffer.getCursorPosition()
    const targetOffset = forward ? findNextWordEnd(text, cursor.offset) : findPrevWordStart(text, cursor.offset)

    // Only delete when the boundary lies strictly in the requested direction.
    const hasRange = forward ? targetOffset > cursor.offset : targetOffset < cursor.offset
    if (hasRange) {
      const target = ta.editBuffer.offsetToPosition(targetOffset)
      if (target) {
        // deleteRange expects (startRow, startCol, endRow, endCol) in document order.
        if (forward) {
          ta.editBuffer.deleteRange(cursor.row, cursor.col, target.row, target.col)
        } else {
          ta.editBuffer.deleteRange(target.row, target.col, cursor.row, cursor.col)
        }
      }
    }

    ta._ctx.clearSelection()
    ta.requestRender()
    ta.emit("input", ta.plainText)
    return true
  }

  ta.moveWordForward = (options?: { select?: boolean }): boolean => moveWord(findNextWordEnd, options)
  ta.moveWordBackward = (options?: { select?: boolean }): boolean => moveWord(findPrevWordStart, options)
  ta.deleteWordForward = (): boolean => deleteWord(true)
  ta.deleteWordBackward = (): boolean => deleteWord(false)
}