diff --git a/packages/opencode/src/flag/flag.ts b/packages/opencode/src/flag/flag.ts
index c250cda84883..eb592a10220f 100644
--- a/packages/opencode/src/flag/flag.ts
+++ b/packages/opencode/src/flag/flag.ts
@@ -29,6 +29,7 @@ export namespace Flag {
   export const OPENCODE_SERVER_PASSWORD = process.env["OPENCODE_SERVER_PASSWORD"]
   export const OPENCODE_SERVER_USERNAME = process.env["OPENCODE_SERVER_USERNAME"]
   export const OPENCODE_ENABLE_QUESTION_TOOL = truthy("OPENCODE_ENABLE_QUESTION_TOOL")
+  export declare const OPENCODE_EXPERIMENTAL_HASHLINE: boolean
 
   // Experimental
   export const OPENCODE_EXPERIMENTAL = truthy("OPENCODE_EXPERIMENTAL")
@@ -139,12 +140,23 @@ Object.defineProperty(Flag, "OPENCODE_CONFIG_DIR", {
 })
 
 // Dynamic getter for OPENCODE_CLIENT
 // This must be evaluated at access time, not module load time,
 // because some commands override the client at runtime
 Object.defineProperty(Flag, "OPENCODE_CLIENT", {
   get() {
     return process.env["OPENCODE_CLIENT"] ?? "cli"
   },
   enumerable: true,
   configurable: false,
 })
+
+// Dynamic getter for OPENCODE_EXPERIMENTAL_HASHLINE
+// This must be evaluated at access time, not module load time,
+// to allow tests to control hashline features
+Object.defineProperty(Flag, "OPENCODE_EXPERIMENTAL_HASHLINE", {
+  get() {
+    return truthy("OPENCODE_EXPERIMENTAL_HASHLINE")
+  },
+  enumerable: true,
+  configurable: false,
+})
diff --git a/packages/opencode/src/tool/hashline_read.ts b/packages/opencode/src/tool/hashline_read.ts
new file mode 100644
index 000000000000..4f357fd10331
--- /dev/null
+++ b/packages/opencode/src/tool/hashline_read.ts
@@ -0,0 +1,194 @@
+import z from "zod"
+import * as fs from "fs"
+import * as path from "path"
+import { Tool } from "./tool"
+import { LSP } from "../lsp"
+import { FileTime } from "../file/time"
+import DESCRIPTION from "./hashline_read.txt"
+import { Instance } from "../project/instance"
+import { assertExternalDirectory } from "./external-directory"
+import { InstructionPrompt } from "../session/instruction"
+import { Flag } from "../flag/flag"
+import { hashLine } from "./hashline"
+
+const DEFAULT_READ_LIMIT = 2000
+const MAX_LINE_LENGTH = 2000
+const MAX_BYTES = 50 * 1024
+
+export const HashlineReadTool = Tool.define("hashline_read", {
+  description: DESCRIPTION,
+  parameters: z.object({
+    filePath: z.string().describe("The absolute path to the file to read"),
+    offset: z.coerce.number().describe("The line number to start reading from (1-indexed)").optional(),
+    limit: z.coerce.number().describe("The maximum number of lines to read (defaults to 2000)").optional(),
+  }),
+  async execute(params, ctx) {
+    if (params.offset !== undefined && params.offset < 1) {
+      throw new Error("offset must be greater than or equal to 1")
+    }
+    let filepath = params.filePath
+    if (!path.isAbsolute(filepath)) {
+      filepath = path.resolve(Instance.directory, filepath)
+    }
+    const title = path.relative(Instance.worktree, filepath)
+
+    const file = Bun.file(filepath)
+    const stat = await file.stat().catch(() => undefined)
+
+    await assertExternalDirectory(ctx, filepath, {
+      bypass: Boolean(ctx.extra?.["bypassCwdCheck"]),
+      kind: stat?.isDirectory() ? "directory" : "file",
+    })
+
+    await ctx.ask({
+      permission: "read",
+      patterns: [filepath],
+      always: ["*"],
+      metadata: {},
+    })
+
+    if (!stat) {
+      const dir = path.dirname(filepath)
+      const base = path.basename(filepath)
+
+      const dirEntries = fs.readdirSync(dir)
+      const suggestions = dirEntries
+        .filter(
+          (entry) =>
+            entry.toLowerCase().includes(base.toLowerCase()) || base.toLowerCase().includes(entry.toLowerCase()),
+        )
+        .map((entry) => path.join(dir, entry))
+        .slice(0, 3)
+
+      if (suggestions.length > 0) {
+        throw new Error(`File not found: ${filepath}\n\nDid you mean one of these?\n${suggestions.join("\n")}`)
+      }
+
+      throw new Error(`File not found: ${filepath}`)
+    }
+
+    if (stat.isDirectory()) {
+      throw new Error(`Cannot read directory with hashline_read: ${filepath}`)
+    }
+
+    const instructions = await InstructionPrompt.resolve(ctx.messages, filepath, ctx.messageID)
+
+    const isBinary = await isBinaryFile(filepath, file)
+    if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
+
+    const limit = params.limit ?? DEFAULT_READ_LIMIT
+    const offset = params.offset ?? 1
+    const start = offset - 1
+    const lines = await file.text().then((text) => text.split("\n"))
+    if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
+
+    const raw: string[] = []
+    let bytes = 0
+    let truncatedByBytes = false
+    for (let i = start; i < Math.min(lines.length, start + limit); i++) {
+      const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
+      const lineNum = i + 1
+      const hashChar = hashLine(line)
+      const outputLine = `${lineNum}${hashChar}${line}`
+      const size = Buffer.byteLength(outputLine, "utf-8") + (raw.length > 0 ? 1 : 0)
+      if (bytes + size > MAX_BYTES) {
+        truncatedByBytes = true
+        break
+      }
+      raw.push(outputLine)
+      bytes += size
+    }
+
+    const content = raw
+    const preview = raw.slice(0, 20).join("\n")
+
+    let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>\n"].join("\n")
+    output += content.join("\n")
+
+    const totalLines = lines.length
+    const lastReadLine = offset + raw.length - 1
+    const hasMoreLines = totalLines > lastReadLine
+    const truncated = hasMoreLines || truncatedByBytes
+
+    if (truncatedByBytes) {
+      output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
+    } else if (hasMoreLines) {
+      output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
+    } else {
+      output += `\n\n(End of file - total ${totalLines} lines)`
+    }
+    output += "\n</content>"
+
+    LSP.touchFile(filepath, false)
+    FileTime.read(ctx.sessionID, filepath)
+
+    if (instructions.length > 0) {
+      output += `\n\n<instructions>\n${instructions.map((i) => i.content).join("\n\n")}\n</instructions>`
+    }
+
+    return {
+      title,
+      output,
+      metadata: {
+        preview,
+        truncated,
+        loaded: instructions.map((i) => i.filepath),
+      },
+    }
+  },
+})
+
+async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
+  const ext = path.extname(filepath).toLowerCase()
+  switch (ext) {
+    case ".zip":
+    case ".tar":
+    case ".gz":
+    case ".exe":
+    case ".dll":
+    case ".so":
+    case ".class":
+    case ".jar":
+    case ".war":
+    case ".7z":
+    case ".doc":
+    case ".docx":
+    case ".xls":
+    case ".xlsx":
+    case ".ppt":
+    case ".pptx":
+    case ".odt":
+    case ".ods":
+    case ".odp":
+    case ".bin":
+    case ".dat":
+    case ".obj":
+    case ".o":
+    case ".a":
+    case ".lib":
+    case ".wasm":
+    case ".pyc":
+    case ".pyo":
+      return true
+    default:
+      break
+  }
+
+  const stat = await file.stat()
+  const fileSize = stat.size
+  if (fileSize === 0) return false
+
+  const bufferSize = Math.min(4096, fileSize)
+  const buffer = await file.arrayBuffer()
+  if (buffer.byteLength === 0) return false
+  const bytes = new Uint8Array(buffer.slice(0, bufferSize))
+
+  let nonPrintableCount = 0
+  for (let i = 0; i < bytes.length; i++) {
+    if (bytes[i] === 0) return true
+    if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
+      nonPrintableCount++
+    }
+  }
+  return nonPrintableCount / bytes.length > 0.3
+}
\ No newline at end of file
diff --git a/packages/opencode/src/tool/hashline_read.txt b/packages/opencode/src/tool/hashline_read.txt
new file mode 100644
index 000000000000..7db0b232f963
--- /dev/null
+++ b/packages/opencode/src/tool/hashline_read.txt
@@ -0,0 +1,10 @@
+Read a file and return its contents with each line prefixed by a content-addressable anchor.
+
+Each line is formatted as: {lineNumber}{CJK_HASH_CHAR}{content}
+Example: "14丐const foo = 1"
+
+The CJK hash character is derived from the line content using xxHash32. Use these line anchors
+when calling hashline_edit — they allow the model to reference specific lines precisely even
+after the file has changed.
+
+Supports the same offset and limit parameters as the read tool.
\ No newline at end of file
diff --git a/packages/opencode/src/tool/registry.ts b/packages/opencode/src/tool/registry.ts
index aa6b331ea44f..3460cb0de0b0 100644
--- a/packages/opencode/src/tool/registry.ts
+++ b/packages/opencode/src/tool/registry.ts
@@ -29,6 +29,7 @@ import { ApplyPatchTool } from "./apply_patch"
 import { CheckTaskTool } from "./check_task"
 import { ListTasksTool } from "./list_tasks"
 import { CancelTaskTool } from "./cancel_task"
+import { HashlineReadTool } from "./hashline_read"
 
 export namespace ToolRegistry {
   const log = Log.create({ service: "tool.registry" })
@@ -130,6 +131,7 @@ export namespace ToolRegistry {
       SkillTool,
       ApplyPatchTool,
       ...(Flag.OPENCODE_EXPERIMENTAL_LSP_TOOL ? [LspTool] : []),
+      ...(Flag.OPENCODE_EXPERIMENTAL_HASHLINE ? [HashlineReadTool] : []),
       ...(config.experimental?.batch_tool === true ? [BatchTool] : []),
       ...(Flag.OPENCODE_EXPERIMENTAL_PLAN_MODE && Flag.OPENCODE_CLIENT === "cli" ? [PlanExitTool, PlanEnterTool] : []),
       ...custom,
diff --git a/packages/opencode/test/tool/hashline_read.test.ts b/packages/opencode/test/tool/hashline_read.test.ts
new file mode 100644
index 000000000000..17ba3a62421e
--- /dev/null
+++ b/packages/opencode/test/tool/hashline_read.test.ts
@@ -0,0 +1,268 @@
+import { describe, expect, test } from "bun:test"
+import path from "path"
+import { HashlineReadTool } from "../../src/tool/hashline_read"
+import { Instance } from "../../src/project/instance"
+import { tmpdir } from "../fixture/fixture"
+
+const ctx = {
+  sessionID: "test",
+  messageID: "",
+  callID: "",
+  agent: "build",
+  abort: AbortSignal.any([]),
+  messages: [],
+  metadata: () => {},
+  ask: async () => {},
+}
+
+describe("tool.hashline_read output format", () => {
+  test("each line starts with line number + CJK char", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "test.txt"), "line one\nline two\nline three")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "test.txt") }, ctx)
+        const contentMatch = result.output.match(/<content>([\s\S]*?)<\/content>/)
+        expect(contentMatch).toBeTruthy()
+        const contentBody = contentMatch![1] || ""
+        const contentLines = contentBody.split("\n").filter((l) => l.length > 0 && !l.startsWith("("))
+        expect(contentLines.length).toBe(3)
+        for (const line of contentLines) {
+          const match = line.match(/^(\d+)([\u4e00-\u9fff])/)
+          expect(match).toBeTruthy()
+          if (match) {
+            const num = parseInt(match[1] || "0", 10)
+            expect(num).toBeGreaterThan(0)
+          }
+        }
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read line count", () => {
+  test("output has same number of lines as input file", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "test.txt"), "line one\nline two\nline three")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "test.txt") }, ctx)
+        const contentMatch = result.output.match(/<content>([\s\S]*?)<\/content>/)
+        const contentBody = contentMatch?.[1] || ""
+        const contentLines = contentBody.split("\n").filter((l) => l.length > 0 && !l.startsWith("("))
+        expect(contentLines.length).toBe(3)
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read byte budget", () => {
+  test("MAX_BYTES not exceeded", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        const lines = Array.from({ length: 1000 }, (_, i) => `line ${i} with some content`).join("\n")
+        await Bun.write(path.join(dir, "large.txt"), lines)
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "large.txt") }, ctx)
+        const bytes = Buffer.byteLength(result.output, "utf8")
+        expect(bytes).toBeLessThanOrEqual(50 * 1024 + 500)
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read binary file detection", () => {
+  test("binary files return error message", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        const binary = Buffer.from([0x00, 0x01, 0x02, 0x03, 0xff, 0xfe, 0xfd, 0xfc])
+        await Bun.write(path.join(dir, "test.bin"), binary)
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const error = await hashlineRead
+          .execute({ filePath: path.join(tmp.path, "test.bin") }, ctx)
+          .catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("Cannot read binary file")
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read non-existent file", () => {
+  test("returns error for non-existent file", async () => {
+    await using tmp = await tmpdir({})
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const error = await hashlineRead
+          .execute({ filePath: path.join(tmp.path, "nonexistent.txt") }, ctx)
+          .catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("File not found")
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read offset parameter", () => {
+  test("skips first N lines", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "test.txt"), "line 1\nline 2\nline 3\nline 4\nline 5")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "test.txt"), offset: 3 }, ctx)
+        const contentMatch = result.output.match(/<content>([\s\S]*?)<\/content>/)
+        const contentBody = contentMatch?.[1] || ""
+        const contentLines = contentBody.split("\n").filter((l) => l.length > 0 && !l.startsWith("("))
+        expect(contentLines.length).toBe(3)
+        const firstLine = contentLines[0]
+        expect(firstLine).toMatch(/^3[\u4e00-\u9fff]/)
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read limit parameter", () => {
+  test("returns only N lines", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "test.txt"), "line 1\nline 2\nline 3\nline 4\nline 5")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "test.txt"), limit: 2 }, ctx)
+        const contentMatch = result.output.match(/<content>([\s\S]*?)<\/content>/)
+        const contentBody = contentMatch?.[1] || ""
+        const contentLines = contentBody.split("\n").filter((l) => l.length > 0 && !l.startsWith("("))
+        expect(contentLines.length).toBe(2)
+        expect(result.metadata.truncated).toBe(true)
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read CJK char stability", () => {
+  test("same file content produces same CJK chars on re-read", async () => {
+    const content = "const foo = 1\nconst bar = 2\nconst baz = 3"
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "test.txt"), content)
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead1 = await HashlineReadTool.init()
+        const result1 = await hashlineRead1.execute({ filePath: path.join(tmp.path, "test.txt") }, ctx)
+        const hashlineRead2 = await HashlineReadTool.init()
+        const result2 = await hashlineRead2.execute({ filePath: path.join(tmp.path, "test.txt") }, ctx)
+        expect(result1.output).toBe(result2.output)
+      },
+    })
+  })
+})
+
+describe("tool.hashline_read flag disabled", () => {
+  test("hashline_read not in registry when flag is off", async () => {
+    const originalFlag = process.env.OPENCODE_EXPERIMENTAL_HASHLINE
+    process.env.OPENCODE_EXPERIMENTAL_HASHLINE = "false"
+    try {
+      await using tmp = await tmpdir({})
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { ToolRegistry } = await import("../../src/tool/registry")
+          const toolIDs = await ToolRegistry.ids()
+          expect(toolIDs).not.toContain("hashline_read")
+        },
+      })
+    } finally {
+      if (originalFlag !== undefined) {
+        process.env.OPENCODE_EXPERIMENTAL_HASHLINE = originalFlag
+      } else {
+        delete process.env.OPENCODE_EXPERIMENTAL_HASHLINE
+      }
+    }
+  })
+})
+
+describe("tool.hashline_read flag enabled", () => {
+  test("hashline_read IS in registry when flag is on", async () => {
+    const originalFlag = process.env.OPENCODE_EXPERIMENTAL_HASHLINE
+    process.env.OPENCODE_EXPERIMENTAL_HASHLINE = "true"
+    try {
+      await using tmp = await tmpdir({})
+      await Instance.provide({
+        directory: tmp.path,
+        fn: async () => {
+          const { ToolRegistry } = await import("../../src/tool/registry")
+          const toolIDs = await ToolRegistry.ids()
+          expect(toolIDs).toContain("hashline_read")
+        },
+      })
+    } finally {
+      if (originalFlag !== undefined) {
+        process.env.OPENCODE_EXPERIMENTAL_HASHLINE = originalFlag
+      } else {
+        delete process.env.OPENCODE_EXPERIMENTAL_HASHLINE
+      }
+    }
+  })
+})
+
+describe("tool.hashline_read CJK byte counting", () => {
+  test("byte budget uses 3 bytes per CJK char", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        const line = "x".repeat(150)
+        const lines = Array.from({ length: 100 }, () => line).join("\n")
+        await Bun.write(path.join(dir, "test.txt"), lines)
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const hashlineRead = await HashlineReadTool.init()
+        const result = await hashlineRead.execute({ filePath: path.join(tmp.path, "test.txt") }, ctx)
+        const contentMatch = result.output.match(/<content>([\s\S]*?)<\/content>/)
+        const contentBody = contentMatch?.[1] || ""
+        const contentLines = contentBody.split("\n").filter((l) => l.length > 0)
+        let totalBytes = 0
+        for (const line of contentLines) {
+          const lineBytes = Buffer.byteLength(line, "utf8")
+          totalBytes += lineBytes + 1
+        }
+        expect(totalBytes).toBeLessThanOrEqual(50 * 1024)
+      },
+    })
+  })
+})
\ No newline at end of file