From f25efca51cf3e2555492bbd88c6bd06e1b212db6 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Fri, 10 Apr 2026 09:59:05 -0400 Subject: [PATCH 1/2] fix(diff): skip binary file contents in reviews --- src/core/binary.ts | 80 ++++++++++++++++++++++++ src/core/loaders.test.ts | 48 ++++++++++++++ src/core/loaders.ts | 80 ++++++++++++++++++++++-- src/core/types.ts | 1 + src/opentui/HunkDiffView.tsx | 6 +- src/ui/components/ui-components.test.tsx | 21 ++++++- src/ui/diff/renderRows.tsx | 4 ++ 7 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 src/core/binary.ts diff --git a/src/core/binary.ts b/src/core/binary.ts new file mode 100644 index 0000000..fd4067d --- /dev/null +++ b/src/core/binary.ts @@ -0,0 +1,80 @@ +import type { FileDiffMetadata } from "@pierre/diffs"; +import fs from "node:fs"; + +const BINARY_SNIFF_BYTES = 8_000; +const BINARY_CONTROL_BYTE_RATIO = 0.3; + +/** Return whether one diff patch explicitly marks the file contents as binary. */ +export function patchLooksBinary(patch: string) { + return ( + /(^|\n)Binary files .* differ(?:\n|$)/.test(patch) || patch.includes("\nGIT binary patch\n") + ); +} + +/** Build placeholder metadata for one skipped binary file without inventing fake hunks. */ +export function createSkippedBinaryMetadata( + name: string, + type: FileDiffMetadata["type"] = "change", +): FileDiffMetadata { + return { + name, + type, + hunks: [], + splitLineCount: 0, + unifiedLineCount: 0, + isPartial: true, + additionLines: [], + deletionLines: [], + cacheKey: `${name}:binary-skipped`, + }; +} + +/** Read only a small prefix from disk so binary detection never loads the whole file. */ +function readFilePrefix(path: string) { + let fd: number | undefined; + + try { + fd = fs.openSync(path, "r"); + const buffer = Buffer.alloc(BINARY_SNIFF_BYTES); + const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0); + return buffer.subarray(0, bytesRead); + } finally { + if (fd !== undefined) { + fs.closeSync(fd); + } + } +} + +/** Return whether one byte is a strong binary signal instead of normal text content. */ +function isSuspiciousTextByte(byte: number) { + return byte < 0x07 || (byte > 0x0d && byte < 0x20) || byte === 0x7f; +} + +/** Detect likely binary files from a small prefix using Git-style control-byte heuristics. */ +export function isProbablyBinaryFile(path: string) { + let prefix: Uint8Array; + + try { + prefix = readFilePrefix(path); + } catch { + return false; + } + + if (prefix.length === 0) { + return false; + } + + let suspiciousBytes = 0; + + for (const byte of prefix) { + if (byte === 0) { + return true; + } + + if (isSuspiciousTextByte(byte)) { + suspiciousBytes += 1; + } + } + + return suspiciousBytes / prefix.length >= BINARY_CONTROL_BYTE_RATIO; +} diff --git a/src/core/loaders.test.ts b/src/core/loaders.test.ts index ad8de50..941453c 100644 --- a/src/core/loaders.test.ts +++ b/src/core/loaders.test.ts @@ -149,6 +149,54 @@ describe("loadAppBootstrap", () => { expect(bootstrap.changeset.files[0]?.agent?.annotations).toHaveLength(1); }); + test("skips binary file-pair diffs instead of reading their contents", async () => { + const dir = mkdtempSync(join(tmpdir(), "hunk-binary-diff-")); + tempDirs.push(dir); + + const left = join(dir, "before.png"); + const right = join(dir, "after.png"); + + writeFileSync(left, Buffer.from([0, 1, 2, 3, 4, 5])); + writeFileSync(right, Buffer.from([0, 1, 2, 9, 8, 7])); + + const bootstrap = await loadAppBootstrap({ + kind: "diff", + left, + right, + options: { + mode: "auto", + }, + }); + + expect(bootstrap.changeset.files).toHaveLength(1); + expect(bootstrap.changeset.files[0]?.path).toBe("after.png"); + expect(bootstrap.changeset.files[0]?.previousPath).toBe("before.png"); + expect(bootstrap.changeset.files[0]?.isBinary).toBe(true); + expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0); + }); + + test("marks git binary diffs as skipped binary content", async () => { + const dir = createTempRepo("hunk-git-binary-"); + const file = join(dir, "image.png"); + + writeFileSync(file, Buffer.from([0, 1, 2, 3, 4])); + git(dir, "add", "image.png"); + git(dir, "commit", "-m", "initial"); + + writeFileSync(file, Buffer.from([0, 1, 9, 3, 4, 5])); + + const bootstrap = await loadFromRepo(dir, { + kind: "git", + staged: false, + options: { mode: "auto" }, + }); + + expect(bootstrap.changeset.files).toHaveLength(1); + expect(bootstrap.changeset.files[0]?.path).toBe("image.png"); + expect(bootstrap.changeset.files[0]?.isBinary).toBe(true); + expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0); + }); + test("loads git working tree changes from a temporary repo", async () => { const dir = createTempRepo("hunk-git-"); diff --git a/src/core/loaders.ts b/src/core/loaders.ts index f65c762..5d38127 100644 --- a/src/core/loaders.ts +++ b/src/core/loaders.ts @@ -8,6 +8,7 @@ import { import { createTwoFilesPatch } from "diff"; import { resolve as resolvePath } from "node:path"; import { findAgentFileContext, loadAgentContext } from "./agent"; +import { createSkippedBinaryMetadata, isProbablyBinaryFile, patchLooksBinary } from "./binary"; import { buildGitDiffArgs, buildGitShowArgs, @@ -132,6 +133,12 @@ function findPatchChunk(metadata: FileDiffMetadata, chunks: string[], index: num ); } +interface BuildDiffFileOptions { + isUntracked?: boolean; + previousPath?: string; + isBinary?: boolean; +} + /** Build the normalized per-file model used by the UI regardless of input mode. */ function buildDiffFile( metadata: FileDiffMetadata, @@ -139,8 +146,7 @@ function buildDiffFile( index: number, sourcePrefix: string, agentContext: AgentContext | null, - isUntracked?: boolean, - previousPath?: string, + { isUntracked, previousPath, isBinary }: BuildDiffFileOptions = {}, ): DiffFile { return { id: `${sourcePrefix}:${index}:${metadata.name}`, @@ -152,6 +158,7 @@ function buildDiffFile( metadata, agent: findAgentFileContext(agentContext, metadata.name, metadata.prevName), isUntracked, + isBinary: isBinary ?? patchLooksBinary(patch), }; } @@ -237,7 +244,9 @@ function buildUntrackedDiffFile( index, sourcePrefix, agentContext, - true, // isUntracked + { + isUntracked: true, + }, ); } @@ -326,6 +335,52 @@ function normalizePatchChangeset( }; } +/** Return the change type to show when direct file comparison skips binary contents. */ +function resolveBinaryComparisonType( + leftPath: string, + rightPath: string, +): FileDiffMetadata["type"] { + if (leftPath === "/dev/null") { + return "new"; + } + + if (rightPath === "/dev/null") { + return "deleted"; + } + + return "change"; +} + +/** Build a placeholder changeset for direct file comparisons that include binary content. */ +function buildBinaryFileDiffChangeset( + input: FileCommandInput | DiffToolCommandInput, + displayPath: string, + title: string, + leftPath: string, + rightPath: string, + agentContext: AgentContext | null, +) { + return { + id: `pair:${displayPath}`, + sourceLabel: input.kind === "difftool" ? "git difftool" : "file compare", + title, + agentSummary: agentContext?.summary, + files: [ + buildDiffFile( + createSkippedBinaryMetadata(displayPath, resolveBinaryComparisonType(leftPath, rightPath)), + `Binary file skipped: ${basename(input.left)} ↔ ${basename(input.right)}\n`, + 0, + displayPath, + agentContext, + { + previousPath: basename(input.left), + isBinary: true, + }, + ), + ], + } satisfies Changeset; +} + /** Build a changeset by diffing two concrete files on disk. */ async function loadFileDiffChangeset( input: FileCommandInput | DiffToolCommandInput, @@ -334,8 +389,6 @@ async function loadFileDiffChangeset( ) { const leftPath = resolvePath(cwd, input.left); const rightPath = resolvePath(cwd, input.right); - const leftText = await Bun.file(leftPath).text(); - const rightText = await Bun.file(rightPath).text(); const displayPath = input.kind === "difftool" ? (input.path ?? basename(input.right)) : basename(input.right); const title = @@ -345,6 +398,19 @@ async function loadFileDiffChangeset( ? displayPath : `${basename(input.left)} ↔ ${basename(input.right)}`; + if (isProbablyBinaryFile(leftPath) || isProbablyBinaryFile(rightPath)) { + return buildBinaryFileDiffChangeset( + input, + displayPath, + title, + leftPath, + rightPath, + agentContext, + ); + } + + const leftText = await Bun.file(leftPath).text(); + const rightText = await Bun.file(rightPath).text(); const oldFile: FileContents = { name: displayPath, contents: leftText, @@ -367,7 +433,9 @@ async function loadFileDiffChangeset( title, agentSummary: agentContext?.summary, files: [ - buildDiffFile(metadata, patch, 0, displayPath, agentContext, undefined, basename(input.left)), + buildDiffFile(metadata, patch, 0, displayPath, agentContext, { + previousPath: basename(input.left), + }), ], } satisfies Changeset; } diff --git a/src/core/types.ts b/src/core/types.ts index 5a12b85..9974547 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -40,6 +40,7 @@ export interface DiffFile { metadata: FileDiffMetadata; agent: AgentFileContext | null; isUntracked?: boolean; + isBinary?: boolean; } export interface Changeset { diff --git a/src/opentui/HunkDiffView.tsx b/src/opentui/HunkDiffView.tsx index 2099481..bbfea60 100644 --- a/src/opentui/HunkDiffView.tsx +++ b/src/opentui/HunkDiffView.tsx @@ -1,4 +1,5 @@ import { useMemo } from "react"; +import { patchLooksBinary } from "../core/binary"; import type { DiffFile } from "../core/types"; import { findMaxLineNumber } from "../ui/diff/codeColumns"; import { buildSplitRows, buildStackRows } from "../ui/diff/pierre"; @@ -26,12 +27,15 @@ function countDiffStats(metadata: HunkDiffFile["metadata"]) { /** Adapt the public diff shape into Hunk's internal file model without exposing app-only fields. */ function toInternalDiffFile(diff: HunkDiffFile): DiffFile { + const patch = diff.patch ?? ""; + return { agent: null, id: diff.id, + isBinary: patchLooksBinary(patch), language: diff.language, metadata: diff.metadata, - patch: diff.patch ?? "", + patch, path: diff.path ?? diff.metadata.name, previousPath: diff.metadata.prevName, stats: countDiffStats(diff.metadata), diff --git a/src/ui/components/ui-components.test.tsx b/src/ui/components/ui-components.test.tsx index 849d867..fdc6b82 100644 --- a/src/ui/components/ui-components.test.tsx +++ b/src/ui/components/ui-components.test.tsx @@ -300,7 +300,7 @@ function createWrapBootstrap(): AppBootstrap { }); } -function createEmptyDiffFile(type: "rename-pure" | "new" | "deleted"): DiffFile { +function createEmptyDiffFile(type: "change" | "rename-pure" | "new" | "deleted"): DiffFile { return { id: `empty:${type}`, path: `${type}.ts`, @@ -1811,6 +1811,25 @@ describe("UI components", () => { 6, ); expect(deletedFileFrame).toContain("The file is marked as deleted."); + + const binaryFileFrame = await captureFrame( + , + 76, + 6, + ); + expect(binaryFileFrame).toContain("Binary file skipped"); }); test("PierreDiffView reuses highlighted rows after unmounting and remounting a file section", async () => { diff --git a/src/ui/diff/renderRows.tsx b/src/ui/diff/renderRows.tsx index f64ce29..b26dbd5 100644 --- a/src/ui/diff/renderRows.tsx +++ b/src/ui/diff/renderRows.tsx @@ -555,6 +555,10 @@ export function diffMessage(file: DiffFile) { return "No textual hunks. This change only renames the file."; } + if (file.isBinary) { + return "Binary file skipped"; + } + if (file.metadata.type === "new") { return "No textual hunks. The file is marked as new."; } From ab591ad31a1bc055315b68ab7dfdeff2be5fb6d5 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Fri, 10 Apr 2026 10:08:38 -0400 Subject: [PATCH 2/2] refactor(binary): clarify signal byte helper naming --- src/core/binary.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/binary.ts b/src/core/binary.ts index fd4067d..b11bbcc 100644 --- a/src/core/binary.ts +++ b/src/core/binary.ts @@ -46,7 +46,7 @@ function readFilePrefix(path: string) { } /** Return whether one byte is a strong binary signal instead of normal text content. */ -function isSuspiciousTextByte(byte: number) { +function isBinarySignalByte(byte: number) { return byte < 0x07 || (byte > 0x0d && byte < 0x20) || byte === 0x7f; } @@ -64,17 +64,17 @@ export function isProbablyBinaryFile(path: string) { return false; } - let suspiciousBytes = 0; + let binarySignalBytes = 0; for (const byte of prefix) { if (byte === 0) { return true; } - if (isSuspiciousTextByte(byte)) { - suspiciousBytes += 1; + if (isBinarySignalByte(byte)) { + binarySignalBytes += 1; } } - return suspiciousBytes / prefix.length >= BINARY_CONTROL_BYTE_RATIO; + return binarySignalBytes / prefix.length >= BINARY_CONTROL_BYTE_RATIO; }