Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions packages/cli/src/ui/hooks/atCommandProcessor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,35 @@ describe('handleAtCommand', () => {
);
}, 10000);

// Regression test: a file name containing U+202F (narrow no-break space) must be
// resolved as one @ reference whose content is read and appended to the query.
it('should correctly handle file paths with narrow non-breaking space (NNBSP)', async () => {
  const nnbsp = '\u202F';
  const expectedContent = 'NNBSP file content.';

  // Create the fixture on disk, then derive the path form used in the expected output.
  const targetPath = path.join(testRootDir, `my${nnbsp}file.txt`);
  const createdPath = await createTestFile(targetPath, expectedContent);
  const relPath = getRelativePath(createdPath);

  const { error, processedQuery } = await handleAtCommand({
    query: `@${createdPath}`,
    config: mockConfig,
    addItem: mockAddItem,
    onDebugMessage: mockOnDebugMessage,
    messageId: 129,
    signal: abortController.signal,
  });

  // The reference is echoed with the relative path, followed by the framed file content.
  const expectedParts = [
    { text: `@${relPath}` },
    { text: '\n--- Content from referenced files ---' },
    { text: `\nContent from @${relPath}:\n` },
    { text: expectedContent },
    { text: '\n--- End of content ---' },
  ];

  expect(error).toBeUndefined();
  expect(processedQuery).toEqual(expectedParts);
});

it('should handle multiple @file references', async () => {
const content1 = 'Content file1';
const file1Path = await createTestFile(
Expand Down
85 changes: 34 additions & 51 deletions packages/cli/src/ui/hooks/atCommandProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ import type { UseHistoryManagerReturn } from './useHistoryManager.js';
const REF_CONTENT_HEADER = `\n${REFERENCE_CONTENT_START}`;
const REF_CONTENT_FOOTER = `\n${REFERENCE_CONTENT_END}`;

/**
 * Regex source (a pattern string, not a RegExp instance) for the path/command part of an
 * @ reference, i.e. everything that follows the `@` itself. The leading `@` and any flags
 * are supplied by the code that embeds this source in a larger pattern.
 *
 * It uses strict ASCII whitespace delimiters (space, tab, LF, CR) rather than `\s`, which
 * allows Unicode characters like NNBSP (U+202F) in filenames.
 *
 * The pattern is one or more repetitions of the following alternatives, tried in order:
 *
 * 1. \\. matches a backslash followed by any single character other than a line
 *    terminator (e.g., an escaped space), so escaped delimiters stay inside the path.
 * 2. [^ \t\n\r,;!?()\[\]{}.] matches any character that is NOT a delimiter and NOT a period.
 * 3. \.(?!$|[ \t\n\r]) matches a period ONLY if it is NOT followed by whitespace or
 *    end-of-string, so file extensions are kept while a sentence-ending period is not.
 *
 * NOTE: `$` means end-of-string only while the consuming RegExp is built without the `m`
 * flag; with `m` it would also match before a line break.
 */
export const AT_COMMAND_PATH_REGEX_SOURCE =
'(?:\\\\.|[^ \\t\\n\\r,;!?()\\[\\]{}.]|\\.(?!$|[ \\t\\n\\r]))+';

interface HandleAtCommandParams {
query: string;
config: Config;
Expand All @@ -52,68 +63,40 @@ interface AtCommandPart {
*/
function parseAllAtCommands(query: string): AtCommandPart[] {
const parts: AtCommandPart[] = [];
let currentIndex = 0;

while (currentIndex < query.length) {
let atIndex = -1;
let nextSearchIndex = currentIndex;
// Find next unescaped '@'
while (nextSearchIndex < query.length) {
if (
query[nextSearchIndex] === '@' &&
(nextSearchIndex === 0 || query[nextSearchIndex - 1] !== '\\')
) {
atIndex = nextSearchIndex;
break;
}
nextSearchIndex++;
}
let lastIndex = 0;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous code was simple and straightforward to follow, while the new code is a cryptic regex. Can we apply the fix but avoid adding a regex?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't, because we need to use the same logic in the highlight.ts regex to make sure we highlight precisely the same things that we recognize as an atCommand. Part of the bug was that the highlight regex was badly approximating the code that was in this function.

Also, personally, I find the regex easier to reason about than the previous code (even if the regex itself is a bit more opaque).


if (atIndex === -1) {
// No more @
if (currentIndex < query.length) {
parts.push({ type: 'text', content: query.substring(currentIndex) });
}
break;
}
// Create a new RegExp instance for each call to avoid shared state/lastIndex issues.
const atCommandRegex = new RegExp(
`(?<!\\\\)@${AT_COMMAND_PATH_REGEX_SOURCE}`,
'g',
);

let match: RegExpExecArray | null;

while ((match = atCommandRegex.exec(query)) !== null) {
const matchIndex = match.index;
const fullMatch = match[0];

// Add text before @
if (atIndex > currentIndex) {
if (matchIndex > lastIndex) {
parts.push({
type: 'text',
content: query.substring(currentIndex, atIndex),
content: query.substring(lastIndex, matchIndex),
});
}

// Parse @path
let pathEndIndex = atIndex + 1;
let inEscape = false;
while (pathEndIndex < query.length) {
const char = query[pathEndIndex];
if (inEscape) {
inEscape = false;
} else if (char === '\\') {
inEscape = true;
} else if (/[,\s;!?()[\]{}]/.test(char)) {
// Path ends at first whitespace or punctuation not escaped
break;
} else if (char === '.') {
// For . we need to be more careful - only terminate if followed by whitespace or end of string
// This allows file extensions like .txt, .js but terminates at sentence endings like "file.txt. Next sentence"
const nextChar =
pathEndIndex + 1 < query.length ? query[pathEndIndex + 1] : '';
if (nextChar === '' || /\s/.test(nextChar)) {
break;
}
}
pathEndIndex++;
}
const rawAtPath = query.substring(atIndex, pathEndIndex);
// unescapePath expects the @ symbol to be present, and will handle it.
const atPath = unescapePath(rawAtPath);
const atPath = unescapePath(fullMatch);
parts.push({ type: 'atPath', content: atPath });
currentIndex = pathEndIndex;

lastIndex = matchIndex + fullMatch.length;
}

// Add remaining text
if (lastIndex < query.length) {
parts.push({ type: 'text', content: query.substring(lastIndex) });
}

// Filter out empty text parts that might result from consecutive @paths or leading/trailing spaces
return parts.filter(
(part) => !(part.type === 'text' && part.content.trim() === ''),
Expand Down
8 changes: 8 additions & 0 deletions packages/cli/src/ui/utils/highlight.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ describe('parseInputForHighlighting', () => {
{ text: '@/my\\ path/file.txt', type: 'file' },
]);
});

// U+202F inside a path must not end the highlighted @ reference: the whole path is one
// 'file' token, preceded by the plain 'cat ' text.
it('should highlight a file path with narrow non-breaking spaces (NNBSP)', () => {
  const tokens = parseInputForHighlighting('cat @/my\u202Fpath/file.txt', 0);
  const expectedTokens = [
    { text: 'cat ', type: 'default' },
    { text: '@/my\u202Fpath/file.txt', type: 'file' },
  ];
  expect(tokens).toEqual(expectedTokens);
});
});

describe('parseInputForHighlighting with Transformations', () => {
Expand Down
10 changes: 6 additions & 4 deletions packages/cli/src/ui/utils/highlight.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
import { LRUCache } from 'mnemonist';
import { cpLen, cpSlice } from './textUtils.js';
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
import { AT_COMMAND_PATH_REGEX_SOURCE } from '../hooks/atCommandProcessor.js';

export type HighlightToken = {
text: string;
Expand All @@ -19,11 +20,12 @@ export type HighlightToken = {

// Matches slash commands (e.g., /help), @ references (files or MCP resource URIs),
// and large paste placeholders (e.g., [Pasted Text: 6 lines]).
// The @ pattern uses a negated character class to support URIs like `@file:///example.txt`
// which contain colons. It matches any character except delimiters: comma, whitespace,
// semicolon, common punctuation, and brackets.
//
// The @ pattern uses the same source as the command processor to ensure consistency.
// It matches any character except strict delimiters (ASCII whitespace, comma, etc.).
// This supports URIs like `@file:///example.txt` and filenames with Unicode spaces (like NNBSP).
const HIGHLIGHT_REGEX = new RegExp(
`(^/[a-zA-Z0-9_-]+|@(?:\\\\ |[^,\\s;!?()\\[\\]{}])+|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`,
`(^/[a-zA-Z0-9_-]+|@${AT_COMMAND_PATH_REGEX_SOURCE}|${PASTED_TEXT_PLACEHOLDER_REGEX.source})`,
'g',
);

Expand Down
Loading