From bf63fdfacca848f61c91df58e1d042d034ecbeab Mon Sep 17 00:00:00 2001 From: echoVic Date: Mon, 2 Mar 2026 17:07:28 +0800 Subject: [PATCH 1/2] fix(core): remove spaces between Chinese characters and numbers in tool args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #2032 The model sometimes adds spaces between Chinese characters and numbers in file paths, causing file read failures (e.g., '测试 1 文件.txt' instead of '测试1文件.txt'). This fix sanitizes tool arguments by removing such spaces before processing. Changes: - Added sanitizeToolArgs() function in turn.ts and subagent.ts - Applied sanitization to all tool arguments before creating ToolCallRequestInfo --- packages/core/src/core/turn.ts | 30 ++++++++++++++++++++++++- packages/core/src/subagents/subagent.ts | 28 ++++++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 17c6c47de3..498169e19a 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -367,7 +367,9 @@ export class Turn { fnCall.id ?? `${fnCall.name}-${Date.now()}-${Math.random().toString(16).slice(2)}`; const name = fnCall.name || 'undefined_tool_name'; - const args = (fnCall.args || {}) as Record; + const args = sanitizeToolArgs( + (fnCall.args || {}) as Record, + ); const toolCallRequest: ToolCallRequestInfo = { callId, @@ -399,3 +401,29 @@ function getCitations(resp: GenerateContentResponse): string[] { return citation.uri!; }); } + +/** + * Sanitizes tool arguments to fix common model output issues. + * Specifically removes spaces between Chinese characters and numbers in file paths. 
+ * This fixes issue #2032 where the model adds spaces like "测试 1 文件.txt" instead of "测试1文件.txt" + */ +function sanitizeToolArgs( + args: Record, +): Record { + const sanitized: Record = {}; + + for (const [key, value] of Object.entries(args)) { + if (typeof value === 'string') { + // Drop spaces/tabs (never line breaks) between a Chinese character and a digit, in either order. + // Lookaheads keep the right-hand character unconsumed, so both gaps in "试 1 文" are removed. + sanitized[key] = value.replace( + /([\u4e00-\u9fa5])[ \t]+(?=\d)|(\d)[ \t]+(?=[\u4e00-\u9fa5])/g, + '$1$2', + ); + } else { + sanitized[key] = value; + } + } + + return sanitized; +} diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts index c9328e5ad0..73031de0f0 100644 --- a/packages/core/src/subagents/subagent.ts +++ b/packages/core/src/subagents/subagent.ts @@ -774,7 +774,7 @@ export class SubAgentScope { const requests: ToolCallRequestInfo[] = authorizedCalls.map((fc) => { const toolName = String(fc.name || 'unknown'); const callId = fc.id ?? `${fc.name}-${Date.now()}`; - const args = (fc.args ?? {}) as Record; + const args = sanitizeToolArgs((fc.args ?? {}) as Record); const request: ToolCallRequestInfo = { callId, name: toolName, @@ -1002,3 +1002,29 @@ Important Rules: return finalPrompt; } } + +/** + * Sanitizes tool arguments to fix common model output issues. + * Specifically removes spaces between Chinese characters and numbers in file paths. 
+ * This fixes issue #2032 where the model adds spaces like "测试 1 文件.txt" instead of "测试1文件.txt" + */ +function sanitizeToolArgs( + args: Record, +): Record { + const sanitized: Record = {}; + + for (const [key, value] of Object.entries(args)) { + if (typeof value === 'string') { + // Drop spaces/tabs (never line breaks) between a Chinese character and a digit, in either order. + // Lookaheads keep the right-hand character unconsumed, so both gaps in "试 1 文" are removed. + sanitized[key] = value.replace( + /([\u4e00-\u9fa5])[ \t]+(?=\d)|(\d)[ \t]+(?=[\u4e00-\u9fa5])/g, + '$1$2', + ); + } else { + sanitized[key] = value; + } + } + + return sanitized; +} From 71badb6df6b3a88a2c6be86f9d66103c5ec07da8 Mon Sep 17 00:00:00 2001 From: echoVic Date: Mon, 2 Mar 2026 19:39:08 +0800 Subject: [PATCH 2/2] fix(filesearch): limit max files to prevent OOM crashes Fixes #2004 When users type '@' in large projects, the file crawler scans the entire project directory which can cause OOM crashes if there are too many files. This fix adds a maxFiles limit (default 100,000) to the crawler to prevent memory issues. When the limit is exceeded, a warning is shown and only the first N files are used. Changes: - Added maxFiles option to CrawlOptions interface - Added file count check and limit in crawl() function - Shows warning when project has too many files --- packages/core/src/utils/filesearch/crawler.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/core/src/utils/filesearch/crawler.ts b/packages/core/src/utils/filesearch/crawler.ts index 9184ba3286..917ccf5035 100644 --- a/packages/core/src/utils/filesearch/crawler.ts +++ b/packages/core/src/utils/filesearch/crawler.ts @@ -8,6 +8,9 @@ import path from 'node:path'; import { fdir } from 'fdir'; import type { Ignore } from './ignore.js'; import * as cache from './crawlCache.js'; +import { createDebugLogger } from '../debugLogger.js'; + +const debugLogger = createDebugLogger('CRAWLER'); export interface CrawlOptions { // The directory to start the crawl from. 
@@ -21,6 +24,8 @@ export interface CrawlOptions { // Caching options. cache: boolean; cacheTtl: number; + // Maximum number of crawled paths to keep; longer results are truncated (default: 100000) + maxFiles?: number; } function toPosixPath(p: string) { @@ -28,6 +33,8 @@ function toPosixPath(p: string) { } export async function crawl(options: CrawlOptions): Promise { + const maxFiles = options.maxFiles ?? 100000; + if (options.cache) { const cacheKey = cache.getCacheKey( options.crawlDirectory, @@ -61,6 +68,15 @@ export async function crawl(options: CrawlOptions): Promise { } results = await api.crawl(options.crawlDirectory).withPromise(); + + // Truncate the finished crawl result to maxFiles entries. NOTE(review): the crawl above has already collected every path, so this caps what is returned, not the crawl's own memory use. + if (results.length > maxFiles) { + debugLogger.warn( + `Project contains ${results.length} files, limiting to ${maxFiles} to prevent memory issues. ` + + `Consider using .qwenignore to exclude unnecessary directories.`, + ); + results = results.slice(0, maxFiles); + } } catch (_e) { // The directory probably doesn't exist. return [];