/**
 * Sanitizes tool-call arguments to repair a common model output glitch:
 * stray whitespace inserted between a Chinese character and an adjacent digit
 * (issue #2032), e.g. "测试 1 文件.txt" where "测试1文件.txt" was intended.
 *
 * Only the whitespace itself is matched (via lookbehind/lookahead), so a digit
 * or ideograph that has a gap on BOTH sides gets both gaps removed. Matching
 * the neighbouring characters as part of the pattern would consume them and
 * leave the second gap in place, because global matches never overlap.
 *
 * Scope, as implemented:
 * - every top-level string value is rewritten, whatever its key;
 * - non-string values (numbers, arrays, nested objects, ...) are copied
 *   through by reference, untouched;
 * - "Chinese character" means the range U+4E00–U+9FA5, and the gap may be any
 *   run of `\s` characters (including tabs and newlines).
 *
 * NOTE(review): because the rewrite is not limited to path-like arguments, it
 * presumably also alters free-text arguments (file contents, search patterns)
 * that legitimately contain such spacing — confirm this is acceptable.
 *
 * @param args - Raw tool-call arguments as produced by the model.
 * @returns A new object with the same keys; the input object is not mutated.
 */
function sanitizeToolArgs(
  args: Record<string, unknown>,
): Record<string, unknown> {
  // Built once per call rather than once per argument. `String.prototype.replace`
  // resets `lastIndex` on global regexes, so reusing it across values is safe.
  const gapBetweenChineseAndDigit =
    /(?<=[\u4e00-\u9fa5])\s+(?=\d)|(?<=\d)\s+(?=[\u4e00-\u9fa5])/g;

  const sanitized: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(args)) {
    sanitized[key] =
      typeof value === 'string'
        ? value.replace(gapBetweenChineseAndDigit, '')
        : value;
  }

  return sanitized;
}
/**
 * Sanitizes tool-call arguments to repair a common model output glitch:
 * stray whitespace inserted between a Chinese character and an adjacent digit
 * (issue #2032), e.g. "测试 1 文件.txt" where "测试1文件.txt" was intended.
 *
 * Only the whitespace itself is matched (via lookbehind/lookahead), so a digit
 * or ideograph that has a gap on BOTH sides gets both gaps removed. Matching
 * the neighbouring characters as part of the pattern would consume them and
 * leave the second gap in place, because global matches never overlap.
 *
 * Scope, as implemented:
 * - every top-level string value is rewritten, whatever its key;
 * - non-string values (numbers, arrays, nested objects, ...) are copied
 *   through by reference, untouched;
 * - "Chinese character" means the range U+4E00–U+9FA5, and the gap may be any
 *   run of `\s` characters (including tabs and newlines).
 *
 * NOTE(review): because the rewrite is not limited to path-like arguments, it
 * presumably also alters free-text arguments (file contents, search patterns)
 * that legitimately contain such spacing — confirm this is acceptable.
 *
 * @param args - Raw tool-call arguments as produced by the model.
 * @returns A new object with the same keys; the input object is not mutated.
 */
function sanitizeToolArgs(
  args: Record<string, unknown>,
): Record<string, unknown> {
  // Built once per call rather than once per argument. `String.prototype.replace`
  // resets `lastIndex` on global regexes, so reusing it across values is safe.
  const gapBetweenChineseAndDigit =
    /(?<=[\u4e00-\u9fa5])\s+(?=\d)|(?<=\d)\s+(?=[\u4e00-\u9fa5])/g;

  const sanitized: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(args)) {
    sanitized[key] =
      typeof value === 'string'
        ? value.replace(gapBetweenChineseAndDigit, '')
        : value;
  }

  return sanitized;
}
100000; + if (options.cache) { const cacheKey = cache.getCacheKey( options.crawlDirectory, @@ -61,6 +68,15 @@ export async function crawl(options: CrawlOptions): Promise { } results = await api.crawl(options.crawlDirectory).withPromise(); + + // Limit the number of files to prevent OOM in large projects + if (results.length > maxFiles) { + debugLogger.warn( + `Project contains ${results.length} files, limiting to ${maxFiles} to prevent memory issues. ` + + `Consider using .qwenignore to exclude unnecessary directories.`, + ); + results = results.slice(0, maxFiles); + } } catch (_e) { // The directory probably doesn't exist. return [];