From 1e76a8f8e68f71a4ea833446ea9d4b0f07b27302 Mon Sep 17 00:00:00 2001 From: qin-ctx Date: Wed, 8 Apr 2026 14:41:18 +0800 Subject: [PATCH] fix(openclaw): sanitize and cap recall queries Recall should use cleaned user text and avoid sending oversized prompts to retrieval. Add unit coverage for sanitization and truncation behavior. --- examples/openclaw-plugin/index.ts | 48 ++++++++++++++++++- .../tests/ut/index-utils.test.ts | 27 +++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/examples/openclaw-plugin/index.ts b/examples/openclaw-plugin/index.ts index 2bf71daf2..849501402 100644 --- a/examples/openclaw-plugin/index.ts +++ b/examples/openclaw-plugin/index.ts @@ -11,6 +11,7 @@ import { compileSessionPatterns, isTranscriptLikeIngest, extractLatestUserText, + sanitizeUserTextForCapture, shouldBypassSession, } from "./text-utils.js"; import { @@ -110,6 +111,7 @@ type OpenClawPluginApi = { const MAX_OPENVIKING_STDERR_LINES = 200; const MAX_OPENVIKING_STDERR_CHARS = 256_000; const AUTO_RECALL_TIMEOUT_MS = 5_000; +const RECALL_QUERY_MAX_CHARS = 4_000; /** * OpenViking `UserIdentifier` allows only [a-zA-Z0-9_-] for agent_id @@ -128,6 +130,39 @@ export function sanitizeOpenVikingAgentIdHeader(raw: string): string { return normalized.length > 0 ? normalized : "ov_agent"; } +export type PreparedRecallQuery = { + query: string; + truncated: boolean; + originalChars: number; + finalChars: number; +}; + +export function prepareRecallQuery(rawText: string): PreparedRecallQuery { + const sanitized = sanitizeUserTextForCapture(rawText).trim(); + const originalChars = sanitized.length; + + if (!sanitized) { + return { + query: "", + truncated: false, + originalChars: 0, + finalChars: 0, + }; + } + + const query = + sanitized.length > RECALL_QUERY_MAX_CHARS + ? sanitized.slice(0, RECALL_QUERY_MAX_CHARS).trim() + : sanitized; + + return { + query, + truncated: sanitized.length > RECALL_QUERY_MAX_CHARS, + originalChars, + finalChars: query.length, + }; +} + function extractAgentIdFromSessionKey(sessionKey?: string): string | undefined { const raw = typeof sessionKey === "string" ? sessionKey.trim() : ""; if (!raw) { @@ -882,12 +917,21 @@ const contextEnginePlugin = { } const eventObj = (event ?? {}) as { messages?: unknown[]; prompt?: string }; - const queryText = - extractLatestUserText(eventObj.messages) || + const latestUserText = extractLatestUserText(eventObj.messages); + const rawRecallQuery = + latestUserText || (typeof eventObj.prompt === "string" ? eventObj.prompt.trim() : ""); + const recallQuery = prepareRecallQuery(rawRecallQuery); + const queryText = recallQuery.query; if (!queryText) { return; } + if (recallQuery.truncated) { + verboseRoutingInfo( + `openviking: recall query truncated (` + + `chars=${recallQuery.originalChars}->${recallQuery.finalChars})`, + ); + } const prependContextParts: string[] = []; diff --git a/examples/openclaw-plugin/tests/ut/index-utils.test.ts b/examples/openclaw-plugin/tests/ut/index-utils.test.ts index 77022ffd1..c8b91844b 100644 --- a/examples/openclaw-plugin/tests/ut/index-utils.test.ts +++ b/examples/openclaw-plugin/tests/ut/index-utils.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { + prepareRecallQuery, sanitizeOpenVikingAgentIdHeader, createSessionAgentResolver, } from "../../index.js"; @@ -97,3 +98,29 @@ describe("createSessionAgentResolver", () => { expect(r1.resolved).toBe(r2.resolved); }); }); + +describe("prepareRecallQuery", () => { + it("sanitizes the recall query before returning it", () => { + const result = prepareRecallQuery( + " stale\nhello world\u0000 ", + ); + + expect(result).toEqual({ + query: "hello world", + truncated: false, + originalChars: 11, + finalChars: 11, + }); + }); + + it("truncates overly long recall queries after sanitization", () => { + const rawQuery = "x".repeat(4100); + + const result = prepareRecallQuery(rawQuery); + + expect(result.query).toBe("x".repeat(4000)); + expect(result.truncated).toBe(true); + expect(result.originalChars).toBe(4100); + expect(result.finalChars).toBe(4000); + }); +});