Skip to content

Commit cd86784

Browse files
fix(memory): address PR review blockers and majors for tool-scoped memory
- Gate prefetch behind `learning.enabled` so users who opt out of learning don't have stored rules pinned into the system prompt - Move prefetch after tool-set resolution and pass actual agent tool names to `rules_for_prompt` so unrelated tool namespaces are never scanned - Tighten "stop" edict detection: only treat "stop " as an imperative when it appears at a sentence boundary; remove `contains(" stop ")` from the per-line check to prevent false-positive captures from phrases like "I want to stop working on this" - Remove body content from repeated-failure debug log to avoid PII leaking into log files (log body_len only, matching the edict capture path) - Remove duplicate `is_pinned` predicate — `is_eager` already covers both Critical and High; update the doc-comment to explain compression semantics - Filter `__unscoped__` sentinel from `list_tool_names` so unscoped edicts captured before any tool call are not injected into future prompt filters - Add `log::debug!` entry to all six tool-memory RPC handlers per project convention (stable grep-able `[tool-memory]` prefix) - Remove raw issue reference from `tool_memory_capture` registration log - Exercise `PromptSection::build()` in `section_renders_via_prompt_section_trait` test — the previous version only called `is_empty()`, leaving the trait contract uncovered - Extract shared `MockMemory` to `tool_memory/test_helpers.rs`; use it in both `store_tests.rs` and `capture.rs` to eliminate silent drift risk Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6444899 commit cd86784

9 files changed

Lines changed: 196 additions & 238 deletions

File tree

src/openhuman/agent/harness/session/builder.rs

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -787,24 +787,6 @@ impl Agent {
787787
}
788788
}
789789

790-
// (#1400) Pre-fetch Critical + High priority tool-scoped memory
791-
// rules so they pin into the (compression-resistant) system
792-
// prompt for the whole session. Skipped silently when the
793-
// current runtime cannot host a synchronous bridge — typically
794-
// a single-threaded test harness — so this stays safe for every
795-
// call site of `from_config_*`. The capture hook still runs in
796-
// every session via [`ToolMemoryCaptureHook`] above.
797-
if config.learning.tool_memory_capture_enabled {
798-
let pinned = prefetch_tool_memory_rules_blocking(memory.clone());
799-
if !pinned.is_empty() {
800-
log::info!(
801-
"[memory::tool_memory] pinning {} tool-scoped rule(s) into system prompt",
802-
pinned.len()
803-
);
804-
prompt_builder = prompt_builder.with_tool_memory_rules(pinned);
805-
}
806-
}
807-
808790
// Build post-turn hooks when learning is enabled
809791
let mut post_turn_hooks: Vec<Arc<dyn crate::openhuman::agent::hooks::PostTurnHook>> =
810792
Vec::new();
@@ -864,7 +846,7 @@ impl Agent {
864846
crate::openhuman::memory::ToolMemoryCaptureHook::new(memory.clone(), true),
865847
));
866848
log::info!(
867-
"[learning] tool_memory_capture hook registered (#1400 — durable tool rules)"
849+
"[learning] tool_memory_capture hook registered"
868850
);
869851
}
870852
}
@@ -997,6 +979,28 @@ impl Agent {
997979
.filter(|t| !existing_names.contains(t.name())),
998980
);
999981

982+
// Pre-fetch Critical + High priority tool-scoped memory rules so they
983+
// pin into the (compression-resistant) system prompt for the whole
984+
// session. Done here — after the tool list is finalised — so we only
985+
// fetch rules for tools this agent can actually use. Skipped when
986+
// `learning.enabled` is false (no new rules are written in that mode,
987+
// and users who opt out of learning expect no stored rules to surface)
988+
// or when the runtime cannot host a synchronous bridge (single-threaded
989+
// test harnesses).
990+
if config.learning.enabled && config.learning.tool_memory_capture_enabled {
991+
let agent_tool_names: Vec<String> =
992+
tools.iter().map(|t| t.name().to_string()).collect();
993+
let pinned =
994+
prefetch_tool_memory_rules_blocking(memory.clone(), &agent_tool_names);
995+
if !pinned.is_empty() {
996+
log::info!(
997+
"[memory::tool_memory] pinning {} tool-scoped rule(s) into system prompt",
998+
pinned.len()
999+
);
1000+
prompt_builder = prompt_builder.with_tool_memory_rules(pinned);
1001+
}
1002+
}
1003+
10001004
// Build the P-Format registry AFTER the tool list is finalised
10011005
// (including orchestrator tools) so every tool gets a signature
10021006
// entry. The registry is self-contained — it doesn't hold a
@@ -1198,17 +1202,19 @@ impl Agent {
11981202
/// merely seeds the rules that exist at session start.
11991203
fn prefetch_tool_memory_rules_blocking(
12001204
memory: Arc<dyn Memory>,
1205+
tool_names: &[String],
12011206
) -> Vec<crate::openhuman::memory::ToolMemoryRule> {
12021207
let Ok(handle) = tokio::runtime::Handle::try_current() else {
12031208
return Vec::new();
12041209
};
12051210
if handle.runtime_flavor() != tokio::runtime::RuntimeFlavor::MultiThread {
12061211
return Vec::new();
12071212
}
1213+
let tool_names = tool_names.to_vec();
12081214
tokio::task::block_in_place(|| {
12091215
handle.block_on(async move {
12101216
let store = crate::openhuman::memory::ToolMemoryStore::new(memory);
1211-
match store.rules_for_prompt(&[]).await {
1217+
match store.rules_for_prompt(&tool_names).await {
12121218
Ok(grouped) => {
12131219
let mut flat: Vec<_> = grouped.into_values().flatten().collect();
12141220
flat.sort_by(|a, b| {

src/openhuman/memory/ops/tool_memory.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ async fn open_store() -> Result<ToolMemoryStore, String> {
6969
pub async fn tool_rule_put(
7070
params: ToolRulePutParams,
7171
) -> Result<RpcOutcome<ToolMemoryRule>, String> {
72+
log::debug!("[tool-memory] rpc tool_rule_put tool={}", params.tool_name);
7273
let store = open_store().await?;
7374
let mut rule = ToolMemoryRule::new(
7475
&params.tool_name,
@@ -90,6 +91,11 @@ pub async fn tool_rule_put(
9091
pub async fn tool_rule_get(
9192
params: ToolRuleRefParams,
9293
) -> Result<RpcOutcome<Option<ToolMemoryRule>>, String> {
94+
log::debug!(
95+
"[tool-memory] rpc tool_rule_get tool={} id={}",
96+
params.tool_name,
97+
params.id
98+
);
9399
let store = open_store().await?;
94100
let rule = store.get_rule(&params.tool_name, &params.id).await?;
95101
Ok(RpcOutcome::single_log(rule, "tool memory rule fetched"))
@@ -99,13 +105,19 @@ pub async fn tool_rule_get(
99105
pub async fn tool_rule_list(
100106
params: ToolRuleListParams,
101107
) -> Result<RpcOutcome<Vec<ToolMemoryRule>>, String> {
108+
log::debug!("[tool-memory] rpc tool_rule_list tool={}", params.tool_name);
102109
let store = open_store().await?;
103110
let rules = store.list_rules(&params.tool_name).await?;
104111
Ok(RpcOutcome::single_log(rules, "tool memory rules listed"))
105112
}
106113

107114
/// Delete a tool-scoped rule by id.
108115
pub async fn tool_rule_delete(params: ToolRuleRefParams) -> Result<RpcOutcome<bool>, String> {
116+
log::debug!(
117+
"[tool-memory] rpc tool_rule_delete tool={} id={}",
118+
params.tool_name,
119+
params.id
120+
);
109121
let store = open_store().await?;
110122
let deleted = store.delete_rule(&params.tool_name, &params.id).await?;
111123
Ok(RpcOutcome::single_log(deleted, "tool memory rule deleted"))
@@ -126,6 +138,10 @@ pub struct ToolRulesForPromptResult {
126138
pub async fn tool_rules_for_prompt(
127139
params: ToolRulesForPromptParams,
128140
) -> Result<RpcOutcome<ToolRulesForPromptResult>, String> {
141+
log::debug!(
142+
"[tool-memory] rpc tool_rules_for_prompt tools={:?}",
143+
params.tools
144+
);
129145
let store = open_store().await?;
130146
let grouped = store.rules_for_prompt(&params.tools).await?;
131147
let mut flat: Vec<ToolMemoryRule> = grouped.into_values().flatten().collect();
@@ -148,6 +164,7 @@ pub async fn tool_rules_for_prompt(
148164
/// Render the raw JSON form of a tool's rules, useful for envelope
149165
/// consumers that want the unfiltered list.
150166
pub async fn tool_rules_json(params: ToolRuleListParams) -> Result<RpcOutcome<Value>, String> {
167+
log::debug!("[tool-memory] rpc tool_rules_json tool={}", params.tool_name);
151168
let store = open_store().await?;
152169
let value = store.list_rules_json(&params.tool_name).await?;
153170
Ok(RpcOutcome::single_log(value, "tool memory rules json"))

src/openhuman/memory/tool_memory/capture.rs

Lines changed: 11 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,14 @@ impl ToolMemoryCaptureHook {
7777
return Vec::new();
7878
}
7979
let lower = trimmed.to_lowercase();
80+
// Only treat "stop" as an imperative edict when it appears at a
81+
// sentence boundary (start of message or after ". "/"\n"), so routine
82+
// phrases like "I want to stop working" don't trigger false captures.
83+
let stop_imperative = lower.starts_with("stop ")
84+
|| lower.contains(". stop ")
85+
|| lower.contains("\nstop ");
8086
if !(lower.contains("never ") || lower.contains("don't ") || lower.contains("do not "))
81-
&& !lower.contains("stop ")
87+
&& !stop_imperative
8288
{
8389
return Vec::new();
8490
}
@@ -105,8 +111,7 @@ impl ToolMemoryCaptureHook {
105111
|| lower_line.starts_with("stop ")
106112
|| lower_line.contains(" never ")
107113
|| lower_line.contains(" don't ")
108-
|| lower_line.contains(" do not ")
109-
|| lower_line.contains(" stop ");
114+
|| lower_line.contains(" do not ");
110115
if !is_edict {
111116
continue;
112117
}
@@ -195,8 +200,8 @@ impl PostTurnHook for ToolMemoryCaptureHook {
195200

196201
for (tool, body) in Self::extract_repeated_failures(&ctx.tool_calls) {
197202
log::debug!(
198-
"[tool-memory] capturing repeated failure tool={tool} body=\"{}\"",
199-
truncate_for_log(&body)
203+
"[tool-memory] capturing repeated failure tool={tool} body_len={}",
204+
body.len()
200205
);
201206
if let Err(err) = self
202207
.store
@@ -286,103 +291,7 @@ mod tests {
286291
use super::*;
287292
use crate::openhuman::agent::hooks::ToolCallRecord;
288293
use crate::openhuman::memory::tool_memory::store::ToolMemoryStore;
289-
use crate::openhuman::memory::{MemoryCategory, MemoryEntry, NamespaceSummary, RecallOpts};
290-
use async_trait::async_trait;
291-
use parking_lot::Mutex;
292-
use std::collections::HashMap;
293-
294-
#[derive(Default)]
295-
struct MockMemory {
296-
entries: Mutex<HashMap<(String, String), MemoryEntry>>,
297-
}
298-
299-
#[async_trait]
300-
impl Memory for MockMemory {
301-
fn name(&self) -> &str {
302-
"mock"
303-
}
304-
async fn store(
305-
&self,
306-
namespace: &str,
307-
key: &str,
308-
content: &str,
309-
category: MemoryCategory,
310-
session_id: Option<&str>,
311-
) -> anyhow::Result<()> {
312-
self.entries.lock().insert(
313-
(namespace.to_string(), key.to_string()),
314-
MemoryEntry {
315-
id: format!("{namespace}/{key}"),
316-
key: key.to_string(),
317-
content: content.to_string(),
318-
namespace: Some(namespace.to_string()),
319-
category,
320-
timestamp: "now".into(),
321-
session_id: session_id.map(str::to_string),
322-
score: None,
323-
},
324-
);
325-
Ok(())
326-
}
327-
async fn recall(
328-
&self,
329-
_query: &str,
330-
_limit: usize,
331-
_opts: RecallOpts<'_>,
332-
) -> anyhow::Result<Vec<MemoryEntry>> {
333-
Ok(Vec::new())
334-
}
335-
async fn get(&self, namespace: &str, key: &str) -> anyhow::Result<Option<MemoryEntry>> {
336-
Ok(self
337-
.entries
338-
.lock()
339-
.get(&(namespace.to_string(), key.to_string()))
340-
.cloned())
341-
}
342-
async fn list(
343-
&self,
344-
namespace: Option<&str>,
345-
_category: Option<&MemoryCategory>,
346-
_session_id: Option<&str>,
347-
) -> anyhow::Result<Vec<MemoryEntry>> {
348-
let lock = self.entries.lock();
349-
let iter = lock.iter();
350-
Ok(match namespace {
351-
Some(ns) => iter
352-
.filter(|((n, _), _)| n == ns)
353-
.map(|(_, v)| v.clone())
354-
.collect(),
355-
None => iter.map(|(_, v)| v.clone()).collect(),
356-
})
357-
}
358-
async fn forget(&self, namespace: &str, key: &str) -> anyhow::Result<bool> {
359-
Ok(self
360-
.entries
361-
.lock()
362-
.remove(&(namespace.to_string(), key.to_string()))
363-
.is_some())
364-
}
365-
async fn namespace_summaries(&self) -> anyhow::Result<Vec<NamespaceSummary>> {
366-
let mut counts: HashMap<String, usize> = HashMap::new();
367-
for ((ns, _), _) in self.entries.lock().iter() {
368-
*counts.entry(ns.clone()).or_default() += 1;
369-
}
370-
Ok(counts
371-
.into_iter()
372-
.map(|(namespace, count)| NamespaceSummary {
373-
namespace,
374-
count,
375-
last_updated: None,
376-
})
377-
.collect())
378-
}
379-
async fn count(&self) -> anyhow::Result<usize> {
380-
Ok(self.entries.lock().len())
381-
}
382-
async fn health_check(&self) -> bool {
383-
true
384-
}
385-
}
294+
use crate::openhuman::memory::tool_memory::test_helpers::MockMemory;
386295

387296
fn ctx_with(message: &str, tool_calls: Vec<ToolCallRecord>) -> TurnContext {
388297
TurnContext {

src/openhuman/memory/tool_memory/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ pub mod capture;
3535
pub mod prompt;
3636
pub mod store;
3737
pub mod types;
38+
#[cfg(test)]
39+
pub mod test_helpers;
3840

3941
pub use capture::ToolMemoryCaptureHook;
4042
pub use prompt::{render_tool_memory_rules, ToolMemoryRulesSection, TOOL_MEMORY_HEADING};

src/openhuman/memory/tool_memory/prompt.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ fn priority_marker(priority: ToolMemoryPriority) -> &'static str {
144144
#[cfg(test)]
145145
mod tests {
146146
use super::*;
147+
use crate::openhuman::agent::prompts::types::{LearnedContextData, PromptContext, ToolCallFormat};
147148
use crate::openhuman::memory::tool_memory::types::ToolMemorySource;
148149

149150
fn rule(tool: &str, body: &str, priority: ToolMemoryPriority) -> ToolMemoryRule {
@@ -214,14 +215,34 @@ mod tests {
214215

215216
#[test]
216217
fn section_renders_via_prompt_section_trait() {
217-
// We construct a minimal context-independent test: build() must
218-
// not depend on PromptContext fields and must surface the
219-
// section's snapshot verbatim.
218+
// build() must not depend on PromptContext fields — it returns
219+
// the at-construction snapshot verbatim. We call it here to
220+
// exercise the trait contract directly.
220221
let section = ToolMemoryRulesSection::new(vec![rule(
221222
"email",
222223
"never email Sarah",
223224
ToolMemoryPriority::Critical,
224225
)]);
225226
assert!(!section.is_empty());
227+
let visible = std::collections::HashSet::new();
228+
let ctx = PromptContext {
229+
workspace_dir: std::path::Path::new("."),
230+
model_name: "test",
231+
agent_id: "test",
232+
tools: &[],
233+
skills: &[],
234+
dispatcher_instructions: "",
235+
learned: LearnedContextData::default(),
236+
visible_tool_names: &visible,
237+
tool_call_format: ToolCallFormat::PFormat,
238+
connected_integrations: &[],
239+
connected_identities_md: String::new(),
240+
include_profile: false,
241+
include_memory_md: false,
242+
curated_snapshot: None,
243+
user_identity: None,
244+
};
245+
let built = section.build(&ctx).unwrap();
246+
assert!(built.contains("never email Sarah"));
226247
}
227248
}

src/openhuman/memory/tool_memory/store.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,11 @@ impl ToolMemoryStore {
205205
let mut out = Vec::new();
206206
for summary in summaries {
207207
if let Some(tool) = summary.namespace.strip_prefix("tool-") {
208-
if !tool.is_empty() {
208+
// Exclude empty names and the sentinel used for unscoped
209+
// edicts captured before any tool call ran — those rules are
210+
// not permanently associated with a real tool and must not be
211+
// injected into prompt filtering for arbitrary sessions.
212+
if !tool.is_empty() && tool != "__unscoped__" {
209213
out.push(tool.to_string());
210214
}
211215
}

0 commit comments

Comments
 (0)