Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 2 additions & 77 deletions internal/agent/encoding/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1147,10 +1147,9 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto

// Gather contextual information for richer encoding
episodeCtx := ea.getEpisodeContext(ctx, raw)
relatedCtx := ea.getRelatedContext(ctx, raw)

// Build the LLM prompt
prompt := buildCompressionPrompt(truncatedContent, raw.Source, raw.Type, episodeCtx, relatedCtx, ea.coachingInstructions, ea.config.ConceptVocabulary)
prompt := buildCompressionPrompt(truncatedContent, raw.Source, raw.Type, episodeCtx, ea.coachingInstructions, ea.config.ConceptVocabulary)

req := llm.CompletionRequest{
Messages: []llm.Message{
Expand Down Expand Up @@ -1221,7 +1220,7 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto
// NOTE: The prompt deliberately avoids showing a JSON template because the local LLM model
// echoes template placeholder text verbatim into the output fields. Structured output
// (response_format with json_schema) enforces the JSON structure instead.
func buildCompressionPrompt(content, source, memType, episodeCtx, relatedCtx, coachingInstructions string, conceptVocabulary []string) string {
func buildCompressionPrompt(content, source, memType, episodeCtx, coachingInstructions string, conceptVocabulary []string) string {
var b strings.Builder

if source == "ingest" {
Expand Down Expand Up @@ -1268,10 +1267,6 @@ Fill in every JSON field based on the actual event content below:
if episodeCtx != "" {
b.WriteString(episodeCtx)
}
if relatedCtx != "" {
b.WriteString(relatedCtx)
}

if coachingInstructions != "" {
b.WriteString(coachingInstructions)
b.WriteString("\n\n")
Expand Down Expand Up @@ -1779,35 +1774,6 @@ func (ea *EncodingAgent) getEpisodeContext(ctx context.Context, raw store.RawMem
return result
}

// getRelatedContext gathers semantically similar existing memories for context.
// It derives up to five keywords from the raw content, runs a concept search,
// and renders at most three matches as a prompt-ready text section. Returns ""
// when no keywords can be extracted, the search fails, or nothing matches.
func (ea *EncodingAgent) getRelatedContext(ctx context.Context, raw store.RawMemory) string {
	// Nothing to search for without keywords.
	terms := extractKeywords(raw.Content)
	if len(terms) == 0 {
		return ""
	}

	// Keep the concept query focused: at most five search terms.
	if len(terms) > 5 {
		terms = terms[:5]
	}

	// Best-effort lookup — a search error simply yields no extra context.
	matches, err := ea.store.SearchByConcepts(ctx, terms, 3)
	if err != nil || len(matches) == 0 {
		return ""
	}

	var b strings.Builder
	b.WriteString("RELATED EXISTING MEMORIES:\n")
	for _, mem := range matches {
		fmt.Fprintf(&b, " - [%s] %s (concepts: %s)\n",
			mem.Timestamp.Format("2006-01-02 15:04"),
			mem.Summary,
			joinConcepts(mem.Concepts),
		)
	}
	b.WriteString("\n")
	return b.String()
}

// getEpisodeIDForRaw finds which episode a raw memory belongs to.
// Checks both open and recently closed episodes since encoding is async
// and the episode may close before encoding completes.
Expand Down Expand Up @@ -1836,47 +1802,6 @@ func getEpisodeIDForRaw(ea *EncodingAgent, ctx context.Context, raw store.RawMem
return ""
}

// extractKeywords pulls significant words from content for concept search.
func extractKeywords(content string) []string {
// Simple keyword extraction: split, filter short/common words
words := strings.Fields(strings.ToLower(content))
seen := make(map[string]bool)
var keywords []string

stopWords := map[string]bool{
"the": true, "a": true, "an": true, "is": true, "was": true,
"are": true, "were": true, "be": true, "been": true, "being": true,
"have": true, "has": true, "had": true, "do": true, "does": true,
"did": true, "will": true, "would": true, "could": true, "should": true,
"may": true, "might": true, "shall": true, "can": true, "to": true,
"of": true, "in": true, "for": true, "on": true, "with": true,
"at": true, "by": true, "from": true, "as": true, "into": true,
"through": true, "during": true, "before": true, "after": true,
"it": true, "its": true, "this": true, "that": true, "these": true,
"and": true, "but": true, "or": true, "nor": true, "not": true,
}

for _, w := range words {
if len(w) < 3 || stopWords[w] || seen[w] {
continue
}
seen[w] = true
keywords = append(keywords, w)
if len(keywords) >= 10 {
break
}
}
return keywords
}

// joinConcepts joins concepts with commas.
func joinConcepts(concepts []string) string {
if len(concepts) == 0 {
return "none"
}
return strings.Join(concepts, ", ")
}

// truncateString truncates a string to maxLen characters.
// Uses rune-aware slicing to avoid splitting multi-byte UTF-8 characters.
func truncateString(s string, maxLen int) string {
Expand Down
85 changes: 0 additions & 85 deletions internal/agent/encoding/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,91 +539,6 @@ func TestHeuristicSalience(t *testing.T) {
})
}

// ---------------------------------------------------------------------------
// Tests for extractKeywords
// ---------------------------------------------------------------------------

func TestExtractKeywords(t *testing.T) {
	t.Run("extracts meaningful words", func(t *testing.T) {
		got := extractKeywords("debugging the authentication module for error handling")

		if len(got) == 0 {
			t.Fatal("expected at least one keyword")
		}
		// Stop words must have been filtered out.
		for _, w := range got {
			if w == "the" || w == "for" {
				t.Errorf("unexpected stop word %q in keywords", w)
			}
		}
	})

	t.Run("limits to 10 keywords", func(t *testing.T) {
		content := strings.Repeat("alpha bravo charlie delta echo foxtrot golf hotel india juliet kilo lima ", 5)

		if got := extractKeywords(content); len(got) > 10 {
			t.Errorf("expected at most 10 keywords, got %d", len(got))
		}
	})

	t.Run("deduplicates words", func(t *testing.T) {
		occurrences := 0
		for _, w := range extractKeywords("testing testing testing testing") {
			if w == "testing" {
				occurrences++
			}
		}
		if occurrences > 1 {
			t.Errorf("expected 'testing' to appear at most once, appeared %d times", occurrences)
		}
	})

	t.Run("empty content returns empty", func(t *testing.T) {
		if got := extractKeywords(""); len(got) != 0 {
			t.Errorf("expected empty keywords for empty content, got %v", got)
		}
	})

	t.Run("filters short words", func(t *testing.T) {
		for _, w := range extractKeywords("go is ok to do it") {
			if len(w) < 3 {
				t.Errorf("unexpected short word %q in keywords", w)
			}
		}
	})
}

// ---------------------------------------------------------------------------
// Tests for joinConcepts
// ---------------------------------------------------------------------------

func TestJoinConcepts(t *testing.T) {
	t.Run("joins concepts with comma", func(t *testing.T) {
		if got := joinConcepts([]string{"go", "testing", "memory"}); got != "go, testing, memory" {
			t.Errorf("expected 'go, testing, memory', got %q", got)
		}
	})

	t.Run("empty returns none", func(t *testing.T) {
		if got := joinConcepts([]string{}); got != "none" {
			t.Errorf("expected 'none', got %q", got)
		}
	})

	t.Run("single concept", func(t *testing.T) {
		if got := joinConcepts([]string{"single"}); got != "single" {
			t.Errorf("expected 'single', got %q", got)
		}
	})
}

// ---------------------------------------------------------------------------
// Tests for isTemporalRelationship
// ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion internal/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type ServerConfig struct {
type ServerDeps struct {
Store store.Store
LLM llm.Provider
ModelManager llm.ModelManager // can be nil if not using embedded provider
ModelManager llm.ModelManager // can be nil if not using embedded provider
Bus events.Bus
Retriever *retrieval.RetrievalAgent
Consolidator routes.ConsolidationRunner // can be nil if disabled
Expand Down
35 changes: 27 additions & 8 deletions internal/mcp/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2596,39 +2596,58 @@ func (srv *MCPServer) handleListExclusions(ctx context.Context, args map[string]

// handleAmend updates a memory's content in place, preserving associations and history.
func (srv *MCPServer) handleAmend(ctx context.Context, args map[string]interface{}) (interface{}, error) {
memoryID, ok := args["memory_id"].(string)
if !ok || memoryID == "" {
return nil, fmt.Errorf("memory_id parameter is required")
rawID, _ := args["raw_id"].(string)
memoryID, _ := args["memory_id"].(string)

if rawID == "" && memoryID == "" {
return nil, fmt.Errorf("at least one of raw_id or memory_id is required")
}

correctedContent, ok := args["corrected_content"].(string)
if !ok || correctedContent == "" {
return nil, fmt.Errorf("corrected_content parameter is required")
}

// Resolve to encoded memory ID — try memory_id first, fall back to raw_id
var resolvedID string
if memoryID != "" {
if _, err := srv.store.GetMemory(ctx, memoryID); err == nil {
resolvedID = memoryID
}
}
if resolvedID == "" && rawID != "" {
m, err := srv.store.GetMemoryByRawID(ctx, rawID)
if err == nil {
resolvedID = m.ID
}
}
if resolvedID == "" {
return nil, fmt.Errorf("memory not found — check that the ID is correct (use check_memory to look up by raw_id)")
}

// Generate a simple summary (first 120 chars of content)
summary := correctedContent
if len(summary) > 120 {
summary = summary[:120] + "..."
}

// Use empty concepts and embedding — encoding agent can re-process if needed
if err := srv.store.AmendMemory(ctx, memoryID, correctedContent, summary, nil, nil); err != nil {
srv.log.Error("failed to amend memory", "memory_id", memoryID, "error", err)
if err := srv.store.AmendMemory(ctx, resolvedID, correctedContent, summary, nil, nil); err != nil {
srv.log.Error("failed to amend memory", "memory_id", resolvedID, "error", err)
return nil, fmt.Errorf("failed to amend memory: %w", err)
}

// Publish event
if srv.bus != nil {
_ = srv.bus.Publish(ctx, events.MemoryAmended{
MemoryID: memoryID,
MemoryID: resolvedID,
NewSummary: summary,
Ts: time.Now(),
})
}

srv.log.Info("memory amended", "memory_id", memoryID)
return toolResult(fmt.Sprintf("Amended memory %s. Content updated, associations and history preserved. Salience bumped +0.05.", memoryID)), nil
srv.log.Info("memory amended", "memory_id", resolvedID)
return toolResult(fmt.Sprintf("Amended memory %s. Content updated, associations and history preserved. Salience bumped +0.05.", resolvedID)), nil
}

// handleCheckMemory inspects a memory's encoding status, concepts, and associations.
Expand Down
10 changes: 7 additions & 3 deletions internal/mcp/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,20 +591,24 @@ func listExclusionsToolDef() ToolDefinition {
func amendToolDef() ToolDefinition {
return ToolDefinition{
Name: "amend",
Description: "Update a memory's content while preserving its ID, associations, activation history, and salience. Use when a recalled memory is stale or incorrect. Records an audit trail of the change.",
Description: "Update a memory's content while preserving its ID, associations, activation history, and salience. Use when a recalled memory is stale or incorrect. Records an audit trail of the change. Accepts either raw_id (from remember) or memory_id (encoded).",
InputSchema: map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"memory_id": map[string]interface{}{
"type": "string",
"description": "The memory ID to amend",
"description": "The encoded memory ID to amend",
},
"raw_id": map[string]interface{}{
"type": "string",
"description": "The raw memory ID returned by remember — will be resolved to the encoded memory",
},
"corrected_content": map[string]interface{}{
"type": "string",
"description": "The updated memory content",
},
},
"required": []string{"memory_id", "corrected_content"},
"required": []string{"corrected_content"},
},
}
}
Expand Down
4 changes: 2 additions & 2 deletions internal/web/static/css/components.css
Original file line number Diff line number Diff line change
Expand Up @@ -466,10 +466,10 @@ blockquote.quote .quote-body {
font-size: 0.88rem;
font-weight: bold;
color: var(--text-dim);
background: linear-gradient(to bottom, rgba(92,114,184,0.08), rgba(92,114,184,0.02));
background: var(--bg-primary, #0f172a);
border-bottom: 1px solid var(--border-color);
position: sticky;
top: 30px;
top: 0;
z-index: 50;
display: flex;
justify-content: space-between;
Expand Down
5 changes: 4 additions & 1 deletion internal/web/static/js/timeline.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@ export function renderTimelineItems() {
export function renderTimelineCard(item, idx) {
var kind = item._kind;
var salPct = Math.min(100, Math.round((item._salience || 0) * 100));
var absTime = item._date.toLocaleString(undefined, { hour: '2-digit', minute: '2-digit' });
var h = item._date.getHours(), m = item._date.getMinutes();
var ampm = h >= 12 ? 'PM' : 'AM';
h = h % 12 || 12;
var absTime = h + ':' + (m < 10 ? '0' : '') + m + ' ' + ampm;
var concepts = item._concepts || [];
var source = item._source || '';
var project = item._project || '';
Expand Down
17 changes: 17 additions & 0 deletions training/docs/experiment_registry.md
Original file line number Diff line number Diff line change
Expand Up @@ -1000,3 +1000,20 @@ Rotation parameter overhead per layer (rank=64):
- **Tracking:** GitHub issue #381 (Phase 4)
- **Result:** (pending — awaiting v7 gold-standard outputs from Gemini Batch API)
- **Verdict:** (pending)

### EXP-27: Qwen 3.5 4B — Model Scale Upgrade with V7 Data

- **Date:** 2026-04-09
- **Status:** REGISTERED
- **Hypothesis:** Qwen 3.5 4B (2560 hidden, 32 layers, 16/4 Q/KV heads) as the frozen base will match or exceed Qwen 3.5 2B spoke quality on encoding while providing a stronger foundation for multi-task spokes (synthesis, retrieval). The wider hidden dim and deeper architecture should improve faithfulness and generalization on diverse inputs without spoke architecture changes.
- **Variable:** Base model size (Qwen 3.5 2B → Qwen 3.5 4B). All other config matched to EXP-26.
- **Control:** EXP-26 (Qwen 3.5 2B, v7 data, same hardware). Direct comparison: same data, same spoke config (4 spokes, rank 64), same hyperparameters.
- **Prediction:** Faithfulness metrics match or exceed EXP-26 (EPR >90%, FR <5%, SC 100%). Eval loss ≤ EXP-26. Stress test 7/7. If 4B doesn't improve over 2B on encoding, the value is in multi-task spoke routing (synthesis/retrieval) where richer base representations matter.
- **Config:** Qwen 3.5 4B (frozen, bf16, ~8 GB) + 4 spokes rank 64 on all 32 layers (~33M trainable params, ~0.8% overhead), batch 1, grad_accum 8, seq_len 2375, LR 3e-4, scalar_lr_scale 0.1, Muon + AdamW, gradient_checkpointing, patience 5, eval_interval 200. Chunked cross-entropy (256 positions). Architecture note: 32 layers in 3:1 DeltaNet/attention ratio (24 DeltaNet + 8 full attention). Spokes applied to all 32 layers.
- **Data:** V7 dataset (same as EXP-26). Production prompt format via build_production_prompt(). Retokenized with Qwen 3.5 4B tokenizer (same tokenizer family, 248K vocab).
- **Hardware:** Local RX 7800 XT, 16GB VRAM, ROCm 7.2.1. Daemon stopped for training. VRAM budget: ~8 GB base (bf16) + ~132 MB spokes (fp32) + ~264 MB optimizer + activations (gradient checkpointing). Expected to fit within 16 GB.
- **Metrics:** Primary: 7-metric faithfulness eval (EPR, FR, TED, CCS, MIH, NP, SC). Secondary: eval loss/PPL, stress_test_hallucination.py (7/7 target), novel schema compliance. Tertiary: inference throughput (tok/s) at RQ4 via llama.cpp.
- **Inference plan:** Export via export_qwen35_spokes.py (now parameterized for any Qwen 3.5 size), quantize to RQ4 via rotorq_quantize_gguf.py, benchmark throughput on RX 7800 XT. Expected: ~2.25 GB weights (RQ4), ~60-70 tok/s.
- **Open question:** Should spokes be placed on all 32 layers, or only the 8 full-attention layers? DeltaNet layers use linear attention with recurrent state — spoke adaptation may not be needed there. Could test attention-only spoke placement as a follow-up (EXP-28).
- **Result:** (pending — blocked on EXP-26 completion)
- **Verdict:** (pending)
11 changes: 9 additions & 2 deletions training/scripts/export_qwen35_spokes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Export Qwen 3.5 2B + trained spoke weights to a single GGUF file.
"""Export Qwen 3.5 + trained spoke weights to a single GGUF file.

Two-phase approach: (1) convert the base HF model to GGUF using llama.cpp's
standard converter, then (2) patch the GGUF to add spoke tensors and metadata
Expand All @@ -12,6 +12,11 @@
--spokes checkpoints/exp20_v6_local/best_spokes.pt \
--output models/qwen35-2b-spokes-f16.gguf

python training/scripts/export_qwen35_spokes.py \
--model models/qwen3.5-4b \
--spokes checkpoints/exp27_v7_4b/best_spokes.pt \
--output models/qwen35-4b-spokes-f16.gguf

Requires: pip install gguf numpy torch (in the felixlm venv)
"""

Expand Down Expand Up @@ -107,7 +112,9 @@ def main():
print(f" Output: {output_path}")

# --- Phase 1: Convert base model to GGUF ---
base_gguf = output_path.parent / "qwen35-2b-f16.gguf"
# Derive base GGUF name from model directory (e.g., "qwen3.5-2b" -> "qwen35-2b-f16.gguf")
model_stem = model_path.name.replace(".", "") # "qwen3.5-4b" -> "qwen35-4b"
base_gguf = output_path.parent / f"{model_stem}-f16.gguf"
if not base_gguf.exists():
print(f"\nPhase 1: Converting base model to GGUF...")
converter = LLAMACPP_DIR / "convert_hf_to_gguf.py"
Expand Down
Loading