From 9e874ab87a123eeda74d12945df5d78472151226 Mon Sep 17 00:00:00 2001
From: Caleb Gross <caleb@grossventures.com>
Date: Thu, 9 Apr 2026 15:12:55 -0400
Subject: [PATCH] fix: encoding faithfulness + amend raw_id + dashboard
 timeline bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove getRelatedContext() from encoding pipeline — FTS5 keyword
  matching injected unrelated memory summaries into the LLM prompt,
  causing cross-contamination (#383). Also removes extractKeywords
  and joinConcepts (dead code after removal).

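- amend tool now accepts raw_id in addition to memory_id (#382).
  memory_id is tried first via GetMemory; if it is absent or not
  found and raw_id was supplied, the memory is resolved via
  GetMemoryByRawID. At least one of the two IDs is required.
  Mirrors the check_memory pattern.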

- Dashboard: fix sticky "Today" header overlapping first timeline
  entry (top: 30px → 0, solid background). Fix time formatting
  producing single-digit minutes (manual zero-padding replaces
  locale-dependent toLocaleString). Timestamps now always render
  as 12-hour "h:mm AM/PM" regardless of browser locale.

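- Sync Python training_constants.py with Go buildCompressionPrompt
  (remove related_ctx parameter). Remove RELATED_MEMORY_STUB from
  prepare_faithfulness_data.py.

- Training: export_qwen35_spokes.py now derives the base GGUF name
  from the model directory instead of hard-coding the 2B name, so
  it works for any Qwen 3.5 size. train_qwen_spokes.py switches the
  Qwen attn_implementation from "eager" to "sdpa". Register EXP-27
  (Qwen 3.5 4B) in experiment_registry.md.

- gofmt: realign the ModelManager field comment in
  internal/api/server.go (no functional change).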

Closes #382, closes #383

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/agent/encoding/agent.go              | 79 +----------------
 internal/agent/encoding/agent_test.go         | 85 -------------------
 internal/api/server.go                        |  2 +-
 internal/mcp/server.go                        | 35 ++++++--
 internal/mcp/tools.go                         | 10 ++-
 internal/web/static/css/components.css        |  4 +-
 internal/web/static/js/timeline.js            |  5 +-
 training/docs/experiment_registry.md          | 17 ++++
 training/scripts/export_qwen35_spokes.py      | 11 ++-
 training/scripts/prepare_faithfulness_data.py | 11 +--
 training/scripts/train_qwen_spokes.py         |  2 +-
 training/scripts/training_constants.py        |  3 -
 12 files changed, 71 insertions(+), 193 deletions(-)

diff --git a/internal/agent/encoding/agent.go b/internal/agent/encoding/agent.go
index cc215c8e..9bde7087 100644
--- a/internal/agent/encoding/agent.go
+++ b/internal/agent/encoding/agent.go
@@ -1147,10 +1147,9 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto
 
 	// Gather contextual information for richer encoding
 	episodeCtx := ea.getEpisodeContext(ctx, raw)
-	relatedCtx := ea.getRelatedContext(ctx, raw)
 
 	// Build the LLM prompt
-	prompt := buildCompressionPrompt(truncatedContent, raw.Source, raw.Type, episodeCtx, relatedCtx, ea.coachingInstructions, ea.config.ConceptVocabulary)
+	prompt := buildCompressionPrompt(truncatedContent, raw.Source, raw.Type, episodeCtx, ea.coachingInstructions, ea.config.ConceptVocabulary)
 
 	req := llm.CompletionRequest{
 		Messages: []llm.Message{
@@ -1221,7 +1220,7 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto
 // NOTE: The prompt deliberately avoids showing a JSON template because the local LLM model
 // echoes template placeholder text verbatim into the output fields. Structured output
 // (response_format with json_schema) enforces the JSON structure instead.
-func buildCompressionPrompt(content, source, memType, episodeCtx, relatedCtx, coachingInstructions string, conceptVocabulary []string) string {
+func buildCompressionPrompt(content, source, memType, episodeCtx, coachingInstructions string, conceptVocabulary []string) string {
 	var b strings.Builder
 
 	if source == "ingest" {
@@ -1268,10 +1267,6 @@ Fill in every JSON field based on the actual event content below:
 	if episodeCtx != "" {
 		b.WriteString(episodeCtx)
 	}
-	if relatedCtx != "" {
-		b.WriteString(relatedCtx)
-	}
-
 	if coachingInstructions != "" {
 		b.WriteString(coachingInstructions)
 		b.WriteString("\n\n")
@@ -1779,35 +1774,6 @@ func (ea *EncodingAgent) getEpisodeContext(ctx context.Context, raw store.RawMem
 	return result
 }
 
-// getRelatedContext gathers semantically similar existing memories for context.
-func (ea *EncodingAgent) getRelatedContext(ctx context.Context, raw store.RawMemory) string {
-	// Use concept-based search with keywords from the raw content
-	words := extractKeywords(raw.Content)
-	if len(words) == 0 {
-		return ""
-	}
-
-	if len(words) > 5 {
-		words = words[:5]
-	}
-
-	related, err := ea.store.SearchByConcepts(ctx, words, 3)
-	if err != nil || len(related) == 0 {
-		return ""
-	}
-
-	result := "RELATED EXISTING MEMORIES:\n"
-	for _, mem := range related {
-		result += fmt.Sprintf("  - [%s] %s (concepts: %s)\n",
-			mem.Timestamp.Format("2006-01-02 15:04"),
-			mem.Summary,
-			joinConcepts(mem.Concepts),
-		)
-	}
-	result += "\n"
-	return result
-}
-
 // getEpisodeIDForRaw finds which episode a raw memory belongs to.
 // Checks both open and recently closed episodes since encoding is async
 // and the episode may close before encoding completes.
@@ -1836,47 +1802,6 @@ func getEpisodeIDForRaw(ea *EncodingAgent, ctx context.Context, raw store.RawMem
 	return ""
 }
 
-// extractKeywords pulls significant words from content for concept search.
-func extractKeywords(content string) []string {
-	// Simple keyword extraction: split, filter short/common words
-	words := strings.Fields(strings.ToLower(content))
-	seen := make(map[string]bool)
-	var keywords []string
-
-	stopWords := map[string]bool{
-		"the": true, "a": true, "an": true, "is": true, "was": true,
-		"are": true, "were": true, "be": true, "been": true, "being": true,
-		"have": true, "has": true, "had": true, "do": true, "does": true,
-		"did": true, "will": true, "would": true, "could": true, "should": true,
-		"may": true, "might": true, "shall": true, "can": true, "to": true,
-		"of": true, "in": true, "for": true, "on": true, "with": true,
-		"at": true, "by": true, "from": true, "as": true, "into": true,
-		"through": true, "during": true, "before": true, "after": true,
-		"it": true, "its": true, "this": true, "that": true, "these": true,
-		"and": true, "but": true, "or": true, "nor": true, "not": true,
-	}
-
-	for _, w := range words {
-		if len(w) < 3 || stopWords[w] || seen[w] {
-			continue
-		}
-		seen[w] = true
-		keywords = append(keywords, w)
-		if len(keywords) >= 10 {
-			break
-		}
-	}
-	return keywords
-}
-
-// joinConcepts joins concepts with commas.
-func joinConcepts(concepts []string) string {
-	if len(concepts) == 0 {
-		return "none"
-	}
-	return strings.Join(concepts, ", ")
-}
-
 // truncateString truncates a string to maxLen characters.
 // Uses rune-aware slicing to avoid splitting multi-byte UTF-8 characters.
 func truncateString(s string, maxLen int) string {
diff --git a/internal/agent/encoding/agent_test.go b/internal/agent/encoding/agent_test.go
index 16f566d6..72f8a84b 100644
--- a/internal/agent/encoding/agent_test.go
+++ b/internal/agent/encoding/agent_test.go
@@ -539,91 +539,6 @@ func TestHeuristicSalience(t *testing.T) {
 	})
 }
 
-// ---------------------------------------------------------------------------
-// Tests for extractKeywords
-// ---------------------------------------------------------------------------
-
-func TestExtractKeywords(t *testing.T) {
-	t.Run("extracts meaningful words", func(t *testing.T) {
-		keywords := extractKeywords("debugging the authentication module for error handling")
-
-		if len(keywords) == 0 {
-			t.Fatal("expected at least one keyword")
-		}
-		// Should not contain stop words
-		for _, kw := range keywords {
-			if kw == "the" || kw == "for" {
-				t.Errorf("unexpected stop word %q in keywords", kw)
-			}
-		}
-	})
-
-	t.Run("limits to 10 keywords", func(t *testing.T) {
-		longContent := strings.Repeat("alpha bravo charlie delta echo foxtrot golf hotel india juliet kilo lima ", 5)
-		keywords := extractKeywords(longContent)
-
-		if len(keywords) > 10 {
-			t.Errorf("expected at most 10 keywords, got %d", len(keywords))
-		}
-	})
-
-	t.Run("deduplicates words", func(t *testing.T) {
-		keywords := extractKeywords("testing testing testing testing")
-		count := 0
-		for _, kw := range keywords {
-			if kw == "testing" {
-				count++
-			}
-		}
-		if count > 1 {
-			t.Errorf("expected 'testing' to appear at most once, appeared %d times", count)
-		}
-	})
-
-	t.Run("empty content returns empty", func(t *testing.T) {
-		keywords := extractKeywords("")
-		if len(keywords) != 0 {
-			t.Errorf("expected empty keywords for empty content, got %v", keywords)
-		}
-	})
-
-	t.Run("filters short words", func(t *testing.T) {
-		keywords := extractKeywords("go is ok to do it")
-		for _, kw := range keywords {
-			if len(kw) < 3 {
-				t.Errorf("unexpected short word %q in keywords", kw)
-			}
-		}
-	})
-}
-
-// ---------------------------------------------------------------------------
-// Tests for joinConcepts
-// ---------------------------------------------------------------------------
-
-func TestJoinConcepts(t *testing.T) {
-	t.Run("joins concepts with comma", func(t *testing.T) {
-		result := joinConcepts([]string{"go", "testing", "memory"})
-		if result != "go, testing, memory" {
-			t.Errorf("expected 'go, testing, memory', got %q", result)
-		}
-	})
-
-	t.Run("empty returns none", func(t *testing.T) {
-		result := joinConcepts([]string{})
-		if result != "none" {
-			t.Errorf("expected 'none', got %q", result)
-		}
-	})
-
-	t.Run("single concept", func(t *testing.T) {
-		result := joinConcepts([]string{"single"})
-		if result != "single" {
-			t.Errorf("expected 'single', got %q", result)
-		}
-	})
-}
-
 // ---------------------------------------------------------------------------
 // Tests for isTemporalRelationship
 // ---------------------------------------------------------------------------
diff --git a/internal/api/server.go b/internal/api/server.go
index 9ea6528d..8fab0a21 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -30,7 +30,7 @@ type ServerConfig struct {
 type ServerDeps struct {
 	Store                 store.Store
 	LLM                   llm.Provider
-	ModelManager          llm.ModelManager        // can be nil if not using embedded provider
+	ModelManager          llm.ModelManager // can be nil if not using embedded provider
 	Bus                   events.Bus
 	Retriever             *retrieval.RetrievalAgent
 	Consolidator          routes.ConsolidationRunner // can be nil if disabled
diff --git a/internal/mcp/server.go b/internal/mcp/server.go
index 4663e5a6..5372b66d 100644
--- a/internal/mcp/server.go
+++ b/internal/mcp/server.go
@@ -2596,9 +2596,11 @@ func (srv *MCPServer) handleListExclusions(ctx context.Context, args map[string]
 
 // handleAmend updates a memory's content in place, preserving associations and history.
 func (srv *MCPServer) handleAmend(ctx context.Context, args map[string]interface{}) (interface{}, error) {
-	memoryID, ok := args["memory_id"].(string)
-	if !ok || memoryID == "" {
-		return nil, fmt.Errorf("memory_id parameter is required")
+	rawID, _ := args["raw_id"].(string)
+	memoryID, _ := args["memory_id"].(string)
+
+	if rawID == "" && memoryID == "" {
+		return nil, fmt.Errorf("at least one of raw_id or memory_id is required")
 	}
 
 	correctedContent, ok := args["corrected_content"].(string)
@@ -2606,6 +2608,23 @@ func (srv *MCPServer) handleAmend(ctx context.Context, args map[string]interface
 		return nil, fmt.Errorf("corrected_content parameter is required")
 	}
 
+	// Resolve to encoded memory ID — try memory_id first, fall back to raw_id
+	var resolvedID string
+	if memoryID != "" {
+		if _, err := srv.store.GetMemory(ctx, memoryID); err == nil {
+			resolvedID = memoryID
+		}
+	}
+	if resolvedID == "" && rawID != "" {
+		m, err := srv.store.GetMemoryByRawID(ctx, rawID)
+		if err == nil {
+			resolvedID = m.ID
+		}
+	}
+	if resolvedID == "" {
+		return nil, fmt.Errorf("memory not found — check that the ID is correct (use check_memory to look up by raw_id)")
+	}
+
 	// Generate a simple summary (first 120 chars of content)
 	summary := correctedContent
 	if len(summary) > 120 {
@@ -2613,22 +2632,22 @@ func (srv *MCPServer) handleAmend(ctx context.Context, args map[string]interface
 	}
 
 	// Use empty concepts and embedding — encoding agent can re-process if needed
-	if err := srv.store.AmendMemory(ctx, memoryID, correctedContent, summary, nil, nil); err != nil {
-		srv.log.Error("failed to amend memory", "memory_id", memoryID, "error", err)
+	if err := srv.store.AmendMemory(ctx, resolvedID, correctedContent, summary, nil, nil); err != nil {
+		srv.log.Error("failed to amend memory", "memory_id", resolvedID, "error", err)
 		return nil, fmt.Errorf("failed to amend memory: %w", err)
 	}
 
 	// Publish event
 	if srv.bus != nil {
 		_ = srv.bus.Publish(ctx, events.MemoryAmended{
-			MemoryID:   memoryID,
+			MemoryID:   resolvedID,
 			NewSummary: summary,
 			Ts:         time.Now(),
 		})
 	}
 
-	srv.log.Info("memory amended", "memory_id", memoryID)
-	return toolResult(fmt.Sprintf("Amended memory %s. Content updated, associations and history preserved. Salience bumped +0.05.", memoryID)), nil
+	srv.log.Info("memory amended", "memory_id", resolvedID)
+	return toolResult(fmt.Sprintf("Amended memory %s. Content updated, associations and history preserved. Salience bumped +0.05.", resolvedID)), nil
 }
 
 // handleCheckMemory inspects a memory's encoding status, concepts, and associations.
diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go
index 6bb344b5..99c74338 100644
--- a/internal/mcp/tools.go
+++ b/internal/mcp/tools.go
@@ -591,20 +591,24 @@ func listExclusionsToolDef() ToolDefinition {
 func amendToolDef() ToolDefinition {
 	return ToolDefinition{
 		Name:        "amend",
-		Description: "Update a memory's content while preserving its ID, associations, activation history, and salience. Use when a recalled memory is stale or incorrect. Records an audit trail of the change.",
+		Description: "Update a memory's content while preserving its ID, associations, activation history, and salience. Use when a recalled memory is stale or incorrect. Records an audit trail of the change. Accepts either raw_id (from remember) or memory_id (encoded).",
 		InputSchema: map[string]interface{}{
 			"type": "object",
 			"properties": map[string]interface{}{
 				"memory_id": map[string]interface{}{
 					"type":        "string",
-					"description": "The memory ID to amend",
+					"description": "The encoded memory ID to amend",
+				},
+				"raw_id": map[string]interface{}{
+					"type":        "string",
+					"description": "The raw memory ID returned by remember — will be resolved to the encoded memory",
 				},
 				"corrected_content": map[string]interface{}{
 					"type":        "string",
 					"description": "The updated memory content",
 				},
 			},
-			"required": []string{"memory_id", "corrected_content"},
+			"required": []string{"corrected_content"},
 		},
 	}
 }
diff --git a/internal/web/static/css/components.css b/internal/web/static/css/components.css
index ac1bb01f..fee06eb5 100644
--- a/internal/web/static/css/components.css
+++ b/internal/web/static/css/components.css
@@ -466,10 +466,10 @@ blockquote.quote .quote-body {
     font-size: 0.88rem;
     font-weight: bold;
     color: var(--text-dim);
-    background: linear-gradient(to bottom, rgba(92,114,184,0.08), rgba(92,114,184,0.02));
+    background: var(--bg-primary, #0f172a);
     border-bottom: 1px solid var(--border-color);
     position: sticky;
-    top: 30px;
+    top: 0;
     z-index: 50;
     display: flex;
     justify-content: space-between;
diff --git a/internal/web/static/js/timeline.js b/internal/web/static/js/timeline.js
index d4bbb429..1cab3b0a 100644
--- a/internal/web/static/js/timeline.js
+++ b/internal/web/static/js/timeline.js
@@ -158,7 +158,10 @@ export function renderTimelineItems() {
 export function renderTimelineCard(item, idx) {
     var kind = item._kind;
     var salPct = Math.min(100, Math.round((item._salience || 0) * 100));
-    var absTime = item._date.toLocaleString(undefined, { hour: '2-digit', minute: '2-digit' });
+    var h = item._date.getHours(), m = item._date.getMinutes();
+    var ampm = h >= 12 ? 'PM' : 'AM';
+    h = h % 12 || 12;
+    var absTime = h + ':' + (m < 10 ? '0' : '') + m + ' ' + ampm;
     var concepts = item._concepts || [];
     var source = item._source || '';
     var project = item._project || '';
diff --git a/training/docs/experiment_registry.md b/training/docs/experiment_registry.md
index a5a9e144..3788889b 100644
--- a/training/docs/experiment_registry.md
+++ b/training/docs/experiment_registry.md
@@ -1000,3 +1000,20 @@ Rotation parameter overhead per layer (rank=64):
 - **Tracking:** GitHub issue #381 (Phase 4)
 - **Result:** (pending — awaiting v7 gold-standard outputs from Gemini Batch API)
 - **Verdict:** (pending)
+
+### EXP-27: Qwen 3.5 4B — Model Scale Upgrade with V7 Data
+
+- **Date:** 2026-04-09
+- **Status:** REGISTERED
+- **Hypothesis:** Qwen 3.5 4B (2560 hidden, 32 layers, 16/4 Q/KV heads) as the frozen base will match or exceed Qwen 3.5 2B spoke quality on encoding while providing a stronger foundation for multi-task spokes (synthesis, retrieval). The wider hidden dim and deeper architecture should improve faithfulness and generalization on diverse inputs without spoke architecture changes.
+- **Variable:** Base model size (Qwen 3.5 2B → Qwen 3.5 4B). All other config matched to EXP-26.
+- **Control:** EXP-26 (Qwen 3.5 2B, v7 data, same hardware). Direct comparison: same data, same spoke config (4 spokes, rank 64), same hyperparameters.
+- **Prediction:** Faithfulness metrics match or exceed EXP-26 (EPR >90%, FR <5%, SC 100%). Eval loss ≤ EXP-26. Stress test 7/7. If 4B doesn't improve over 2B on encoding, the value is in multi-task spoke routing (synthesis/retrieval) where richer base representations matter.
+- **Config:** Qwen 3.5 4B (frozen, bf16, ~8 GB) + 4 spokes rank 64 on all 32 layers (~33M trainable params, ~0.8% overhead), batch 1, grad_accum 8, seq_len 2375, LR 3e-4, scalar_lr_scale 0.1, Muon + AdamW, gradient_checkpointing, patience 5, eval_interval 200. Chunked cross-entropy (256 positions). Architecture note: 32 layers in 3:1 DeltaNet/attention ratio (24 DeltaNet + 8 full attention). Spokes applied to all 32 layers.
+- **Data:** V7 dataset (same as EXP-26). Production prompt format via build_production_prompt(). Retokenized with Qwen 3.5 4B tokenizer (same tokenizer family, 248K vocab).
+- **Hardware:** Local RX 7800 XT, 16GB VRAM, ROCm 7.2.1. Daemon stopped for training. VRAM budget: ~8 GB base (bf16) + ~132 MB spokes (fp32) + ~264 MB optimizer + activations (gradient checkpointing). Expected to fit within 16 GB.
+- **Metrics:** Primary: 7-metric faithfulness eval (EPR, FR, TED, CCS, MIH, NP, SC). Secondary: eval loss/PPL, stress_test_hallucination.py (7/7 target), novel schema compliance. Tertiary: inference throughput (tok/s) at RQ4 via llama.cpp.
+- **Inference plan:** Export via export_qwen35_spokes.py (now parameterized for any Qwen 3.5 size), quantize to RQ4 via rotorq_quantize_gguf.py, benchmark throughput on RX 7800 XT. Expected: ~2.25 GB weights (RQ4), ~60-70 tok/s.
+- **Open question:** Should spokes be placed on all 32 layers, or only the 8 full-attention layers? DeltaNet layers use linear attention with recurrent state — spoke adaptation may not be needed there. Could test attention-only spoke placement as a follow-up (EXP-28).
+- **Result:** (pending — blocked on EXP-26 completion)
+- **Verdict:** (pending)
diff --git a/training/scripts/export_qwen35_spokes.py b/training/scripts/export_qwen35_spokes.py
index 1e46b015..0d229e94 100644
--- a/training/scripts/export_qwen35_spokes.py
+++ b/training/scripts/export_qwen35_spokes.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Export Qwen 3.5 2B + trained spoke weights to a single GGUF file.
+"""Export Qwen 3.5 + trained spoke weights to a single GGUF file.
 
 Two-phase approach: (1) convert the base HF model to GGUF using llama.cpp's
 standard converter, then (2) patch the GGUF to add spoke tensors and metadata
@@ -12,6 +12,11 @@
         --spokes checkpoints/exp20_v6_local/best_spokes.pt \
         --output models/qwen35-2b-spokes-f16.gguf
 
+    python training/scripts/export_qwen35_spokes.py \
+        --model models/qwen3.5-4b \
+        --spokes checkpoints/exp27_v7_4b/best_spokes.pt \
+        --output models/qwen35-4b-spokes-f16.gguf
+
 Requires: pip install gguf numpy torch (in the felixlm venv)
 """
 
@@ -107,7 +112,9 @@ def main():
     print(f"  Output:  {output_path}")
 
     # --- Phase 1: Convert base model to GGUF ---
-    base_gguf = output_path.parent / "qwen35-2b-f16.gguf"
+    # Derive base GGUF name from model directory (e.g., "qwen3.5-2b" -> "qwen35-2b-f16.gguf")
+    model_stem = model_path.name.replace(".", "")  # "qwen3.5-4b" -> "qwen35-4b"
+    base_gguf = output_path.parent / f"{model_stem}-f16.gguf"
     if not base_gguf.exists():
         print(f"\nPhase 1: Converting base model to GGUF...")
         converter = LLAMACPP_DIR / "convert_hf_to_gguf.py"
diff --git a/training/scripts/prepare_faithfulness_data.py b/training/scripts/prepare_faithfulness_data.py
index 345f6185..333cad7a 100644
--- a/training/scripts/prepare_faithfulness_data.py
+++ b/training/scripts/prepare_faithfulness_data.py
@@ -38,13 +38,7 @@
     "(make test), 1 MCP remember call.\n\n"
 )
 
-RELATED_MEMORY_STUB = (
-    "RELATED EXISTING MEMORIES (for context, do not copy into encoding):\n"
-    "- [mem-001] Decision: chose SQLite over Postgres for local-first simplicity\n"
-    "- [mem-002] Insight: spread activation with decay 0.7 limits distant associations\n\n"
-)
-
-# Ids that get episode + related context (per issue spec: 2 of 25)
+# Ids that get episode context (per issue spec: 2 of 25)
 CONTEXT_IDS = {3, 18}
 
 
@@ -110,14 +104,11 @@ def format_for_training(
 
     # Build the production-format user prompt
     episode_ctx = EPISODE_CONTEXT_STUB if entry_id in CONTEXT_IDS else ""
-    related_ctx = RELATED_MEMORY_STUB if entry_id in CONTEXT_IDS else ""
-
     user_prompt = build_production_prompt(
         content=raw_input,
         source=source,
         mem_type=mem_type,
         episode_ctx=episode_ctx,
-        related_ctx=related_ctx,
     )
 
     # The assistant response is the gold JSON
diff --git a/training/scripts/train_qwen_spokes.py b/training/scripts/train_qwen_spokes.py
index 3798d85e..71998ab1 100644
--- a/training/scripts/train_qwen_spokes.py
+++ b/training/scripts/train_qwen_spokes.py
@@ -226,7 +226,7 @@ def train(args):
     ModelClass = GemmaWithSpokes if model_type == "gemma" else QwenWithSpokes
     extra_kwargs = {}
     if model_type == "qwen":
-        extra_kwargs["attn_implementation"] = "eager"  # Flash attention may not work with hooks
+        extra_kwargs["attn_implementation"] = "sdpa"  # Memory-efficient attention (SpokeWrappedLayer is SDPA-compatible)
     if model_type == "gemma" and not args.gradient_checkpointing:
         # No gradient checkpointing implies high-VRAM hardware — skip NF4 and PLE offload
         extra_kwargs["no_quantize"] = True
diff --git a/training/scripts/training_constants.py b/training/scripts/training_constants.py
index b2d24689..5ae3f7e4 100644
--- a/training/scripts/training_constants.py
+++ b/training/scripts/training_constants.py
@@ -102,7 +102,6 @@ def build_production_prompt(
     source: str = "mcp",
     mem_type: str = "general",
     episode_ctx: str = "",
-    related_ctx: str = "",
     coaching_instructions: str = "",
     concept_vocabulary: list[str] | None = None,
 ) -> str:
@@ -162,8 +161,6 @@ def build_production_prompt(
 
     if episode_ctx:
         parts.append(episode_ctx)
-    if related_ctx:
-        parts.append(related_ctx)
     if coaching_instructions:
         parts.append(coaching_instructions)
         parts.append("\n\n")