From ecc6f945e57f02dfeb1c69377d811ac898bdcf5d Mon Sep 17 00:00:00 2001
From: Caleb Gross
Date: Sat, 21 Mar 2026 18:49:24 -0400
Subject: [PATCH] fix: NULL raw_id crash, feedback bloat, and runtime metrics (#332, #333, #334)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#334 — Fix scanMemoryFrom crash on NULL raw_id:
- Use sql.NullString for raw_id in scanMemoryFrom (same pattern as all
  other nullable columns). Prevents "converting NULL to string" crash
  when memories have NULL raw_id from consolidation merges or imports.
- Add raw_id fallback in backup import: set raw_id = id if empty.

#333 — Fix retrieval_feedback table bloat:
- Null out traversed_assocs and access_snapshot after feedback is
  applied in HandleFeedback. These fields are only needed for Hebbian
  adjustment and are never read again afterwards.
- Add PruneOldFeedback store method (30-day TTL) called during
  consolidation Step 10. Prevents unbounded table growth.

#332 — Add runtime resource metrics to health API:
- HealthResponse now includes heap_alloc_mb, heap_sys_mb, goroutines,
  gc_pause_total_ms, uptime_seconds, db_size_mb.
- Periodic disk HealthReport includes heap_alloc_mb, goroutines,
  db_size_mb.
- ServerDeps gains StartTime for uptime calculation.

Closes #332, closes #333, closes #334.

Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/mnemonic/main.go | 1 + internal/agent/consolidation/agent.go | 8 +++ internal/agent/orchestrator/orchestrator.go | 10 ++++ internal/api/routes/feedback.go | 5 ++ internal/api/routes/routes_test.go | 6 +-- internal/api/routes/system.go | 56 ++++++++++++++------- internal/api/server.go | 3 +- internal/backup/import.go | 4 ++ internal/store/sqlite/sqlite.go | 19 ++++++- internal/store/store.go | 1 + internal/store/storetest/mock.go | 1 + 11 files changed, 92 insertions(+), 22 deletions(-) diff --git a/cmd/mnemonic/main.go b/cmd/mnemonic/main.go index 9f4f96a8..d4d52770 100644 --- a/cmd/mnemonic/main.go +++ b/cmd/mnemonic/main.go @@ -1703,6 +1703,7 @@ func serveCommand(configPath string) { ServiceRestarter: daemon.NewServiceManager(), PIDRestart: daemon.PIDRestart, MCPToolCount: mcp.ToolCount(), + StartTime: time.Now(), Log: log, } // Only set Consolidator if it's non-nil (avoids Go nil-interface trap) diff --git a/internal/agent/consolidation/agent.go b/internal/agent/consolidation/agent.go index 1db1c6ba..b8f4d6cd 100644 --- a/internal/agent/consolidation/agent.go +++ b/internal/agent/consolidation/agent.go @@ -266,6 +266,7 @@ type CycleReport struct { PatternsDecayed int PatternsDeduplicated int NeverRecalledArchived int + FeedbackPruned int } // runCycle executes the full consolidation pipeline. 
@@ -366,6 +367,13 @@ func (ca *ConsolidationAgent) runCycle(ctx context.Context) (*CycleReport, error } report.PatternsDeduplicated = patternsDeduped + // Step 10: Prune old retrieval feedback records (30-day TTL) + feedbackPruned, err := ca.store.PruneOldFeedback(ctx, 30*24*time.Hour) + if err != nil { + ca.log.Warn("feedback pruning failed", "error", err) + } + report.FeedbackPruned = feedbackPruned + // Record the cycle report.Duration = time.Since(startTime) if err := ca.recordCycle(ctx, report); err != nil { diff --git a/internal/agent/orchestrator/orchestrator.go b/internal/agent/orchestrator/orchestrator.go index 8d52e53b..62a9cf76 100644 --- a/internal/agent/orchestrator/orchestrator.go +++ b/internal/agent/orchestrator/orchestrator.go @@ -7,6 +7,7 @@ import ( "log/slog" "os" "path/filepath" + "runtime" "sync" "time" @@ -39,6 +40,9 @@ type HealthReport struct { LastConsolidation string `json:"last_consolidation"` LastDreamCycle string `json:"last_dream_cycle"` AutonomousActions int `json:"autonomous_actions_total"` + HeapAllocMB float64 `json:"heap_alloc_mb"` + Goroutines int `json:"goroutines"` + DBSizeMB float64 `json:"db_size_mb"` Warnings []string `json:"warnings,omitempty"` } @@ -374,6 +378,9 @@ func (o *Orchestrator) writeHealthReport() { } o.mu.Lock() + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + report := HealthReport{ Timestamp: time.Now(), Uptime: time.Since(o.startTime).Round(time.Second).String(), @@ -384,6 +391,9 @@ func (o *Orchestrator) writeHealthReport() { AbstractionCount: len(level2) + len(level3), LastConsolidation: lastConsolidation, AutonomousActions: o.autonomousCount, + HeapAllocMB: float64(memStats.HeapAlloc) / (1024 * 1024), + Goroutines: runtime.NumGoroutine(), + DBSizeMB: float64(stats.StorageSizeBytes) / (1024 * 1024), Warnings: append([]string{}, o.warnings...), AgentStatus: map[string]string{ "orchestrator": "running", diff --git a/internal/api/routes/feedback.go b/internal/api/routes/feedback.go index 
47e05fa6..4d2f979c 100644 --- a/internal/api/routes/feedback.go +++ b/internal/api/routes/feedback.go @@ -166,6 +166,11 @@ func HandleFeedback(s store.Store, log *slog.Logger) http.HandlerFunc { } } + // Prune bulky traversal data now that feedback has been applied + fb.TraversedAssocs = nil + fb.AccessSnapshot = nil + _ = s.WriteRetrievalFeedback(ctx, fb) + log.Info("feedback recorded", "query_id", req.QueryID, "quality", req.Quality, diff --git a/internal/api/routes/routes_test.go b/internal/api/routes/routes_test.go index e565bbcf..53a89602 100644 --- a/internal/api/routes/routes_test.go +++ b/internal/api/routes/routes_test.go @@ -590,7 +590,7 @@ func TestHandleHealthCheck(t *testing.T) { }, } llmProv := &mockLLMProvider{} - handler := HandleHealth(ms, llmProv, "test", 23, testLogger()) + handler := HandleHealth(ms, llmProv, "test", 23, time.Now(), testLogger()) req := httptest.NewRequest(http.MethodGet, "/health", nil) rr := httptest.NewRecorder() @@ -629,7 +629,7 @@ func TestHandleHealthCheck(t *testing.T) { }, } llmProv := &failingLLMProvider{} - handler := HandleHealth(ms, llmProv, "test", 23, testLogger()) + handler := HandleHealth(ms, llmProv, "test", 23, time.Now(), testLogger()) req := httptest.NewRequest(http.MethodGet, "/health", nil) rr := httptest.NewRecorder() @@ -662,7 +662,7 @@ func TestHandleHealthCheck(t *testing.T) { }, } llmProv := &mockLLMProvider{} - handler := HandleHealth(ms, llmProv, "test", 23, testLogger()) + handler := HandleHealth(ms, llmProv, "test", 23, time.Now(), testLogger()) req := httptest.NewRequest(http.MethodGet, "/health", nil) rr := httptest.NewRecorder() diff --git a/internal/api/routes/system.go b/internal/api/routes/system.go index 700e4efc..ea44f221 100644 --- a/internal/api/routes/system.go +++ b/internal/api/routes/system.go @@ -4,6 +4,7 @@ import ( "context" "log/slog" "net/http" + "runtime" "time" "github.com/appsprout-dev/mnemonic/internal/llm" @@ -12,20 +13,26 @@ import ( // HealthResponse is the JSON response 
for the health check endpoint. type HealthResponse struct { - Status string `json:"status"` - Version string `json:"version,omitempty"` - LLMAvailable bool `json:"llm_available"` - LLMModel string `json:"llm_model,omitempty"` - StoreHealthy bool `json:"store_healthy"` - MemoryCount int `json:"memory_count"` - ToolCount int `json:"tool_count"` - Timestamp string `json:"timestamp"` + Status string `json:"status"` + Version string `json:"version,omitempty"` + LLMAvailable bool `json:"llm_available"` + LLMModel string `json:"llm_model,omitempty"` + StoreHealthy bool `json:"store_healthy"` + MemoryCount int `json:"memory_count"` + ToolCount int `json:"tool_count"` + HeapAllocMB float64 `json:"heap_alloc_mb"` + HeapSysMB float64 `json:"heap_sys_mb"` + Goroutines int `json:"goroutines"` + GCPauseTotalMs float64 `json:"gc_pause_total_ms"` + UptimeSeconds int64 `json:"uptime_seconds"` + DBSizeMB float64 `json:"db_size_mb"` + Timestamp string `json:"timestamp"` } // HandleHealth returns an HTTP handler that performs a health check. // Checks LLM availability with 2s timeout and store health. // Returns 200 with health status JSON. 
-func HandleHealth(s store.Store, llmProv llm.Provider, version string, toolCount int, log *slog.Logger) http.HandlerFunc { +func HandleHealth(s store.Store, llmProv llm.Provider, version string, toolCount int, startTime time.Time, log *slog.Logger) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { log.Debug("health check requested") @@ -64,15 +71,30 @@ func HandleHealth(s store.Store, llmProv llm.Provider, version string, toolCount status = "degraded" } + // Runtime metrics + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + + var dbSizeMB float64 + if stats, err := s.GetStatistics(storeCtx); err == nil { + dbSizeMB = float64(stats.StorageSizeBytes) / (1024 * 1024) + } + resp := HealthResponse{ - Status: status, - Version: version, - LLMAvailable: llmAvailable, - LLMModel: llmModel, - StoreHealthy: storeHealthy, - MemoryCount: memoryCount, - ToolCount: toolCount, - Timestamp: time.Now().UTC().Format(time.RFC3339), + Status: status, + Version: version, + LLMAvailable: llmAvailable, + LLMModel: llmModel, + StoreHealthy: storeHealthy, + MemoryCount: memoryCount, + ToolCount: toolCount, + HeapAllocMB: float64(memStats.HeapAlloc) / (1024 * 1024), + HeapSysMB: float64(memStats.HeapSys) / (1024 * 1024), + Goroutines: runtime.NumGoroutine(), + GCPauseTotalMs: float64(memStats.PauseTotalNs) / 1e6, + UptimeSeconds: int64(time.Since(startTime).Seconds()), + DBSizeMB: dbSizeMB, + Timestamp: time.Now().UTC().Format(time.RFC3339), } log.Info("health check completed", "status", status, "llm_available", llmAvailable, "store_healthy", storeHealthy, "memory_count", memoryCount) diff --git a/internal/api/server.go b/internal/api/server.go index 6ed70e15..6eff042a 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -41,6 +41,7 @@ type ServerDeps struct { ServiceRestarter routes.ServiceRestarter // can be nil if not installed as service PIDRestart routes.PIDRestartFunc // fallback restart when service manager unavailable 
MCPToolCount int // number of registered MCP tools + StartTime time.Time // daemon start time for uptime calculation Log *slog.Logger } @@ -79,7 +80,7 @@ func NewServer(cfg ServerConfig, deps ServerDeps) *Server { // registerRoutes registers all API routes with the mux. func (s *Server) registerRoutes() { // Health and stats - s.mux.HandleFunc("GET /api/v1/health", routes.HandleHealth(s.deps.Store, s.deps.LLM, s.deps.Version, s.deps.MCPToolCount, s.deps.Log)) + s.mux.HandleFunc("GET /api/v1/health", routes.HandleHealth(s.deps.Store, s.deps.LLM, s.deps.Version, s.deps.MCPToolCount, s.deps.StartTime, s.deps.Log)) s.mux.HandleFunc("GET /api/v1/stats", routes.HandleStats(s.deps.Store, s.deps.Log)) // Self-update diff --git a/internal/backup/import.go b/internal/backup/import.go index fc41ff72..8ba88575 100644 --- a/internal/backup/import.go +++ b/internal/backup/import.go @@ -48,6 +48,10 @@ func ImportFromJSON(ctx context.Context, s store.Store, filePath string, mode Im // Import memories for _, memory := range exportData.Memories { + // Ensure raw_id is never empty — use id as fallback + if memory.RawID == "" { + memory.RawID = memory.ID + } if err := s.WriteMemory(ctx, memory); err != nil { result.SkippedDuplicates++ } else { diff --git a/internal/store/sqlite/sqlite.go b/internal/store/sqlite/sqlite.go index 2b9ecc80..be1b2455 100644 --- a/internal/store/sqlite/sqlite.go +++ b/internal/store/sqlite/sqlite.go @@ -353,6 +353,7 @@ const memoryColumns = `id, raw_id, timestamp, type, content, summary, concepts, // scanMemory scans a memory row from the database. 
func scanMemoryFrom(s scanner) (store.Memory, error) { var mem store.Memory + var rawID sql.NullString var memType sql.NullString var conceptsStr sql.NullString var embeddingBlob []byte @@ -365,7 +366,7 @@ func scanMemoryFrom(s scanner) (store.Memory, error) { var recallSuppressed int err := s.Scan( &mem.ID, - &mem.RawID, + &rawID, &mem.Timestamp, &memType, &mem.Content, @@ -391,6 +392,8 @@ func scanMemoryFrom(s scanner) (store.Memory, error) { return mem, err } + mem.RawID = rawID.String + // Decode concepts if conceptsStr.Valid && conceptsStr.String != "" { concepts, err := decodeStringSlice(conceptsStr.String) @@ -2172,6 +2175,20 @@ func (s *SQLiteStore) ListRecentRetrievalFeedback(ctx context.Context, since tim return results, rows.Err() } +// PruneOldFeedback deletes retrieval_feedback records older than the given duration. +func (s *SQLiteStore) PruneOldFeedback(ctx context.Context, olderThan time.Duration) (int, error) { + cutoff := time.Now().Add(-olderThan).Format(time.RFC3339) + result, err := s.db.ExecContext(ctx, `DELETE FROM retrieval_feedback WHERE created_at < ?`, cutoff) + if err != nil { + return 0, fmt.Errorf("prune old feedback: %w", err) + } + rows, err := result.RowsAffected() + if err != nil { + return 0, fmt.Errorf("prune old feedback rows affected: %w", err) + } + return int(rows), nil +} + // GetMemoryFeedbackScores computes a normalized feedback score for each memory ID // by scanning retrieval_feedback rows where the memory appears in retrieved_memory_ids. // "helpful" = +1, "irrelevant" = -1, "partial" = 0. Returns sum/count per memory. 
diff --git a/internal/store/store.go b/internal/store/store.go index 51caad97..88858f79 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -443,6 +443,7 @@ type Store interface { WriteRetrievalFeedback(ctx context.Context, fb RetrievalFeedback) error GetRetrievalFeedback(ctx context.Context, queryID string) (RetrievalFeedback, error) ListRecentRetrievalFeedback(ctx context.Context, since time.Time, limit int) ([]RetrievalFeedback, error) + PruneOldFeedback(ctx context.Context, olderThan time.Duration) (int, error) // GetMemoryFeedbackScores computes a normalized feedback score for each memory ID // based on retrieval_feedback records. "helpful" = +1, "irrelevant" = -1, "partial" = 0. // Returns sum/count per memory, so scores range from -1.0 to +1.0. diff --git a/internal/store/storetest/mock.go b/internal/store/storetest/mock.go index 18db1b72..92487417 100644 --- a/internal/store/storetest/mock.go +++ b/internal/store/storetest/mock.go @@ -177,6 +177,7 @@ func (MockStore) GetRetrievalFeedback(context.Context, string) (store.RetrievalF func (MockStore) ListRecentRetrievalFeedback(context.Context, time.Time, int) ([]store.RetrievalFeedback, error) { return nil, nil } +func (MockStore) PruneOldFeedback(context.Context, time.Duration) (int, error) { return 0, nil } func (MockStore) GetMemoryFeedbackScores(context.Context, []string) (map[string]float32, error) { return nil, nil }