Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions TECHNICAL_DEBT.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Technical Debt

## Privacy / Secret Detection (2026-03-12)

### ~~P1: privacy/secrets.go functions never called from production code~~ — RESOLVED 2026-03-12
`ContainsSecrets()`, `RedactSecrets()`, and `SanitizeObservation()` in `internal/privacy/secrets.go`
were fully implemented and tested but unreachable from any production path.

**Resolution:**
- `internal/mcp/tools_memory.go` `handleStoreMemory`: changed from hard-reject to warn-and-redact.
When content contains secrets, a WARN-level message is logged (the content itself is not logged) and the content is redacted via `RedactSecrets()` before storage.
- `internal/worker/handlers_ingest.go` `handleIngestEvent`: secret detection now runs on `toolInputStr` and
`toolResultStr` immediately after stringification, before any pipeline processing. Redaction applies only to
these in-memory strings; the raw event stored in `raw_events` (the source-of-truth store) may still contain the original, unredacted data.


## Credential Storage (2026-03-12)

### ~~S2: vault_status missing key_source field~~ — RESOLVED 2026-03-12
Expand Down
6 changes: 6 additions & 0 deletions internal/mcp/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ import (
"strings"
"time"

"sync"

"github.com/thebtf/engram/internal/chunking"
"github.com/thebtf/engram/internal/collections"
"github.com/thebtf/engram/internal/consolidation"
"github.com/thebtf/engram/internal/crypto"
"github.com/thebtf/engram/internal/db/gorm"
"github.com/thebtf/engram/internal/embedding"
graphpkg "github.com/thebtf/engram/internal/graph"
Expand Down Expand Up @@ -47,6 +50,9 @@ type Server struct {
embedSvc *embedding.Service
chunkManager *chunking.Manager
graphStore graphpkg.GraphStore
vault *crypto.Vault
vaultInitErr error
vaultOnce sync.Once
backfillStatusFunc func() (any, error)
version string
}
Expand Down
34 changes: 12 additions & 22 deletions internal/mcp/tools_credential.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,25 @@ import (
"context"
"encoding/json"
"fmt"
"sync"

"github.com/rs/zerolog/log"
"github.com/thebtf/engram/internal/config"
"github.com/thebtf/engram/internal/crypto"
"github.com/thebtf/engram/pkg/models"
)

// Vault singleton — initialized lazily on first credential operation.
// If initialization fails (missing key file, invalid key), the error is permanent.
// Restart the server after fixing the key configuration to retry.
// Test isolation: vault state is package-level; tests that trigger getVault()
// with failing config will poison all subsequent tests in the same binary.
var (
sharedVault *crypto.Vault
vaultInitErr error
vaultOnce sync.Once
)

// getVault returns the shared Vault, initializing it lazily on first call.
func getVault() (*crypto.Vault, error) {
vaultOnce.Do(func() {
// getVault returns the Server's Vault, initializing it lazily on first call.
// The vault is a singleton within the server instance; initialization errors
// are permanent — restart the server after fixing key configuration to retry.
func (s *Server) getVault() (*crypto.Vault, error) {
s.vaultOnce.Do(func() {
cfg := config.Get()
sharedVault, vaultInitErr = crypto.NewVault(cfg)
if vaultInitErr != nil {
log.Error().Err(vaultInitErr).Msg("vault: failed to initialize")
s.vault, s.vaultInitErr = crypto.NewVault(cfg)
if s.vaultInitErr != nil {
log.Error().Err(s.vaultInitErr).Msg("vault: failed to initialize")
}
})
return sharedVault, vaultInitErr
return s.vault, s.vaultInitErr
}

// handleStoreCredential encrypts and stores a credential observation.
Expand Down Expand Up @@ -70,7 +60,7 @@ func (s *Server) handleStoreCredential(ctx context.Context, args json.RawMessage
return "", fmt.Errorf("project is required for project-scoped credentials")
}

v, err := getVault()
v, err := s.getVault()
if err != nil {
return "", fmt.Errorf("vault not available: %w", err)
}
Expand Down Expand Up @@ -143,7 +133,7 @@ func (s *Server) handleGetCredential(ctx context.Context, args json.RawMessage)
return "", fmt.Errorf("name is required")
}

v, err := getVault()
v, err := s.getVault()
if err != nil {
return "", fmt.Errorf("vault not available — configure ENGRAM_ENCRYPTION_KEY or ENGRAM_ENCRYPTION_KEY_FILE: %w", err)
}
Expand Down Expand Up @@ -286,7 +276,7 @@ func (s *Server) handleVaultStatus(ctx context.Context, _ json.RawMessage) (stri
// Only load fingerprint and key source when vault is already configured (read existing key).
keySource := ""
if keyConfigured {
if v, err := getVault(); err == nil && v != nil {
if v, err := s.getVault(); err == nil && v != nil {
fingerprint = v.Fingerprint()
keySource = v.KeySource()
}
Expand Down
5 changes: 3 additions & 2 deletions internal/mcp/tools_memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ func (s *Server) handleStoreMemory(ctx context.Context, args json.RawMessage) (s
Msg("store_memory: content truncated to soft limit")
}

// Reject content containing plaintext secrets — use store_credential instead.
// Redact secrets from content before storing — warn and continue rather than reject.
if privacy.ContainsSecrets(params.Content) {
return "", fmt.Errorf("content appears to contain secrets (API keys, tokens, passwords). Use store_credential for secret values, not store_memory")
log.Warn().Msg("store_memory: content contains secrets — redacting before storage")
params.Content = privacy.RedactSecrets(params.Content)
}

// Classify observation type from content keywords when not provided.
Expand Down
11 changes: 11 additions & 0 deletions internal/worker/handlers_ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/rs/zerolog/log"
"github.com/thebtf/engram/internal/pipeline"
"github.com/thebtf/engram/internal/privacy"
"github.com/thebtf/engram/pkg/models"
)

Expand Down Expand Up @@ -98,6 +99,16 @@ func (s *Service) handleIngestEvent(w http.ResponseWriter, r *http.Request) {
toolInputStr := toJSONString(req.ToolInput)
toolResultStr := toJSONString(req.ToolResult)

// Redact secrets from tool input/result before any pipeline processing.
if privacy.ContainsSecrets(toolInputStr) {
log.Warn().Str("tool", req.ToolName).Msg("ingest: tool_input contains secrets — redacting before pipeline processing")
toolInputStr = privacy.RedactSecrets(toolInputStr)
}
if privacy.ContainsSecrets(toolResultStr) {
log.Warn().Str("tool", req.ToolName).Msg("ingest: tool_result contains secrets — redacting before pipeline processing")
toolResultStr = privacy.RedactSecrets(toolResultStr)
}
Comment on lines +102 to +110
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for redacting secrets in toolInputStr and toolResultStr is duplicated. You can refactor this into a local helper function to improve readability and maintainability. This would also make it easier to add redaction for more fields in the future.

Suggested change
// Redact secrets from tool input/result before any pipeline processing.
if privacy.ContainsSecrets(toolInputStr) {
log.Warn().Str("tool", req.ToolName).Msg("ingest: tool_input contains secrets — redacting before pipeline processing")
toolInputStr = privacy.RedactSecrets(toolInputStr)
}
if privacy.ContainsSecrets(toolResultStr) {
log.Warn().Str("tool", req.ToolName).Msg("ingest: tool_result contains secrets — redacting before pipeline processing")
toolResultStr = privacy.RedactSecrets(toolResultStr)
}
// Redact secrets from tool input/result before any pipeline processing.
// redactAndLog returns s with secrets redacted when privacy.ContainsSecrets
// reports a match, and returns s unchanged otherwise. fieldName is only used
// to label the warning message; the tool name comes from the captured req.
redactAndLog := func(s, fieldName string) string {
if privacy.ContainsSecrets(s) {
// Only the tool name and field label are logged, never the value of s.
log.Warn().Str("tool", req.ToolName).Msgf("ingest: %s contains secrets — redacting before pipeline processing", fieldName)
return privacy.RedactSecrets(s)
}
return s
}
toolInputStr = redactAndLog(toolInputStr, "tool_input")
toolResultStr = redactAndLog(toolResultStr, "tool_result")


// Filter: skip tools that should never be observed
if pipeline.ShouldSkipTool(req.ToolName) {
w.Header().Set("Content-Type", "application/json")
Expand Down
Loading