diff --git a/cmd/mnemonic/main.go b/cmd/mnemonic/main.go
index 3c219e03..ecf65236 100644
--- a/cmd/mnemonic/main.go
+++ b/cmd/mnemonic/main.go
@@ -911,9 +911,18 @@ func serveCommand(configPath string) {
 
 	if cfg.Perception.Enabled {
 		if cfg.Perception.Filesystem.Enabled {
+			// Auto-detect noisy app directories and merge with configured exclusions
+			autoExclusions := fswatcher.DetectNoisyApps(log)
+			allExclusions := cfg.Perception.Filesystem.ExcludePatterns
+			for _, pattern := range autoExclusions {
+				if !fswatcher.MatchesExcludePattern(pattern, allExclusions) {
+					allExclusions = append(allExclusions, pattern)
+				}
+			}
+
 			fsw, err := fswatcher.NewFilesystemWatcher(fswatcher.Config{
 				WatchDirs:       cfg.Perception.Filesystem.WatchDirs,
-				ExcludePatterns: cfg.Perception.Filesystem.ExcludePatterns,
+				ExcludePatterns: allExclusions,
 				MaxContentBytes: cfg.Perception.Filesystem.MaxContentBytes,
 			}, log)
 			if err != nil {
diff --git a/internal/watcher/filesystem/autodetect.go b/internal/watcher/filesystem/autodetect.go
new file mode 100644
index 00000000..ad7436f5
--- /dev/null
+++ b/internal/watcher/filesystem/autodetect.go
@@ -0,0 +1,161 @@
+package filesystem
+
+import (
+	"log/slog"
+	"os"
+	"path/filepath"
+	"runtime"
+)
+
+// knownNoisyApp pairs a directory name (found under XDG/Library base dirs)
+// with a human-readable description of why it's noisy.
+type knownNoisyApp struct {
+	Dir         string // directory name to match (e.g., "Code", "google-chrome")
+	Description string // why it's noisy
+}
+
+// knownNoisyApps is the registry of known noisy applications. These write
+// high-frequency internal state to disk that is never useful as developer
+// memories.
+var knownNoisyApps = []knownNoisyApp{
+	// Browsers
+	{Dir: "google-chrome", Description: "Chrome browser storage"},
+	{Dir: "chromium", Description: "Chromium browser storage"},
+	{Dir: "BraveSoftware", Description: "Brave browser storage"},
+	{Dir: "firefox", Description: "Firefox browser storage"},
+	{Dir: "vivaldi", Description: "Vivaldi browser storage"},
+	{Dir: "opera", Description: "Opera browser storage"},
+
+	// Editors/IDEs
+	{Dir: "Code", Description: "VS Code internal state"},
+	{Dir: "Code - Insiders", Description: "VS Code Insiders internal state"},
+	{Dir: "Cursor", Description: "Cursor editor internal state"},
+	{Dir: "JetBrains", Description: "JetBrains IDE state"},
+
+	// Communication
+	{Dir: "Slack", Description: "Slack desktop state"},
+	{Dir: "discord", Description: "Discord desktop state"},
+	{Dir: "Signal", Description: "Signal messenger state"},
+	{Dir: "teams", Description: "MS Teams state"},
+	{Dir: "Microsoft Teams", Description: "MS Teams state"},
+	{Dir: "Telegram Desktop", Description: "Telegram state"},
+	{Dir: "zoom.us", Description: "Zoom state"},
+
+	// Media/Desktop
+	{Dir: "spotify", Description: "Spotify cache"},
+	{Dir: "Spotify", Description: "Spotify cache"},
+	{Dir: "vlc", Description: "VLC media player state"},
+
+	// Desktop environments
+	{Dir: "gnome-shell", Description: "GNOME shell temp files"},
+	{Dir: "plasma", Description: "KDE Plasma state"},
+	{Dir: "xfce4", Description: "XFCE desktop state"},
+	{Dir: "cinnamon", Description: "Cinnamon desktop state"},
+
+	// System services
+	{Dir: "dconf", Description: "GNOME settings backend"},
+	{Dir: "gconf", Description: "legacy GNOME settings"},
+	{Dir: "pulse", Description: "PulseAudio state"},
+	{Dir: "pipewire", Description: "PipeWire audio state"},
+
+	// Package managers / runtimes
+	{Dir: "yarn", Description: "Yarn package cache"},
+	{Dir: "pnpm", Description: "pnpm package cache"},
+	{Dir: "Docker Desktop", Description: "Docker Desktop state"},
+
+	// Cloud sync / misc
+	{Dir: "Dropbox", Description: "Dropbox sync state"},
+	{Dir: "OneDrive", Description: "OneDrive sync state"},
+	{Dir: "obsidian", Description: "Obsidian vault metadata"},
+	{Dir: "1Password", Description: "1Password state"},
+}
+
+// linuxBaseDirs returns the XDG base directories to scan on Linux.
+func linuxBaseDirs(home string) []string {
+	return []string{
+		filepath.Join(home, ".config"),
+		filepath.Join(home, ".local", "share"),
+	}
+}
+
+// darwinBaseDirs returns the base directories to scan on macOS.
+func darwinBaseDirs(home string) []string {
+	return []string{
+		filepath.Join(home, "Library", "Application Support"),
+		filepath.Join(home, "Library", "Caches"),
+	}
+}
+
+// exclusionPattern converts dir, a directory located under home, into an
+// exclusion pattern: the slash-separated path relative to home with a
+// trailing slash (e.g. ".config/Code/"). The pattern carries no leading "./"
+// or "/", so it can occur as a substring of an absolute path.
+func exclusionPattern(home, dir string) (string, error) {
+	rel, err := filepath.Rel(home, dir)
+	if err != nil {
+		return "", err
+	}
+	return filepath.ToSlash(rel) + "/", nil
+}
+
+// DetectNoisyApps scans known base directories for installed applications
+// that are known to produce high-frequency filesystem noise. It returns one
+// home-relative exclusion pattern (e.g. ".config/Code/") for every registry
+// entry that exists as a directory. The result is nil when nothing is found,
+// when the home directory is unknown, or on platforms other than Linux/macOS.
+func DetectNoisyApps(log *slog.Logger) []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		log.Warn("auto-detect: could not determine home directory", "error", err)
+		return nil
+	}
+
+	var baseDirs []string
+	switch runtime.GOOS {
+	case "linux":
+		baseDirs = linuxBaseDirs(home)
+	case "darwin":
+		baseDirs = darwinBaseDirs(home)
+	default:
+		log.Debug("auto-detect: unsupported platform, skipping", "os", runtime.GOOS)
+		return nil
+	}
+
+	var detected []string
+	for _, baseDir := range baseDirs {
+		for _, app := range knownNoisyApps {
+			candidate := filepath.Join(baseDir, app.Dir)
+			info, err := os.Stat(candidate)
+			if err != nil || !info.IsDir() {
+				continue // not installed, unreadable, or not a directory
+			}
+
+			// Derive the pattern with filepath.Rel instead of slicing by
+			// len(home): slicing kept the leading separator and produced
+			// "./.config/Code/", which never occurs in an absolute path.
+			pattern, err := exclusionPattern(home, candidate)
+			if err != nil {
+				log.Warn("auto-detect: could not derive exclusion pattern",
+					"path", candidate,
+					"error", err,
+				)
+				continue
+			}
+
+			detected = append(detected, pattern)
+			log.Info("auto-detected noisy app",
+				"path", candidate,
+				"pattern", pattern,
+				"description", app.Description,
+			)
+		}
+	}
+
+	if len(detected) > 0 {
+		log.Info("auto-detect complete", "exclusions_found", len(detected))
+	} else {
+		log.Debug("auto-detect: no additional noisy apps found")
+	}
+
+	return detected
+}
diff --git a/internal/watcher/filesystem/autodetect_test.go b/internal/watcher/filesystem/autodetect_test.go
new file mode 100644
index 00000000..5d6c0121
--- /dev/null
+++ b/internal/watcher/filesystem/autodetect_test.go
@@ -0,0 +1,99 @@
+package filesystem
+
+import (
+	"log/slog"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+// testLogger returns a logger that writes warnings and above to stderr, so
+// the Info/Debug detection messages stay out of the test output.
+func testLogger() *slog.Logger {
+	return slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelWarn}))
+}
+
+func TestDetectNoisyApps(t *testing.T) {
+	// DetectNoisyApps scans real system directories, so results vary by machine.
+	// We just verify it runs without error and returns valid patterns.
+	detected := DetectNoisyApps(testLogger())
+
+	for _, pattern := range detected {
+		if pattern == "" {
+			t.Error("detected empty exclusion pattern")
+			continue // the prefix/suffix checks are meaningless for an empty pattern
+		}
+		if strings.HasPrefix(pattern, "/") || strings.HasPrefix(pattern, "./") {
+			t.Errorf("pattern must not start with '/' or './': got %q", pattern)
+		}
+		if !strings.HasSuffix(pattern, "/") {
+			t.Errorf("pattern should end with '/': got %q", pattern)
+		}
+	}
+}
+
+func TestDetectNoisyApps_FindsKnownDirs(t *testing.T) {
+	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
+		t.Skip("test only runs on Linux/macOS")
+	}
+
+	// Point HOME at a scratch directory so the scan is hermetic: on Linux and
+	// macOS os.UserHomeDir reads $HOME.
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+
+	var baseDir, want string
+	if runtime.GOOS == "linux" {
+		baseDir = filepath.Join(home, ".config")
+		want = ".config/Code/"
+	} else {
+		baseDir = filepath.Join(home, "Library", "Application Support")
+		want = "Library/Application Support/Code/"
+	}
+
+	// One registry entry exists as a directory and must be detected ...
+	if err := os.MkdirAll(filepath.Join(baseDir, "Code"), 0o755); err != nil {
+		t.Fatalf("could not create fake app dir: %v", err)
+	}
+	// ... while a regular file carrying a registry name must be ignored.
+	if err := os.WriteFile(filepath.Join(baseDir, "Slack"), nil, 0o644); err != nil {
+		t.Fatalf("could not create fake app file: %v", err)
+	}
+
+	detected := DetectNoisyApps(testLogger())
+	if len(detected) != 1 || detected[0] != want {
+		t.Errorf("expected [%q], got %q", want, detected)
+	}
+}
+
+func TestDetectNoisyApps_NoDuplicatesWithExisting(t *testing.T) {
+	// Verify that using MatchesExcludePattern to deduplicate works
+	existing := []string{".config/Code/", ".config/google-chrome/"}
+	detected := []string{".config/Code/", ".config/discord/", ".config/google-chrome/"}
+
+	var merged []string
+	merged = append(merged, existing...)
+	for _, pattern := range detected {
+		if !MatchesExcludePattern(pattern, merged) {
+			merged = append(merged, pattern)
+		}
+	}
+
+	// Should have existing 2 + 1 new = 3
+	if len(merged) != 3 {
+		t.Errorf("expected 3 merged patterns, got %d: %v", len(merged), merged)
+	}
+
+	// discord should be the new one
+	found := false
+	for _, p := range merged {
+		if p == ".config/discord/" {
+			found = true
+		}
+	}
+	if !found {
+		t.Error("expected .config/discord/ in merged patterns")
+	}
+}