From 7d82ec493a62401d27a5bb6ee98636d33a829d8c Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Sat, 21 Mar 2026 01:21:07 -0400 Subject: [PATCH] fix: filter binary asset paths and numeric segments from concept extraction Expand the skip list in FromPath to filter out asset directories (images, icons, fonts, docs), generic noise segments (bytes, data, cache), and documentation directories. Add isNumeric filter to drop dimension-like segments (96x96, 512x512) from themes. Fixes #305 Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/concepts/extract.go | 32 +++++++++++++++++++++++++------ internal/concepts/extract_test.go | 25 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/internal/concepts/extract.go b/internal/concepts/extract.go index 4b1f4276..4bd058c5 100644 --- a/internal/concepts/extract.go +++ b/internal/concepts/extract.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "strings" + "unicode" ) // homeDir is cached at init to avoid repeated syscalls. 
@@ -49,13 +50,24 @@ func FromPath(path string) []string { // Version control / build "git": true, "node_modules": true, "vendor": true, "dist": true, "build": true, "target": true, + // Asset / binary directories + "images": true, "icons": true, "fonts": true, "assets": true, + "static": true, "public": true, "resources": true, "res": true, + "img": true, "figures": true, "screenshots": true, + // Documentation directories + "docs": true, "doc": true, + // Generic noise segments + "bytes": true, "data": true, "cache": true, "temp": true, + "logs": true, "output": true, "out": true, + "generated": true, "gen": true, "third_party": true, + "thirdparty": true, "deps": true, "extern": true, "external": true, } seen := make(map[string]bool) var concepts []string for _, seg := range parts { seg = strings.ToLower(seg) - if len(seg) <= 2 || skip[seg] || seen[seg] { + if len(seg) <= 2 || skip[seg] || seen[seg] || isNumeric(seg) { continue } seen[seg] = true @@ -64,15 +76,23 @@ func FromPath(path string) []string { return concepts } +// isNumeric reports whether s consists only of digits and the letter 'x', which covers plain numbers and +// dimension patterns like "96x96" or "512x512" (and also all-'x' segments such as "xxx"). These are noise in concept extraction. +func isNumeric(s string) bool { + for _, r := range s { + if !unicode.IsDigit(r) && r != 'x' { + return false + } + } + return true +} + // FromEventType extracts a meaningful action verb from a watcher event type. // e.g. "file_created" → "created", "file_modified" → "modified". // Returns empty string for generic types like "dir_activity". 
func FromEventType(eventType string) string { - if strings.HasPrefix(eventType, "file_") { - action := strings.TrimPrefix(eventType, "file_") - if action != "" { - return action - } + if action, ok := strings.CutPrefix(eventType, "file_"); ok && action != "" { + return action } return "" } diff --git a/internal/concepts/extract_test.go b/internal/concepts/extract_test.go index 7ad64a04..7cc4c1c9 100644 --- a/internal/concepts/extract_test.go +++ b/internal/concepts/extract_test.go @@ -48,6 +48,31 @@ func TestFromPath(t *testing.T) { path: "agent/agent/agent.go", expected: []string{"agent"}, }, + { + name: "binary asset path filters asset dirs", + path: "docs/images/mnemonic.png", + expected: []string{"mnemonic"}, + }, + { + name: "icon asset path", + path: "resources/icons/app-icon.svg", + expected: []string{"app", "icon"}, + }, + { + name: "static web assets all filtered", + path: "public/static/images/logo.png", + expected: []string{"logo"}, + }, + { + name: "bytes segment filtered", + path: "internal/store/bytes/reader.go", + expected: []string{"store", "reader"}, + }, + { + name: "favicon path", + path: "docs/images/favicon-96x96.png", + expected: []string{"favicon"}, + }, { name: "empty path", path: "",