From a8fc8e6e88fbbe19f690a6b1bfe027e4c8fa252d Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 11:49:05 +0100 Subject: [PATCH] perf(markdown): cache syntax highlighting results for code blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During streaming, the FastRenderer re-renders the full accumulated content on every incoming chunk. Code blocks that were already fully received were being re-tokenized through chroma's expensive regex engine on every render, causing O(n²) performance degradation. Add a package-level cache keyed by (lang, code) for syntax highlighting token results. Once a code block has been tokenized, subsequent renders reuse the cached tokens. The cache is cleared on theme changes via ResetStyles(). Streaming benchmark results (BenchmarkStreamingFastRenderer): Time: 3,311ms → ~350ms (~10x faster) Allocs: 26M → ~2.3M (~11x fewer) Memory: 2,497MB → ~906MB (~2.8x less) Assisted-By: docker-agent --- pkg/tui/components/markdown/fast_renderer.go | 94 +++++++++---- pkg/tui/components/markdown/lru_cache.go | 72 ++++++++++ pkg/tui/components/markdown/lru_cache_test.go | 130 ++++++++++++++++++ 3 files changed, 272 insertions(+), 24 deletions(-) create mode 100644 pkg/tui/components/markdown/lru_cache.go create mode 100644 pkg/tui/components/markdown/lru_cache_test.go diff --git a/pkg/tui/components/markdown/fast_renderer.go b/pkg/tui/components/markdown/fast_renderer.go index 420afa35c..687c644d5 100644 --- a/pkg/tui/components/markdown/fast_renderer.go +++ b/pkg/tui/components/markdown/fast_renderer.go @@ -173,10 +173,14 @@ func ResetStyles() { globalStylesOnce = sync.Once{} globalStylesMu.Unlock() - // Also clear chroma syntax highlighting cache + // Also clear chroma syntax highlighting caches chromaStyleCacheMu.Lock() chromaStyleCache = make(map[chroma.TokenType]ansiStyle) chromaStyleCacheMu.Unlock() + + syntaxHighlightCacheMu.Lock() + syntaxHighlightCache.clear() + 
syntaxHighlightCacheMu.Unlock() } func getGlobalStyles() *cachedStyles { @@ -2158,6 +2162,12 @@ type token struct { style ansiStyle } +// syntaxCacheKey builds a cache key for syntax highlighting results. +type syntaxCacheKey struct { + lang string + code string +} + var ( lexerCache = make(map[string]chroma.Lexer) lexerCacheMu sync.RWMutex @@ -2165,34 +2175,43 @@ var ( // Cache for chroma token type to ansiStyle conversion (with code bg) chromaStyleCache = make(map[chroma.TokenType]ansiStyle) chromaStyleCacheMu sync.RWMutex + + // Cache for syntax highlighting results to avoid re-tokenizing unchanged code blocks. + // Uses an LRU cache bounded to 128 entries to prevent unbounded memory growth + // in long-running TUI sessions with many unique code blocks. + syntaxHighlightCache = newLRUCache[syntaxCacheKey, []token](syntaxHighlightCacheSize) + syntaxHighlightCacheMu sync.RWMutex +) + +const ( + // syntaxHighlightCacheSize is the maximum number of syntax-highlighted code blocks + // to keep in cache. This bounds memory usage while retaining recently viewed blocks. + syntaxHighlightCacheSize = 128 ) func (p *parser) syntaxHighlight(code, lang string) []token { - var lexer chroma.Lexer - - if lang != "" { - // Try cache first - lexerCacheMu.RLock() - lexer = lexerCache[lang] - lexerCacheMu.RUnlock() - - if lexer == nil { - lexer = lexers.Get(lang) - if lexer == nil { - // Try with file extension - lexer = lexers.Match("file." 
+ lang) - } - if lexer != nil { - lexer = chroma.Coalesce(lexer) - lexerCacheMu.Lock() - lexerCache[lang] = lexer - lexerCacheMu.Unlock() - } - } + cacheKey := syntaxCacheKey{lang: lang, code: code} + + // NOTE: must hold the exclusive lock even for lookups: lruCache.get + // promotes the entry via list.MoveToFront, mutating shared state, and + // lruCache is documented as not safe for concurrent use. + syntaxHighlightCacheMu.Lock() + if cached, ok := syntaxHighlightCache.get(cacheKey); ok { + syntaxHighlightCacheMu.Unlock() + return cached } + syntaxHighlightCacheMu.Unlock() + + tokens := p.doSyntaxHighlight(code, lang) + + syntaxHighlightCacheMu.Lock() + syntaxHighlightCache.put(cacheKey, tokens) + syntaxHighlightCacheMu.Unlock() + + return tokens +} +// doSyntaxHighlight performs the actual syntax highlighting without caching. +func (p *parser) doSyntaxHighlight(code, lang string) []token { + lexer := p.getLexer(lang) if lexer == nil { - // No highlighting - return plain text with code background return []token{{text: code, style: p.getCodeStyle(chroma.None)}} } @@ -2212,10 +2231,37 @@ func (p *parser) syntaxHighlight(code, lang string) []token { style: p.getCodeStyle(tok.Type), }) } - return tokens } +// getLexer returns a cached chroma lexer for the given language, or nil if unknown. +func (p *parser) getLexer(lang string) chroma.Lexer { + if lang == "" { + return nil + } + + lexerCacheMu.RLock() + lexer := lexerCache[lang] + lexerCacheMu.RUnlock() + if lexer != nil { + return lexer + } + + lexer = lexers.Get(lang) + if lexer == nil { + lexer = lexers.Match("file." 
+ lang) + } + if lexer == nil { + return nil + } + + lexer = chroma.Coalesce(lexer) + lexerCacheMu.Lock() + lexerCache[lang] = lexer + lexerCacheMu.Unlock() + return lexer +} + func (p *parser) getCodeStyle(tokenType chroma.TokenType) ansiStyle { chromaStyleCacheMu.RLock() style, ok := chromaStyleCache[tokenType] diff --git a/pkg/tui/components/markdown/lru_cache.go b/pkg/tui/components/markdown/lru_cache.go new file mode 100644 index 000000000..e944acfa8 --- /dev/null +++ b/pkg/tui/components/markdown/lru_cache.go @@ -0,0 +1,72 @@ +package markdown + +import "container/list" + +// lruCache is a simple LRU (Least Recently Used) cache with a fixed maximum size. +// It is NOT safe for concurrent use; callers must provide their own synchronization. +type lruCache[K comparable, V any] struct { + maxSize int + items map[K]*list.Element + order *list.List // front = most recently used +} + +type lruEntry[K comparable, V any] struct { + key K + value V +} + +// newLRUCache creates an LRU cache that holds at most maxSize entries. +func newLRUCache[K comparable, V any](maxSize int) *lruCache[K, V] { + return &lruCache[K, V]{ + maxSize: maxSize, + items: make(map[K]*list.Element, maxSize), + order: list.New(), + } +} + +// get retrieves a value from the cache, promoting it to most-recently-used. +// Returns the value and true if found, or the zero value and false otherwise. +func (c *lruCache[K, V]) get(key K) (V, bool) { + if elem, ok := c.items[key]; ok { + c.order.MoveToFront(elem) + return elem.Value.(*lruEntry[K, V]).value, true + } + var zero V + return zero, false +} + +// put adds or updates a key-value pair in the cache. +// If the cache is at capacity, the least recently used entry is evicted. 
+func (c *lruCache[K, V]) put(key K, value V) { + if elem, ok := c.items[key]; ok { + // Update existing entry + c.order.MoveToFront(elem) + elem.Value.(*lruEntry[K, V]).value = value + return + } + + // Evict if at capacity + if c.order.Len() >= c.maxSize { + c.evictOldest() + } + + entry := &lruEntry[K, V]{key: key, value: value} + elem := c.order.PushFront(entry) + c.items[key] = elem +} + +// clear removes all entries from the cache. +func (c *lruCache[K, V]) clear() { + c.items = make(map[K]*list.Element, c.maxSize) + c.order.Init() +} + +// evictOldest removes the least recently used entry. +func (c *lruCache[K, V]) evictOldest() { + oldest := c.order.Back() + if oldest == nil { + return + } + c.order.Remove(oldest) + delete(c.items, oldest.Value.(*lruEntry[K, V]).key) +} diff --git a/pkg/tui/components/markdown/lru_cache_test.go b/pkg/tui/components/markdown/lru_cache_test.go new file mode 100644 index 000000000..48abd1edf --- /dev/null +++ b/pkg/tui/components/markdown/lru_cache_test.go @@ -0,0 +1,130 @@ +package markdown + +import "testing" + +func TestLRUCache_BasicGetPut(t *testing.T) { + c := newLRUCache[string, int](3) + + c.put("a", 1) + c.put("b", 2) + c.put("c", 3) + + v, ok := c.get("a") + if !ok || v != 1 { + t.Fatalf("expected (1, true), got (%d, %v)", v, ok) + } + v, ok = c.get("b") + if !ok || v != 2 { + t.Fatalf("expected (2, true), got (%d, %v)", v, ok) + } + v, ok = c.get("c") + if !ok || v != 3 { + t.Fatalf("expected (3, true), got (%d, %v)", v, ok) + } +} + +func TestLRUCache_Miss(t *testing.T) { + c := newLRUCache[string, int](2) + + _, ok := c.get("missing") + if ok { + t.Fatal("expected miss for non-existent key") + } +} + +func TestLRUCache_Eviction(t *testing.T) { + c := newLRUCache[string, int](2) + + c.put("a", 1) + c.put("b", 2) + // Cache is full: [b, a] (b is most recent) + + c.put("c", 3) + // "a" should be evicted as least recently used: [c, b] + + _, ok := c.get("a") + if ok { + t.Fatal("expected 'a' to be evicted") + } + + v, 
ok := c.get("b") + if !ok || v != 2 { + t.Fatalf("expected (2, true), got (%d, %v)", v, ok) + } + v, ok = c.get("c") + if !ok || v != 3 { + t.Fatalf("expected (3, true), got (%d, %v)", v, ok) + } +} + +func TestLRUCache_GetPromotesEntry(t *testing.T) { + c := newLRUCache[string, int](2) + + c.put("a", 1) + c.put("b", 2) + // [b, a] + + // Access "a" to promote it + c.get("a") + // Now [a, b] + + // Add "c" - should evict "b" (now least recently used) + c.put("c", 3) + + _, ok := c.get("b") + if ok { + t.Fatal("expected 'b' to be evicted after 'a' was promoted") + } + + v, ok := c.get("a") + if !ok || v != 1 { + t.Fatalf("expected (1, true), got (%d, %v)", v, ok) + } +} + +func TestLRUCache_UpdateExistingKey(t *testing.T) { + c := newLRUCache[string, int](2) + + c.put("a", 1) + c.put("b", 2) + + // Update "a" + c.put("a", 10) + + v, ok := c.get("a") + if !ok || v != 10 { + t.Fatalf("expected (10, true), got (%d, %v)", v, ok) + } + + // "a" was promoted by the update, so adding "c" should evict "b" + c.put("c", 3) + _, ok = c.get("b") + if ok { + t.Fatal("expected 'b' to be evicted") + } +} + +func TestLRUCache_Clear(t *testing.T) { + c := newLRUCache[string, int](3) + + c.put("a", 1) + c.put("b", 2) + + c.clear() + + _, ok := c.get("a") + if ok { + t.Fatal("expected empty cache after clear") + } + _, ok = c.get("b") + if ok { + t.Fatal("expected empty cache after clear") + } + + // Should work normally after clear + c.put("c", 3) + v, ok := c.get("c") + if !ok || v != 3 { + t.Fatalf("expected (3, true), got (%d, %v)", v, ok) + } +}