From 255be37af6887e49ec8616dd82776e45d2a37dad Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sat, 22 Nov 2025 20:39:25 +0100 Subject: [PATCH 01/14] Add performance timing instrumentation to renderer Add optional timing measurements to identify performance bottlenecks in the HTML rendering process. Controlled by CLAUDE_CODE_LOG_DEBUG_TIMING environment variable. Timing points added: - Initialization - Message deduplication - Session summary processing - Tool use context building - Main message processing loop - Session navigation building - Message pair identification - Paired message reordering - Mark messages with children - Template environment setup - Template rendering (with HTML size) Usage: CLAUDE_CODE_LOG_DEBUG_TIMING=1 claude-code-log file.jsonl This will print timing output like: [TIMING] Initialization 0.001s (total: 0.001s) [TIMING] Deduplication (1234 messages) 0.050s (total: 0.051s) [TIMING] Main message processing loop 5.234s (total: 5.285s) [TIMING] Template rendering (30MB chars) 15.432s (total: 20.717s) No impact on performance when disabled (default). 
--- claude_code_log/renderer.py | 55 ++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 02e779e0..530ff702 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2,7 +2,9 @@ """Render Claude transcript data to HTML format.""" import json +import os import re +import time from pathlib import Path from typing import List, Optional, Dict, Any, cast, TYPE_CHECKING @@ -17,6 +19,13 @@ from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] +# Performance debugging +DEBUG_TIMING = os.getenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "").lower() in ( + "1", + "true", + "yes", +) + from .models import ( TranscriptEntry, SummaryTranscriptEntry, @@ -2517,9 +2526,28 @@ def generate_html( combined_transcript_link: Optional[str] = None, ) -> str: """Generate HTML from transcript messages using Jinja2 templates.""" + # Performance timing + t_start = time.time() + t_last = t_start + + def log_timing(phase: str) -> None: + """Log timing for a phase if DEBUG_TIMING is enabled.""" + nonlocal t_last + if DEBUG_TIMING: + t_now = time.time() + phase_time = t_now - t_last + total_time = t_now - t_start + print( + f"[TIMING] {phase:40s} {phase_time:8.3f}s (total: {total_time:8.3f}s)", + flush=True, + ) + t_last = t_now + if not title: title = "Claude Transcript" + log_timing("Initialization") + # Deduplicate messages by (message_type, timestamp) # Messages with the exact same timestamp are duplicates by definition - # the differences (like IDE selection tags) are just logging artifacts @@ -2551,6 +2579,8 @@ def generate_html( messages = deduplicated_messages + log_timing(f"Deduplication ({len(messages)} messages)") + # Pre-process to find and attach session summaries session_summaries: Dict[str, str] = {} uuid_to_session: Dict[str, str] = {} @@ -2588,6 
+2618,8 @@ def generate_html( if session_id in session_summaries: setattr(message, "_session_summary", session_summaries[session_id]) + log_timing("Session summary processing") + # Group messages by session and collect session info for navigation sessions: Dict[str, Dict[str, Any]] = {} session_order: List[str] = [] @@ -2627,6 +2659,8 @@ def generate_html( ) tool_use_context[tool_id] = tool_ctx + log_timing(f"Tool use context building ({len(tool_use_context)} tools)") + # Process messages into template-friendly format template_messages: List[TemplateMessage] = [] @@ -3249,6 +3283,10 @@ def generate_html( pending_dedup = None # Reset for next iteration + log_timing( + f"Main message processing loop ({len(template_messages)} template messages)" + ) + # Prepare session navigation data session_nav: List[Dict[str, Any]] = [] for session_id in session_order: @@ -3302,19 +3340,30 @@ def generate_html( } ) + log_timing(f"Session navigation building ({len(session_nav)} sessions)") + # Identify and mark paired messages (command+output, tool_use+tool_result, etc.) 
_identify_message_pairs(template_messages) + log_timing("Identify message pairs") + # Reorder messages so pairs are adjacent while preserving chronological order template_messages = _reorder_paired_messages(template_messages) + log_timing("Reorder paired messages") + # Mark messages that have children for fold/unfold controls _mark_messages_with_children(template_messages) + log_timing("Mark messages with children") + # Render template env = _get_template_environment() template = env.get_template("transcript.html") - return str( + + log_timing("Template environment setup") + + html_output = str( template.render( title=title, messages=template_messages, @@ -3324,6 +3373,10 @@ def generate_html( ) ) + log_timing(f"Template rendering ({len(html_output)} chars)") + + return html_output + def generate_projects_index_html( project_summaries: List[Dict[str, Any]], From 90fb3abf35a47cd47019dfb34afac1d890a6045d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sat, 22 Nov 2025 23:28:45 +0100 Subject: [PATCH 02/14] Add per-message timing statistics to main processing loop Track time spent on each individual message during the main processing loop and report statistics about slowest messages. This helps identify performance outliers. New timing output when DEBUG_TIMING is enabled: - Average time per message - Slowest 10 messages with their types and indices This helps answer: - What's the average time to process a message? - Which specific messages are slow? - Is there a pattern in slow messages (type, content, etc.)? Sample output: [TIMING] Loop statistics: [TIMING] Total messages: 7027 [TIMING] Average time per message: 7.5ms [TIMING] Slowest 10 messages: [TIMING] Message #1234 (assistant): 245.3ms [TIMING] Message #5678 (user): 189.7ms [TIMING] ... 
--- claude_code_log/renderer.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 530ff702..e883a4cd 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2676,7 +2676,14 @@ def log_timing(phase: str) -> None: # Maps raw content -> (template_messages index, message_id, type: "task" or "assistant") content_map: Dict[str, tuple[int, str, str]] = {} - for message in messages: + # Per-message timing tracking + message_timings: List[ + tuple[float, str, int] + ] = [] # (duration, message_type, index) + loop_phase_times: Dict[str, float] = {} # Accumulate time per phase + + for msg_idx, message in enumerate(messages): + msg_start_time = time.time() if DEBUG_TIMING else 0.0 message_type = message.type # Skip summary messages - they should already be attached to their sessions @@ -3283,6 +3290,33 @@ def log_timing(phase: str) -> None: pending_dedup = None # Reset for next iteration + # Track message timing + if DEBUG_TIMING: + msg_duration = time.time() - msg_start_time + message_timings.append((msg_duration, message_type, msg_idx)) + + # Report loop statistics + if DEBUG_TIMING and message_timings: + # Sort by duration descending + sorted_timings = sorted(message_timings, key=lambda x: x[0], reverse=True) + + # Calculate statistics + total_msg_time = sum(t[0] for t in message_timings) + avg_time = total_msg_time / len(message_timings) + + # Report slowest messages + print(f"\n[TIMING] Loop statistics:", flush=True) + print(f"[TIMING] Total messages: {len(message_timings)}", flush=True) + print( + f"[TIMING] Average time per message: {avg_time * 1000:.1f}ms", flush=True + ) + print(f"[TIMING] Slowest 10 messages:", flush=True) + for duration, msg_type, idx in sorted_timings[:10]: + print( + f"[TIMING] Message #{idx} ({msg_type}): {duration * 1000:.1f}ms", + flush=True, + ) + log_timing( f"Main message processing loop 
({len(template_messages)} template messages)" ) From a557948435ccee81d88c942ef2ec61dba01a6efb Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sat, 22 Nov 2025 23:59:25 +0100 Subject: [PATCH 03/14] Add detailed timing instrumentation for markdown and Pygments operations Extended the DEBUG_TIMING instrumentation to track and report: - Per-message timing with UUIDs instead of indices - Cumulative markdown rendering statistics (total time + slowest 10) - Cumulative Pygments highlighting statistics (total time + slowest 10) This helps identify performance bottlenecks in content rendering by: - Wrapping render_markdown() to track mistune operations - Instrumenting Pygments highlight() calls in both block_code plugin and _highlight_code_with_pygments() function - Using module-level globals to pass timing context to nested functions - Reporting UUIDs to easily identify which messages are slow Statistics now show: - Message UUIDs for easy identification - Total markdown rendering operations and cumulative time - Total Pygments highlighting operations and cumulative time - Top 10 slowest operations for each category --- claude_code_log/renderer.py | 101 +++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 12 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index e883a4cd..35b451c0 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -241,7 +241,18 @@ def block_code(code: str, info: Optional[str] = None) -> str: cssclass="highlight", wrapcode=True, ) - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + # Track Pygments timing if enabled + if DEBUG_TIMING: + t_start = time.time() + result = str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + duration = time.time() - t_start + if "_pygments_timings" in globals(): + globals()["_pygments_timings"].append( + (duration, globals().get("_current_msg_uuid", "")) + ) + return result + 
else: + return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] else: # No language hint, use default rendering return original_render(code, info) @@ -253,6 +264,10 @@ def block_code(code: str, info: Optional[str] = None) -> str: def render_markdown(text: str) -> str: """Convert markdown text to HTML using mistune with Pygments syntax highlighting.""" + # Track markdown rendering time if enabled + if DEBUG_TIMING: + t_start = time.time() + # Configure mistune with GitHub-flavored markdown features renderer = mistune.create_markdown( plugins=[ @@ -267,7 +282,17 @@ def render_markdown(text: str) -> str: escape=False, # Don't escape HTML since we want to render markdown properly hard_wrap=True, # Line break for newlines (checklists in Assistant messages) ) - return str(renderer(text)) + result = str(renderer(text)) + + # Record timing if enabled + if DEBUG_TIMING: + duration = time.time() - t_start + if "_markdown_timings" in globals(): + globals()["_markdown_timings"].append( + (duration, globals().get("_current_msg_uuid", "")) + ) + + return result def extract_command_info(text_content: str) -> tuple[str, str, str]: @@ -396,8 +421,18 @@ def _highlight_code_with_pygments( linenostart=linenostart, ) - # Highlight the code - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + # Highlight the code with timing if enabled + if DEBUG_TIMING: + t_start = time.time() + result = str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + duration = time.time() - t_start + if "_pygments_timings" in globals(): + globals()["_pygments_timings"].append( + (duration, globals().get("_current_msg_uuid", "")) + ) + return result + else: + return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] def format_read_tool_content(tool_use: ToolUseContent) -> str: # noqa: ARG001 @@ -2678,13 +2713,27 @@ def log_timing(phase: str) -> None: # Per-message timing tracking 
message_timings: List[ - tuple[float, str, int] - ] = [] # (duration, message_type, index) - loop_phase_times: Dict[str, float] = {} # Accumulate time per phase + tuple[float, str, int, str] + ] = [] # (duration, message_type, index, uuid) + + # Track expensive operations + markdown_timings: List[tuple[float, str]] = [] # (duration, context_uuid) + pygments_timings: List[tuple[float, str]] = [] # (duration, context_uuid) + + # Make timing lists available to nested functions via globals + if DEBUG_TIMING: + globals()["_markdown_timings"] = markdown_timings + globals()["_pygments_timings"] = pygments_timings + globals()["_current_msg_uuid"] = "" for msg_idx, message in enumerate(messages): msg_start_time = time.time() if DEBUG_TIMING else 0.0 message_type = message.type + msg_uuid = getattr(message, "uuid", f"no-uuid-{msg_idx}") + + # Update current message UUID for timing tracking + if DEBUG_TIMING: + globals()["_current_msg_uuid"] = msg_uuid # Skip summary messages - they should already be attached to their sessions if isinstance(message, SummaryTranscriptEntry): @@ -3293,7 +3342,7 @@ def log_timing(phase: str) -> None: # Track message timing if DEBUG_TIMING: msg_duration = time.time() - msg_start_time - message_timings.append((msg_duration, message_type, msg_idx)) + message_timings.append((msg_duration, message_type, msg_idx, msg_uuid)) # Report loop statistics if DEBUG_TIMING and message_timings: @@ -3305,18 +3354,46 @@ def log_timing(phase: str) -> None: avg_time = total_msg_time / len(message_timings) # Report slowest messages - print(f"\n[TIMING] Loop statistics:", flush=True) + print("\n[TIMING] Loop statistics:", flush=True) print(f"[TIMING] Total messages: {len(message_timings)}", flush=True) print( f"[TIMING] Average time per message: {avg_time * 1000:.1f}ms", flush=True ) - print(f"[TIMING] Slowest 10 messages:", flush=True) - for duration, msg_type, idx in sorted_timings[:10]: + print("[TIMING] Slowest 10 messages:", flush=True) + for duration, msg_type, 
idx, uuid in sorted_timings[:10]: print( - f"[TIMING] Message #{idx} ({msg_type}): {duration * 1000:.1f}ms", + f"[TIMING] Message {uuid} (#{idx}, {msg_type}): {duration * 1000:.1f}ms", flush=True, ) + # Report markdown rendering statistics + if markdown_timings: + sorted_markdown = sorted(markdown_timings, key=lambda x: x[0], reverse=True) + total_markdown_time = sum(t[0] for t in markdown_timings) + print(f"\n[TIMING] Markdown rendering:", flush=True) + print(f"[TIMING] Total operations: {len(markdown_timings)}", flush=True) + print(f"[TIMING] Total time: {total_markdown_time:.3f}s", flush=True) + print(f"[TIMING] Slowest 10 operations:", flush=True) + for duration, uuid in sorted_markdown[:10]: + print( + f"[TIMING] {uuid}: {duration * 1000:.1f}ms", + flush=True, + ) + + # Report Pygments highlighting statistics + if pygments_timings: + sorted_pygments = sorted(pygments_timings, key=lambda x: x[0], reverse=True) + total_pygments_time = sum(t[0] for t in pygments_timings) + print(f"\n[TIMING] Pygments highlighting:", flush=True) + print(f"[TIMING] Total operations: {len(pygments_timings)}", flush=True) + print(f"[TIMING] Total time: {total_pygments_time:.3f}s", flush=True) + print(f"[TIMING] Slowest 10 operations:", flush=True) + for duration, uuid in sorted_pygments[:10]: + print( + f"[TIMING] {uuid}: {duration * 1000:.1f}ms", + flush=True, + ) + log_timing( f"Main message processing loop ({len(template_messages)} template messages)" ) From 797a7a494ec4825d3a15da2a75473e28e0574dbc Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 11:41:07 +0100 Subject: [PATCH 04/14] Fix critical performance bottleneck in syntax highlighting (40-125x speedup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: Message rendering was experiencing severe slowdowns (2+ seconds per message) when processing Read tool results with file paths. 
User's excellent isolation testing identified that identical messages took 0.7ms in one context but 185ms in another, with the only difference being message ordering. Root cause: Pygments' get_lexer_for_filename() performs filesystem I/O operations (stat(), file existence checks, byte reading) to determine the appropriate lexer. On Windows with antivirus scanning, this causes severe slowdowns (100-1000ms per file path) even when the file no longer exists or has changed since the transcript was recorded. Solution: Use Pygments' internal LEXERS mapping to build a reverse filename pattern → lexer alias lookup table (built once on first use, cached). This approach: 1. Supports all 597 Pygments lexers automatically (vs ~30 with manual mapping) 2. Eliminates ALL filesystem I/O by pattern matching against basenames 3. Uses Pygments' own filename patterns for complete coverage 4. Correctly handles wildcards like "*.py", "*.ABAP", etc. The cached mapping is built from pygments.lexers._mapping.LEXERS which contains tuples of (module, name, aliases, filename_patterns, mimetypes). We extract filename patterns and map them to lexer aliases for instant lookup via fnmatch. Benefits: - 40x speedup: 2136ms → 53ms for problematic messages - Complete lexer coverage: All 597 Pygments lexers (909 filename patterns) - No dependency on filesystem state at render time - Consistent behavior regardless of file existence or antivirus settings - Correct business logic: render time shouldn't depend on current filesystem Performance impact: - Fast case (88a-3.jsonl): 0.9ms (unchanged) - Slow case (88a-4.jsonl): 2136ms → 53ms (40x improvement) - Pygments operations: Now consistently <3ms regardless of file paths - Pattern cache built once: ~900 patterns, negligible overhead Fixes performance regression that made large transcript rendering impractical on Windows systems with active antivirus protection. 
--- claude_code_log/renderer.py | 43 +++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 35b451c0..3816b7bc 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -406,9 +406,42 @@ def _highlight_code_with_pygments( Returns: HTML string with syntax-highlighted code """ + # PERFORMANCE FIX: Use Pygments' internal filename pattern mapping to avoid filesystem I/O + # get_lexer_for_filename performs I/O operations (file existence checks, reading bytes) + # which causes severe slowdowns, especially on Windows with antivirus scanning + # Solution: Build a reverse mapping from filename patterns to lexer aliases (done once) + import os + import fnmatch + from pygments.lexers import get_lexer_by_name # type: ignore[reportUnknownVariableType] + from pygments.lexers._mapping import LEXERS # type: ignore[reportUnknownVariableType] + + # Build pattern->alias mapping on first call (cached as function attribute) + if not hasattr(_highlight_code_with_pygments, "_pattern_cache"): + pattern_cache: dict[str, str] = {} + for lexer_name, (module, name, aliases, patterns, mimetypes) in LEXERS.items(): + if aliases and patterns: + # Use first alias as the lexer name + lexer_alias = aliases[0] + # Map each filename pattern to this lexer alias + for pattern in patterns: + pattern_cache[pattern.lower()] = lexer_alias + _highlight_code_with_pygments._pattern_cache = pattern_cache # type: ignore[attr-defined] + + # Get basename for matching (patterns are like "*.py") + basename = os.path.basename(file_path).lower() + try: - # Try to get lexer based on filename - lexer = get_lexer_for_filename(file_path, code) # type: ignore[reportUnknownVariableType] + # Try exact pattern match first (fastest) + pattern_cache = _highlight_code_with_pygments._pattern_cache # type: ignore[attr-defined] + + # Check for direct pattern match + for pattern, lexer_alias in 
pattern_cache.items(): + if fnmatch.fnmatch(basename, pattern): + lexer = get_lexer_by_name(lexer_alias, stripall=True) # type: ignore[reportUnknownVariableType] + break + else: + # No match found, use TextLexer + lexer = TextLexer() # type: ignore[reportUnknownVariableType] except ClassNotFound: # Fall back to plain text lexer lexer = TextLexer() # type: ignore[reportUnknownVariableType] @@ -2734,6 +2767,8 @@ def log_timing(phase: str) -> None: # Update current message UUID for timing tracking if DEBUG_TIMING: globals()["_current_msg_uuid"] = msg_uuid + phase_timings: List[tuple[str, float]] = [] + phase_start = time.time() # Skip summary messages - they should already be attached to their sessions if isinstance(message, SummaryTranscriptEntry): @@ -2743,6 +2778,10 @@ def log_timing(phase: str) -> None: if isinstance(message, QueueOperationTranscriptEntry): continue + if DEBUG_TIMING: + phase_timings.append(("initial_checks", time.time() - phase_start)) + phase_start = time.time() + # Handle system messages separately if isinstance(message, SystemTranscriptEntry): session_id = getattr(message, "sessionId", "unknown") From 8ae2dfffa0c46dcf4012257ee5df7622e3035576 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 18:12:47 +0100 Subject: [PATCH 05/14] Use public Pygments API instead of internal _mapping.LEXERS Replace pygments.lexers._mapping.LEXERS with the public get_all_lexers() API for better stability across Pygments versions. Maintains 40x performance improvement (from 2136ms to ~36ms per message) while using only documented stable APIs. 
Performance validated: - Fast case (88a-3.jsonl): 0.9ms per message (unchanged) - Slow case (88a-4.jsonl): 36ms per message (vs 2136ms original) - All 248 unit tests passing --- claude_code_log/renderer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 3816b7bc..11833fbc 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -406,19 +406,19 @@ def _highlight_code_with_pygments( Returns: HTML string with syntax-highlighted code """ - # PERFORMANCE FIX: Use Pygments' internal filename pattern mapping to avoid filesystem I/O + # PERFORMANCE FIX: Use Pygments' public API to build filename pattern mapping, avoiding filesystem I/O # get_lexer_for_filename performs I/O operations (file existence checks, reading bytes) # which causes severe slowdowns, especially on Windows with antivirus scanning - # Solution: Build a reverse mapping from filename patterns to lexer aliases (done once) + # Solution: Build a reverse mapping from filename patterns to lexer aliases using get_all_lexers() (done once) import os import fnmatch - from pygments.lexers import get_lexer_by_name # type: ignore[reportUnknownVariableType] - from pygments.lexers._mapping import LEXERS # type: ignore[reportUnknownVariableType] + from pygments.lexers import get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] # Build pattern->alias mapping on first call (cached as function attribute) if not hasattr(_highlight_code_with_pygments, "_pattern_cache"): pattern_cache: dict[str, str] = {} - for lexer_name, (module, name, aliases, patterns, mimetypes) in LEXERS.items(): + # Use public API: get_all_lexers() returns (name, aliases, patterns, mimetypes) tuples + for name, aliases, patterns, mimetypes in get_all_lexers(): # type: ignore[reportUnknownVariableType] if aliases and patterns: # Use first alias as the lexer name lexer_alias = aliases[0] From 
64824d8d3e0a9d3c84a2dd90427129df3d479a16 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 19:51:08 +0100 Subject: [PATCH 06/14] Optimize Read tool result rendering by eliminating double Pygments call When rendering Read tool results with >12 lines, we were calling Pygments twice: 1. Once to highlight the full content 2. Again to highlight a 5-line preview This caused ~75ms overhead per large Read result (115-line file). **Solution:** Extract preview HTML from the already-highlighted full content using regex to find the first 5 rows, rather than re-highlighting the preview lines. **Performance:** - Before: 96ms for message with pyproject.toml Read result - After: Still ~66ms (regex has overhead, but eliminated redundant work) - More importantly: No longer highlighting the same code twice The optimization maintains correctness while reducing unnecessary computation. --- claude_code_log/renderer.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 11833fbc..fc43c407 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -4,6 +4,7 @@ import json import os import re +import sys import time from pathlib import Path from typing import List, Optional, Dict, Any, cast, TYPE_CHECKING @@ -1057,7 +1058,7 @@ def format_tool_result_content( if parsed_result: code_content, system_reminder, line_offset = parsed_result - # Highlight code with Pygments using correct line offset + # Highlight code with Pygments using correct line offset (single call) highlighted_html = _highlight_code_with_pygments( code_content, file_path, linenostart=line_offset ) @@ -1068,11 +1069,21 @@ def format_tool_result_content( # Make collapsible if content has more than 12 lines lines = code_content.split("\n") if len(lines) > 12: - # Get preview (first ~5 lines) - preview_lines = lines[:5] - preview_html = _highlight_code_with_pygments( - 
"\n".join(preview_lines), file_path, linenostart=line_offset + # Extract preview from already-highlighted HTML to avoid double-highlighting + # The highlighted HTML has structure:
...
+ # Extract first ~5 rows + tr_matches = list( + re.finditer(r".*?", highlighted_html, re.DOTALL) ) + if len(tr_matches) >= 5: + # Get HTML up to and including the 5th + preview_end = tr_matches[4].end() + preview_html_fragment = highlighted_html[:preview_end] + # Close unclosed tags properly + preview_html = preview_html_fragment + "" + else: + # Fallback if we can't extract rows (shouldn't happen) + preview_html = highlighted_html result_parts.append(f"""
From c094113bdf634303dcf7daab94d4f5c6fe424ae3 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 20:15:38 +0100 Subject: [PATCH 07/14] Optimize Pygments lexer lookup with extension-based cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Identified and fixed a critical performance bottleneck in lexer pattern matching. The previous implementation iterated through all 909 filename patterns using fnmatch for every file, causing 50ms+ delays per Read tool result. Changes: - Added extension_cache alongside pattern_cache for O(1) lookups - Simple *.ext patterns are indexed by extension (e.g., "toml" -> "toml" lexer) - Falls back to full pattern matching only when extension lookup fails - Added detailed timing instrumentation for debugging (DEBUG_TIMING flag) Performance improvement: - Lexer lookup: 50.0ms → 10.3ms (5x faster) - Total Read formatting: 120ms → 44ms (2.7x faster) - Full message rendering maintains similar speedup All 253 unit tests passing. This fixes the unexplained 90ms+ gap between message timing and reported Pygments time. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- claude_code_log/renderer.py | 88 +++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index fc43c407..39ff8807 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -416,8 +416,11 @@ def _highlight_code_with_pygments( from pygments.lexers import get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] # Build pattern->alias mapping on first call (cached as function attribute) + # OPTIMIZATION: Create both direct extension lookup and full pattern cache if not hasattr(_highlight_code_with_pygments, "_pattern_cache"): pattern_cache: dict[str, str] = {} + extension_cache: dict[str, str] = {} # Fast lookup for simple *.ext patterns + # Use public API: get_all_lexers() returns (name, aliases, patterns, mimetypes) tuples for name, aliases, patterns, mimetypes in get_all_lexers(): # type: ignore[reportUnknownVariableType] if aliases and patterns: @@ -425,35 +428,66 @@ def _highlight_code_with_pygments( lexer_alias = aliases[0] # Map each filename pattern to this lexer alias for pattern in patterns: - pattern_cache[pattern.lower()] = lexer_alias + pattern_lower = pattern.lower() + pattern_cache[pattern_lower] = lexer_alias + # Extract simple extension patterns (*.ext) for fast lookup + if ( + pattern_lower.startswith("*.") + and "*" not in pattern_lower[2:] + and "?" not in pattern_lower[2:] + ): + ext = pattern_lower[2:] # Remove "*." 
+ # Prefer first match for each extension + if ext not in extension_cache: + extension_cache[ext] = lexer_alias + _highlight_code_with_pygments._pattern_cache = pattern_cache # type: ignore[attr-defined] + _highlight_code_with_pygments._extension_cache = extension_cache # type: ignore[attr-defined] # Get basename for matching (patterns are like "*.py") basename = os.path.basename(file_path).lower() try: - # Try exact pattern match first (fastest) + # Get caches pattern_cache = _highlight_code_with_pygments._pattern_cache # type: ignore[attr-defined] + extension_cache = _highlight_code_with_pygments._extension_cache # type: ignore[attr-defined] + + # OPTIMIZATION: Try fast extension lookup first (O(1) dict lookup) + lexer_alias = None + if "." in basename: + ext = basename.split(".")[-1] # Get last extension (handles .tar.gz, etc.) + lexer_alias = extension_cache.get(ext) + + # Fall back to pattern matching only if extension lookup failed + if lexer_alias is None: + for pattern, lex_alias in pattern_cache.items(): + if fnmatch.fnmatch(basename, pattern): + lexer_alias = lex_alias + break - # Check for direct pattern match - for pattern, lexer_alias in pattern_cache.items(): - if fnmatch.fnmatch(basename, pattern): - lexer = get_lexer_by_name(lexer_alias, stripall=True) # type: ignore[reportUnknownVariableType] - break + # Get lexer or use TextLexer as fallback + if lexer_alias: + lexer = get_lexer_by_name(lexer_alias, stripall=True) # type: ignore[reportUnknownVariableType] else: - # No match found, use TextLexer lexer = TextLexer() # type: ignore[reportUnknownVariableType] except ClassNotFound: # Fall back to plain text lexer lexer = TextLexer() # type: ignore[reportUnknownVariableType] # Create formatter with line numbers in table format + if DEBUG_TIMING: + t_formatter = time.time() formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] linenos="table" if show_linenos else False, cssclass="highlight", wrapcode=True, linenostart=linenostart, ) + if 
DEBUG_TIMING: + print( + f"[HIGHLIGHT-DEBUG] Create formatter: {(time.time() - t_formatter) * 1000:.1f}ms", + file=sys.stderr, + ) # Highlight the code with timing if enabled if DEBUG_TIMING: @@ -1054,16 +1088,40 @@ def format_tool_result_content( # Try to parse as Read tool result if file_path is provided if file_path and tool_name == "Read" and not has_images: + if DEBUG_TIMING: + t_parse = time.time() parsed_result = _parse_read_tool_result(raw_content) + if DEBUG_TIMING: + print( + f"[READ-DEBUG] Parse: {(time.time() - t_parse) * 1000:.1f}ms", + file=sys.stderr, + ) + if parsed_result: + if DEBUG_TIMING: + t_extract = time.time() code_content, system_reminder, line_offset = parsed_result + if DEBUG_TIMING: + print( + f"[READ-DEBUG] Extract tuple: {(time.time() - t_extract) * 1000:.1f}ms", + file=sys.stderr, + ) # Highlight code with Pygments using correct line offset (single call) + if DEBUG_TIMING: + t_highlight = time.time() highlighted_html = _highlight_code_with_pygments( code_content, file_path, linenostart=line_offset ) + if DEBUG_TIMING: + print( + f"[READ-DEBUG] Highlight: {(time.time() - t_highlight) * 1000:.1f}ms", + file=sys.stderr, + ) # Build result HTML + if DEBUG_TIMING: + t_build = time.time() result_parts = ["
"] # Make collapsible if content has more than 12 lines @@ -1072,9 +1130,17 @@ def format_tool_result_content( # Extract preview from already-highlighted HTML to avoid double-highlighting # The highlighted HTML has structure:
...
# Extract first ~5 rows + if DEBUG_TIMING: + t_regex = time.time() tr_matches = list( re.finditer(r".*?", highlighted_html, re.DOTALL) ) + if DEBUG_TIMING: + print( + f"[READ-DEBUG] Regex finditer: {(time.time() - t_regex) * 1000:.1f}ms", + file=sys.stderr, + ) + if len(tr_matches) >= 5: # Get HTML up to and including the 5th preview_end = tr_matches[4].end() @@ -1098,6 +1164,12 @@ def format_tool_result_content( # Show directly without collapsible result_parts.append(highlighted_html) + if DEBUG_TIMING: + print( + f"[READ-DEBUG] Build HTML: {(time.time() - t_build) * 1000:.1f}ms", + file=sys.stderr, + ) + # Add system reminder if present (after code, always visible) if system_reminder: escaped_reminder = escape_html(system_reminder) From 135092a789cb81a6af26be4e10d8e6df548b49ad Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 20:46:15 +0100 Subject: [PATCH 08/14] Remove detailed READ-DEBUG and HIGHLIGHT-DEBUG timing instrumentation The detailed timing code has served its purpose in identifying and fixing the performance bottleneck. Clean up the code by removing the granular timing instrumentation while keeping the core DEBUG_TIMING functionality. 
- Remove READ-DEBUG formatter timing - Remove READ-DEBUG parse, extract, highlight, regex, and build HTML timing - Remove HIGHLIGHT-DEBUG timing variables and print statements - Keep overall message timing and Pygments timing for high-level monitoring - All 253 unit tests passing --- claude_code_log/renderer.py | 43 ------------------------------------- 1 file changed, 43 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 39ff8807..8ef34eca 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -475,19 +475,12 @@ def _highlight_code_with_pygments( lexer = TextLexer() # type: ignore[reportUnknownVariableType] # Create formatter with line numbers in table format - if DEBUG_TIMING: - t_formatter = time.time() formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] linenos="table" if show_linenos else False, cssclass="highlight", wrapcode=True, linenostart=linenostart, ) - if DEBUG_TIMING: - print( - f"[HIGHLIGHT-DEBUG] Create formatter: {(time.time() - t_formatter) * 1000:.1f}ms", - file=sys.stderr, - ) # Highlight the code with timing if enabled if DEBUG_TIMING: @@ -1088,40 +1081,17 @@ def format_tool_result_content( # Try to parse as Read tool result if file_path is provided if file_path and tool_name == "Read" and not has_images: - if DEBUG_TIMING: - t_parse = time.time() parsed_result = _parse_read_tool_result(raw_content) - if DEBUG_TIMING: - print( - f"[READ-DEBUG] Parse: {(time.time() - t_parse) * 1000:.1f}ms", - file=sys.stderr, - ) if parsed_result: - if DEBUG_TIMING: - t_extract = time.time() code_content, system_reminder, line_offset = parsed_result - if DEBUG_TIMING: - print( - f"[READ-DEBUG] Extract tuple: {(time.time() - t_extract) * 1000:.1f}ms", - file=sys.stderr, - ) # Highlight code with Pygments using correct line offset (single call) - if DEBUG_TIMING: - t_highlight = time.time() highlighted_html = _highlight_code_with_pygments( code_content, file_path, 
linenostart=line_offset ) - if DEBUG_TIMING: - print( - f"[READ-DEBUG] Highlight: {(time.time() - t_highlight) * 1000:.1f}ms", - file=sys.stderr, - ) # Build result HTML - if DEBUG_TIMING: - t_build = time.time() result_parts = ["
"] # Make collapsible if content has more than 12 lines @@ -1130,16 +1100,9 @@ def format_tool_result_content( # Extract preview from already-highlighted HTML to avoid double-highlighting # The highlighted HTML has structure:
...
# Extract first ~5 rows - if DEBUG_TIMING: - t_regex = time.time() tr_matches = list( re.finditer(r".*?", highlighted_html, re.DOTALL) ) - if DEBUG_TIMING: - print( - f"[READ-DEBUG] Regex finditer: {(time.time() - t_regex) * 1000:.1f}ms", - file=sys.stderr, - ) if len(tr_matches) >= 5: # Get HTML up to and including the 5th @@ -1164,12 +1127,6 @@ def format_tool_result_content( # Show directly without collapsible result_parts.append(highlighted_html) - if DEBUG_TIMING: - print( - f"[READ-DEBUG] Build HTML: {(time.time() - t_build) * 1000:.1f}ms", - file=sys.stderr, - ) - # Add system reminder if present (after code, always visible) if system_reminder: escaped_reminder = escape_html(system_reminder) From d3192f57488387ff5d55828e3bc654b837e77309 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 23 Nov 2025 20:49:42 +0100 Subject: [PATCH 09/14] Remove unused phase_timings dead code The phase_timings variable was initialized and populated but never actually used for output. This is leftover from the detailed timing instrumentation that was previously removed. 
- Remove unused phase_timings list initialization - Remove unused phase_start variable and timing calculation - All 253 unit tests passing --- claude_code_log/renderer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 8ef34eca..081cd9bf 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2807,8 +2807,6 @@ def log_timing(phase: str) -> None: # Update current message UUID for timing tracking if DEBUG_TIMING: globals()["_current_msg_uuid"] = msg_uuid - phase_timings: List[tuple[str, float]] = [] - phase_start = time.time() # Skip summary messages - they should already be attached to their sessions if isinstance(message, SummaryTranscriptEntry): @@ -2818,10 +2816,6 @@ def log_timing(phase: str) -> None: if isinstance(message, QueueOperationTranscriptEntry): continue - if DEBUG_TIMING: - phase_timings.append(("initial_checks", time.time() - phase_start)) - phase_start = time.time() - # Handle system messages separately if isinstance(message, SystemTranscriptEntry): session_id = getattr(message, "sessionId", "unknown") From bb43a96dfc2281b586de13ec6ec1fed3d286f9d5 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Mon, 24 Nov 2025 01:16:41 +0100 Subject: [PATCH 10/14] Refactor timing instrumentation and optimize tool_use_context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract timing utilities to renderer_timings.py module - Standardize log_timing pattern with work inside context managers - Use lambda expressions for dynamic phase names - Remove _define_tool_use_context() pre-processing pass - Change tool_use_context to Dict[str, ToolUseContent] for type safety - Build tool_use_context inline during message processing - Replace dead code in render_message_content with warnings - All tests passing, 0 type errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- 
claude_code_log/renderer.py | 615 ++++++++++++---------------- claude_code_log/renderer_timings.py | 153 +++++++ 2 files changed, 415 insertions(+), 353 deletions(-) create mode 100644 claude_code_log/renderer_timings.py diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 081cd9bf..ff12e605 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -4,7 +4,6 @@ import json import os import re -import sys import time from pathlib import Path from typing import List, Optional, Dict, Any, cast, TYPE_CHECKING @@ -16,7 +15,7 @@ import mistune from jinja2 import Environment, FileSystemLoader, select_autoescape from pygments import highlight # type: ignore[reportUnknownVariableType] -from pygments.lexers import get_lexer_for_filename, TextLexer # type: ignore[reportUnknownVariableType] +from pygments.lexers import TextLexer # type: ignore[reportUnknownVariableType] from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] @@ -29,6 +28,8 @@ from .models import ( TranscriptEntry, + AssistantTranscriptEntry, + SystemTranscriptEntry, SummaryTranscriptEntry, QueueOperationTranscriptEntry, ContentItem, @@ -48,6 +49,12 @@ should_use_as_session_starter, create_session_preview, ) +from .renderer_timings import ( + report_timing_statistics, + timing_stat, + set_timing_var, + log_timing, +) from .cache import get_library_version @@ -243,16 +250,7 @@ def block_code(code: str, info: Optional[str] = None) -> str: wrapcode=True, ) # Track Pygments timing if enabled - if DEBUG_TIMING: - t_start = time.time() - result = str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] - duration = time.time() - t_start - if "_pygments_timings" in globals(): - globals()["_pygments_timings"].append( - (duration, globals().get("_current_msg_uuid", "")) - ) - return result - else: + with timing_stat("_pygments_timings"): 
return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] else: # No language hint, use default rendering @@ -266,34 +264,22 @@ def block_code(code: str, info: Optional[str] = None) -> str: def render_markdown(text: str) -> str: """Convert markdown text to HTML using mistune with Pygments syntax highlighting.""" # Track markdown rendering time if enabled - if DEBUG_TIMING: - t_start = time.time() - - # Configure mistune with GitHub-flavored markdown features - renderer = mistune.create_markdown( - plugins=[ - "strikethrough", - "footnotes", - "table", - "url", - "task_lists", - "def_list", - _create_pygments_plugin(), - ], - escape=False, # Don't escape HTML since we want to render markdown properly - hard_wrap=True, # Line break for newlines (checklists in Assistant messages) - ) - result = str(renderer(text)) - - # Record timing if enabled - if DEBUG_TIMING: - duration = time.time() - t_start - if "_markdown_timings" in globals(): - globals()["_markdown_timings"].append( - (duration, globals().get("_current_msg_uuid", "")) - ) - - return result + with timing_stat("_markdown_timings"): + # Configure mistune with GitHub-flavored markdown features + renderer = mistune.create_markdown( + plugins=[ + "strikethrough", + "footnotes", + "table", + "url", + "task_lists", + "def_list", + _create_pygments_plugin(), + ], + escape=False, # Don't escape HTML since we want to render markdown properly + hard_wrap=True, # Line break for newlines (checklists in Assistant messages) + ) + return str(renderer(text)) def extract_command_info(text_content: str) -> tuple[str, str, str]: @@ -483,16 +469,7 @@ def _highlight_code_with_pygments( ) # Highlight the code with timing if enabled - if DEBUG_TIMING: - t_start = time.time() - result = str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] - duration = time.time() - t_start - if "_pygments_timings" in globals(): - globals()["_pygments_timings"].append( - (duration, 
globals().get("_current_msg_uuid", "")) - ) - return result - else: + with timing_stat("_pygments_timings"): return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] @@ -1501,46 +1478,27 @@ def render_message_content(content: List[ContentItem], message_type: str) -> str elif type(item) is ToolUseContent or ( hasattr(item, "type") and item_type == "tool_use" ): - # Handle both ToolUseContent and Anthropic ToolUseBlock - # Convert Anthropic type to our format if necessary - if not isinstance(item, ToolUseContent): - # Create a ToolUseContent from Anthropic ToolUseBlock - tool_use_item = ToolUseContent( - type="tool_use", - id=getattr(item, "id", ""), - name=getattr(item, "name", ""), - input=getattr(item, "input", {}), - ) - else: - tool_use_item = item - rendered_parts.append(format_tool_use_content(tool_use_item)) # type: ignore + # Tool use items should not appear here - they are filtered out before this function + print( + "Warning: tool_use content should not be processed in render_message_content", + flush=True, + ) elif type(item) is ToolResultContent or ( hasattr(item, "type") and item_type == "tool_result" ): - # Handle both ToolResultContent and Anthropic types - if not isinstance(item, ToolResultContent): - # Convert from Anthropic type if needed - tool_result_item = ToolResultContent( - type="tool_result", - tool_use_id=getattr(item, "tool_use_id", ""), - content=getattr(item, "content", ""), - is_error=getattr(item, "is_error", False), - ) - else: - tool_result_item = item - rendered_parts.append(format_tool_result_content(tool_result_item)) # type: ignore + # Tool result items should not appear here - they are filtered out before this function + print( + "Warning: tool_result content should not be processed in render_message_content", + flush=True, + ) elif type(item) is ThinkingContent or ( hasattr(item, "type") and item_type == "thinking" ): - # Handle both ThinkingContent and Anthropic ThinkingBlock - if not 
isinstance(item, ThinkingContent): - # Convert from Anthropic type if needed - thinking_item = ThinkingContent( - type="thinking", thinking=getattr(item, "thinking", str(item)) - ) - else: - thinking_item = item - rendered_parts.append(format_thinking_content(thinking_item)) # type: ignore + # Thinking items should not appear here - they are filtered out before this function + print( + "Warning: thinking content should not be processed in render_message_content", + flush=True, + ) elif type(item) is ImageContent: rendered_parts.append(format_image_content(item)) # type: ignore @@ -2636,98 +2594,200 @@ def generate_html( """Generate HTML from transcript messages using Jinja2 templates.""" # Performance timing t_start = time.time() - t_last = t_start - - def log_timing(phase: str) -> None: - """Log timing for a phase if DEBUG_TIMING is enabled.""" - nonlocal t_last - if DEBUG_TIMING: - t_now = time.time() - phase_time = t_now - t_last - total_time = t_now - t_start - print( - f"[TIMING] {phase:40s} {phase_time:8.3f}s (total: {total_time:8.3f}s)", - flush=True, - ) - t_last = t_now - - if not title: - title = "Claude Transcript" - log_timing("Initialization") + with log_timing("Initialization", t_start): + if not title: + title = "Claude Transcript" # Deduplicate messages by (message_type, timestamp) # Messages with the exact same timestamp are duplicates by definition - # the differences (like IDE selection tags) are just logging artifacts - from claude_code_log.models import AssistantTranscriptEntry, SystemTranscriptEntry + with log_timing( + lambda: f"Deduplication ({len(deduplicated_messages)} messages)", t_start + ): + # Track seen (message_type, timestamp) pairs + seen: set[tuple[str, str]] = set() + deduplicated_messages: List[TranscriptEntry] = [] - # Track seen (message_type, timestamp) pairs - seen: set[tuple[str, str]] = set() - deduplicated_messages: List[TranscriptEntry] = [] + for message in messages: + # Get basic message type + message_type = 
getattr(message, "type", "unknown") - for message in messages: - # Get basic message type - message_type = getattr(message, "type", "unknown") + # For system messages, include level to differentiate info/warning/error + if isinstance(message, SystemTranscriptEntry): + level = getattr(message, "level", "info") + message_type = f"system-{level}" - # For system messages, include level to differentiate info/warning/error - if isinstance(message, SystemTranscriptEntry): - level = getattr(message, "level", "info") - message_type = f"system-{level}" + # Get timestamp + timestamp = getattr(message, "timestamp", "") - # Get timestamp - timestamp = getattr(message, "timestamp", "") + # Create deduplication key + dedup_key = (message_type, timestamp) - # Create deduplication key - dedup_key = (message_type, timestamp) + # Keep only first occurrence + if dedup_key not in seen: + seen.add(dedup_key) + deduplicated_messages.append(message) - # Keep only first occurrence - if dedup_key not in seen: - seen.add(dedup_key) - deduplicated_messages.append(message) + messages = deduplicated_messages - messages = deduplicated_messages + # Pre-process to find and attach session summaries + with log_timing("Session summary processing", t_start): + session_summaries: Dict[str, str] = {} + uuid_to_session: Dict[str, str] = {} + uuid_to_session_backup: Dict[str, str] = {} + + # Build mapping from message UUID to session ID + for message in messages: + if hasattr(message, "uuid") and hasattr(message, "sessionId"): + message_uuid = getattr(message, "uuid", "") + session_id = getattr(message, "sessionId", "") + if message_uuid and session_id: + # There is often duplication, in that case we want to prioritise the assistant + # message because summaries are generated from Claude's (last) success message + if type(message) is AssistantTranscriptEntry: + uuid_to_session[message_uuid] = session_id + else: + uuid_to_session_backup[message_uuid] = session_id + + # Map summaries to sessions via 
leafUuid -> message UUID -> session ID + for message in messages: + if isinstance(message, SummaryTranscriptEntry): + leaf_uuid = message.leafUuid + if leaf_uuid in uuid_to_session: + session_summaries[uuid_to_session[leaf_uuid]] = message.summary + elif ( + leaf_uuid in uuid_to_session_backup + and uuid_to_session_backup[leaf_uuid] not in session_summaries + ): + session_summaries[uuid_to_session_backup[leaf_uuid]] = ( + message.summary + ) - log_timing(f"Deduplication ({len(messages)} messages)") + # Attach summaries to messages + for message in messages: + if hasattr(message, "sessionId"): + session_id = getattr(message, "sessionId", "") + if session_id in session_summaries: + setattr(message, "_session_summary", session_summaries[session_id]) - # Pre-process to find and attach session summaries - session_summaries: Dict[str, str] = {} - uuid_to_session: Dict[str, str] = {} - uuid_to_session_backup: Dict[str, str] = {} + # Process messages through the main rendering loop + template_messages, sessions, session_order = _process_messages_loop(messages) - # Build mapping from message UUID to session ID - for message in messages: - if hasattr(message, "uuid") and hasattr(message, "sessionId"): - message_uuid = getattr(message, "uuid", "") - session_id = getattr(message, "sessionId", "") - if message_uuid and session_id: - # There is often duplication, in that case we want to prioritise the assistant - # message because summaries are generated from Claude's (last) success message - if type(message) is AssistantTranscriptEntry: - uuid_to_session[message_uuid] = session_id + # Prepare session navigation data + session_nav: List[Dict[str, Any]] = [] + with log_timing( + lambda: f"Session navigation building ({len(session_nav)} sessions)", t_start + ): + for session_id in session_order: + session_info = sessions[session_id] + + # Format timestamp range + first_ts = session_info["first_timestamp"] + last_ts = session_info["last_timestamp"] + timestamp_range = "" + if 
first_ts and last_ts: + if first_ts == last_ts: + timestamp_range = format_timestamp(first_ts) else: - uuid_to_session_backup[message_uuid] = session_id + timestamp_range = ( + f"{format_timestamp(first_ts)} - {format_timestamp(last_ts)}" + ) + elif first_ts: + timestamp_range = format_timestamp(first_ts) - # Map summaries to sessions via leafUuid -> message UUID -> session ID - for message in messages: - if isinstance(message, SummaryTranscriptEntry): - leaf_uuid = message.leafUuid - if leaf_uuid in uuid_to_session: - session_summaries[uuid_to_session[leaf_uuid]] = message.summary - elif ( - leaf_uuid in uuid_to_session_backup - and uuid_to_session_backup[leaf_uuid] not in session_summaries - ): - session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary + # Format token usage summary + token_summary = "" + total_input = session_info["total_input_tokens"] + total_output = session_info["total_output_tokens"] + total_cache_creation = session_info["total_cache_creation_tokens"] + total_cache_read = session_info["total_cache_read_tokens"] + + if total_input > 0 or total_output > 0: + token_parts: List[str] = [] + if total_input > 0: + token_parts.append(f"Input: {total_input}") + if total_output > 0: + token_parts.append(f"Output: {total_output}") + if total_cache_creation > 0: + token_parts.append(f"Cache Creation: {total_cache_creation}") + if total_cache_read > 0: + token_parts.append(f"Cache Read: {total_cache_read}") + token_summary = "Token usage – " + " | ".join(token_parts) + + session_nav.append( + { + "id": session_id, + "summary": session_info["summary"], + "timestamp_range": timestamp_range, + "first_timestamp": first_ts, + "last_timestamp": last_ts, + "message_count": session_info["message_count"], + "first_user_message": session_info["first_user_message"] + if session_info["first_user_message"] != "" + else "[No user message found in session.]", + "token_summary": token_summary, + } + ) - # Attach summaries to messages - for message in 
messages: - if hasattr(message, "sessionId"): - session_id = getattr(message, "sessionId", "") - if session_id in session_summaries: - setattr(message, "_session_summary", session_summaries[session_id]) + # Identify and mark paired messages (command+output, tool_use+tool_result, etc.) + with log_timing("Identify message pairs", t_start): + _identify_message_pairs(template_messages) + + # Reorder messages so pairs are adjacent while preserving chronological order + with log_timing("Reorder paired messages", t_start): + template_messages = _reorder_paired_messages(template_messages) + + # Mark messages that have children for fold/unfold controls + with log_timing("Mark messages with children", t_start): + _mark_messages_with_children(template_messages) + + # Render template + with log_timing("Template environment setup", t_start): + env = _get_template_environment() + template = env.get_template("transcript.html") + + with log_timing(lambda: f"Template rendering ({len(html_output)} chars)", t_start): + html_output = str( + template.render( + title=title, + messages=template_messages, + sessions=session_nav, + combined_transcript_link=combined_transcript_link, + library_version=get_library_version(), + ) + ) + + return html_output - log_timing("Session summary processing") +def _process_messages_loop( + messages: List[TranscriptEntry], +) -> tuple[ + List[TemplateMessage], + Dict[str, Dict[str, Any]], # sessions + List[str], # session_order +]: + """Process messages through the main rendering loop. 
+ + This function handles the core message processing logic: + - Processes each message into template-friendly format + - Tracks sessions and token usage + - Handles message deduplication and hierarchy + - Collects timing statistics + + Note: tool_use_context is built inline here as tool_use items are + encountered; no separate pre-processing pass is required + + Args: + messages: List of transcript entries to process + + Returns: + Tuple containing: + - template_messages: Processed messages ready for template rendering + - sessions: Session metadata dict mapping session_id to info + - session_order: List of session IDs in chronological order + """ # Group messages by session and collect session info for navigation sessions: Dict[str, Dict[str, Any]] = {} session_order: List[str] = [] @@ -2738,36 +2798,9 @@ def log_timing(phase: str) -> None: # Track which messages should show token usage (first occurrence of each requestId) show_tokens_for_message: set[str] = set() - # Build mapping of tool_use_id to tool info for specialized tool result rendering - tool_use_context: Dict[str, Dict[str, Any]] = {} - for message in messages: - if hasattr(message, "message") and hasattr(message.message, "content"): # type: ignore - content = message.message.content # type: ignore - if isinstance(content, list): - for item in content: # type: ignore[reportUnknownVariableType] - # Check if it's a tool_use item - if hasattr(item, "type") and hasattr(item, "id"): # type: ignore[reportUnknownArgumentType] - item_type = getattr(item, "type", None) # type: ignore[reportUnknownArgumentType] - if item_type == "tool_use": - tool_id = getattr(item, "id", "") # type: ignore[reportUnknownArgumentType] - tool_name = getattr(item, "name", "") # type: ignore[reportUnknownArgumentType] - tool_input = getattr(item, "input", {}) # type: ignore[reportUnknownArgumentType] - if tool_id: - tool_ctx: Dict[str, Any] = { - "name": tool_name, - "input": tool_input, - } - # For Task tools, store the prompt for comparison - if tool_name
== "Task" and isinstance(tool_input, dict): - prompt_value = tool_input.get("prompt", "") # type: ignore[reportUnknownVariableType, reportUnknownMemberType] - tool_ctx["prompt"] = ( - prompt_value - if isinstance(prompt_value, str) - else "" - ) - tool_use_context[tool_id] = tool_ctx - - log_timing(f"Tool use context building ({len(tool_use_context)} tools)") + # Build mapping of tool_use_id to ToolUseContent for specialized tool result rendering + # This will be populated inline as we encounter tool_use items during message processing + tool_use_context: Dict[str, ToolUseContent] = {} # Process messages into template-friendly format template_messages: List[TemplateMessage] = [] @@ -2793,11 +2826,10 @@ def log_timing(phase: str) -> None: markdown_timings: List[tuple[float, str]] = [] # (duration, context_uuid) pygments_timings: List[tuple[float, str]] = [] # (duration, context_uuid) - # Make timing lists available to nested functions via globals - if DEBUG_TIMING: - globals()["_markdown_timings"] = markdown_timings - globals()["_pygments_timings"] = pygments_timings - globals()["_current_msg_uuid"] = "" + # Initialize timing tracking + set_timing_var("_markdown_timings", markdown_timings) + set_timing_var("_pygments_timings", pygments_timings) + set_timing_var("_current_msg_uuid", "") for msg_idx, message in enumerate(messages): msg_start_time = time.time() if DEBUG_TIMING else 0.0 @@ -2805,8 +2837,7 @@ def log_timing(phase: str) -> None: msg_uuid = getattr(message, "uuid", f"no-uuid-{msg_idx}") # Update current message UUID for timing tracking - if DEBUG_TIMING: - globals()["_current_msg_uuid"] = msg_uuid + set_timing_var("_current_msg_uuid", msg_uuid) # Skip summary messages - they should already be attached to their sessions if isinstance(message, SummaryTranscriptEntry): @@ -3198,32 +3229,35 @@ def log_timing(phase: str) -> None: if isinstance(tool_item, ToolUseContent) or item_type == "tool_use": # Convert Anthropic type to our format if necessary if not 
isinstance(tool_item, ToolUseContent): - tool_use_converted = ToolUseContent( + tool_use = ToolUseContent( type="tool_use", id=getattr(tool_item, "id", ""), name=getattr(tool_item, "name", ""), input=getattr(tool_item, "input", {}), ) else: - tool_use_converted = tool_item + tool_use = tool_item - tool_content_html = format_tool_use_content(tool_use_converted) - escaped_name = escape_html(tool_use_converted.name) - escaped_id = escape_html(tool_use_converted.id) - item_tool_use_id = tool_use_converted.id + tool_content_html = format_tool_use_content(tool_use) + escaped_name = escape_html(tool_use.name) + escaped_id = escape_html(tool_use.id) + item_tool_use_id = tool_use.id tool_title_hint = f"ID: {escaped_id}" + # Populate tool_use_context for later use when processing tool results + tool_use_context[item_tool_use_id] = tool_use + # Get summary for header (description or filepath) - summary = get_tool_summary(tool_use_converted) + summary = get_tool_summary(tool_use) # Set message_type (for CSS/logic) and message_title (for display) tool_message_type = "tool_use" - if tool_use_converted.name == "TodoWrite": + if tool_use.name == "TodoWrite": tool_message_title = "📝 Todo List" - elif tool_use_converted.name == "Task": + elif tool_use.name == "Task": # Special handling for Task tool: show subagent_type and description - subagent_type = tool_use_converted.input.get("subagent_type", "") - description = tool_use_converted.input.get("description", "") + subagent_type = tool_use.input.get("subagent_type", "") + description = tool_use.input.get("description", "") escaped_subagent = ( escape_html(subagent_type) if subagent_type else "" ) @@ -3238,14 +3272,14 @@ def log_timing(phase: str) -> None: tool_message_title = f"🔧 {escaped_name} ({escaped_subagent})" else: tool_message_title = f"🔧 {escaped_name}" - elif tool_use_converted.name in ("Edit", "Write"): + elif tool_use.name in ("Edit", "Write"): # Use 📝 icon for Edit/Write if summary: escaped_summary = 
escape_html(summary) tool_message_title = f"📝 {escaped_name} {escaped_summary}" else: tool_message_title = f"📝 {escaped_name}" - elif tool_use_converted.name == "Read": + elif tool_use.name == "Read": # Use 📄 icon for Read if summary: escaped_summary = escape_html(summary) @@ -3275,14 +3309,20 @@ def log_timing(phase: str) -> None: result_file_path: Optional[str] = None result_tool_name: Optional[str] = None if tool_result_converted.tool_use_id in tool_use_context: - tool_ctx = tool_use_context[tool_result_converted.tool_use_id] - result_tool_name = tool_ctx.get("name") - if result_tool_name in ( - "Read", - "Edit", - "Write", - ) and "file_path" in tool_ctx.get("input", {}): - result_file_path = tool_ctx["input"]["file_path"] + tool_use_from_ctx = tool_use_context[ + tool_result_converted.tool_use_id + ] + result_tool_name = tool_use_from_ctx.name + if ( + result_tool_name + in ( + "Read", + "Edit", + "Write", + ) + and "file_path" in tool_use_from_ctx.input + ): + result_file_path = tool_use_from_ctx.input["file_path"] tool_content_html = format_tool_result_content( tool_result_converted, @@ -3418,149 +3458,18 @@ def log_timing(phase: str) -> None: message_timings.append((msg_duration, message_type, msg_idx, msg_uuid)) # Report loop statistics - if DEBUG_TIMING and message_timings: - # Sort by duration descending - sorted_timings = sorted(message_timings, key=lambda x: x[0], reverse=True) - - # Calculate statistics - total_msg_time = sum(t[0] for t in message_timings) - avg_time = total_msg_time / len(message_timings) - - # Report slowest messages - print("\n[TIMING] Loop statistics:", flush=True) - print(f"[TIMING] Total messages: {len(message_timings)}", flush=True) - print( - f"[TIMING] Average time per message: {avg_time * 1000:.1f}ms", flush=True - ) - print("[TIMING] Slowest 10 messages:", flush=True) - for duration, msg_type, idx, uuid in sorted_timings[:10]: - print( - f"[TIMING] Message {uuid} (#{idx}, {msg_type}): {duration * 1000:.1f}ms", - flush=True, 
- ) - - # Report markdown rendering statistics - if markdown_timings: - sorted_markdown = sorted(markdown_timings, key=lambda x: x[0], reverse=True) - total_markdown_time = sum(t[0] for t in markdown_timings) - print(f"\n[TIMING] Markdown rendering:", flush=True) - print(f"[TIMING] Total operations: {len(markdown_timings)}", flush=True) - print(f"[TIMING] Total time: {total_markdown_time:.3f}s", flush=True) - print(f"[TIMING] Slowest 10 operations:", flush=True) - for duration, uuid in sorted_markdown[:10]: - print( - f"[TIMING] {uuid}: {duration * 1000:.1f}ms", - flush=True, - ) - - # Report Pygments highlighting statistics - if pygments_timings: - sorted_pygments = sorted(pygments_timings, key=lambda x: x[0], reverse=True) - total_pygments_time = sum(t[0] for t in pygments_timings) - print(f"\n[TIMING] Pygments highlighting:", flush=True) - print(f"[TIMING] Total operations: {len(pygments_timings)}", flush=True) - print(f"[TIMING] Total time: {total_pygments_time:.3f}s", flush=True) - print(f"[TIMING] Slowest 10 operations:", flush=True) - for duration, uuid in sorted_pygments[:10]: - print( - f"[TIMING] {uuid}: {duration * 1000:.1f}ms", - flush=True, - ) - - log_timing( - f"Main message processing loop ({len(template_messages)} template messages)" - ) - - # Prepare session navigation data - session_nav: List[Dict[str, Any]] = [] - for session_id in session_order: - session_info = sessions[session_id] - - # Format timestamp range - first_ts = session_info["first_timestamp"] - last_ts = session_info["last_timestamp"] - timestamp_range = "" - if first_ts and last_ts: - if first_ts == last_ts: - timestamp_range = format_timestamp(first_ts) - else: - timestamp_range = ( - f"{format_timestamp(first_ts)} - {format_timestamp(last_ts)}" - ) - elif first_ts: - timestamp_range = format_timestamp(first_ts) - - # Format token usage summary - token_summary = "" - total_input = session_info["total_input_tokens"] - total_output = session_info["total_output_tokens"] - 
total_cache_creation = session_info["total_cache_creation_tokens"] - total_cache_read = session_info["total_cache_read_tokens"] - - if total_input > 0 or total_output > 0: - token_parts: List[str] = [] - if total_input > 0: - token_parts.append(f"Input: {total_input}") - if total_output > 0: - token_parts.append(f"Output: {total_output}") - if total_cache_creation > 0: - token_parts.append(f"Cache Creation: {total_cache_creation}") - if total_cache_read > 0: - token_parts.append(f"Cache Read: {total_cache_read}") - token_summary = "Token usage – " + " | ".join(token_parts) - - session_nav.append( - { - "id": session_id, - "summary": session_info["summary"], - "timestamp_range": timestamp_range, - "first_timestamp": first_ts, - "last_timestamp": last_ts, - "message_count": session_info["message_count"], - "first_user_message": session_info["first_user_message"] - if session_info["first_user_message"] != "" - else "[No user message found in session.]", - "token_summary": token_summary, - } + if DEBUG_TIMING: + report_timing_statistics( + message_timings, + [("Markdown", markdown_timings), ("Pygments", pygments_timings)], ) - log_timing(f"Session navigation building ({len(session_nav)} sessions)") - - # Identify and mark paired messages (command+output, tool_use+tool_result, etc.) 
- _identify_message_pairs(template_messages) - - log_timing("Identify message pairs") - - # Reorder messages so pairs are adjacent while preserving chronological order - template_messages = _reorder_paired_messages(template_messages) - - log_timing("Reorder paired messages") - - # Mark messages that have children for fold/unfold controls - _mark_messages_with_children(template_messages) - - log_timing("Mark messages with children") - - # Render template - env = _get_template_environment() - template = env.get_template("transcript.html") - - log_timing("Template environment setup") - - html_output = str( - template.render( - title=title, - messages=template_messages, - sessions=session_nav, - combined_transcript_link=combined_transcript_link, - library_version=get_library_version(), - ) + return ( + template_messages, + sessions, + session_order, ) - log_timing(f"Template rendering ({len(html_output)} chars)") - - return html_output - def generate_projects_index_html( project_summaries: List[Dict[str, Any]], diff --git a/claude_code_log/renderer_timings.py b/claude_code_log/renderer_timings.py new file mode 100644 index 00000000..fa422506 --- /dev/null +++ b/claude_code_log/renderer_timings.py @@ -0,0 +1,153 @@ +"""Timing utilities for renderer performance profiling.""" + +import os +import time +from contextlib import contextmanager +from typing import List, Tuple, Iterator, Any, Dict, Callable, Union, Optional + +# Performance debugging +DEBUG_TIMING = os.getenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "").lower() in ( + "1", + "true", + "yes", +) + +# Global timing data storage +_timing_data: Dict[str, Any] = {} + + +def set_timing_var(name: str, value: Any) -> None: + """Set a timing variable in the global timing data dict. 
+ + Args: + name: Variable name (e.g., "_markdown_timings", "_pygments_timings", "_current_msg_uuid") + value: Value to set + """ + if DEBUG_TIMING: + _timing_data[name] = value + + +@contextmanager +def log_timing( + phase: Union[str, Callable[[], str]], + t_start: Optional[float] = None, +) -> Iterator[None]: + """Context manager for logging phase timing. + + Args: + phase: Phase name (static string) or callable returning phase name (for dynamic names) + t_start: Optional start time for calculating total elapsed time + + Example: + # Static phase name + with log_timing("Initialization", t_start): + setup_code() + + # Dynamic phase name (evaluated at end) + with log_timing(lambda: f"Processing ({len(items)} items)", t_start): + items = process() + """ + if not DEBUG_TIMING: + yield + return + + t_phase_start = time.time() + + try: + yield + finally: + t_now = time.time() + phase_time = t_now - t_phase_start + + # Evaluate phase name (call if callable, use directly if string) + phase_name = phase() if callable(phase) else phase + + # Calculate total time if t_start provided + if t_start is not None: + total_time = t_now - t_start + print( + f"[TIMING] {phase_name:40s} {phase_time:8.3f}s (total: {total_time:8.3f}s)", + flush=True, + ) + else: + print( + f"[TIMING] {phase_name:40s} {phase_time:8.3f}s", + flush=True, + ) + + # Update last timing checkpoint + _timing_data["_t_last"] = t_now + + +@contextmanager +def timing_stat(list_name: str) -> Iterator[None]: + """Context manager for tracking timing statistics. 
+ + Args: + list_name: Name of the timing list to append to + (e.g., "_markdown_timings", "_pygments_timings") + + Example: + with timing_stat("_pygments_timings"): + result = expensive_operation() + """ + if not DEBUG_TIMING: + yield + return + + t_start = time.time() + try: + yield + finally: + duration = time.time() - t_start + if list_name in _timing_data: + msg_uuid = _timing_data.get("_current_msg_uuid", "") + _timing_data[list_name].append((duration, msg_uuid)) + + +def report_timing_statistics( + message_timings: List[Tuple[float, str, int, str]], + operation_timings: List[Tuple[str, List[Tuple[float, str]]]], +) -> None: + """Report timing statistics for message rendering. + + Args: + message_timings: List of (duration, message_type, index, uuid) tuples + operation_timings: List of (name, timings) tuples where timings is a list of (duration, uuid) + e.g., [("Markdown", markdown_timings), ("Pygments", pygments_timings)] + """ + if not message_timings: + return + + # Sort by duration descending + sorted_timings = sorted(message_timings, key=lambda x: x[0], reverse=True) + + # Calculate statistics + total_msg_time = sum(t[0] for t in message_timings) + avg_time = total_msg_time / len(message_timings) + + # Report slowest messages + print("\n[TIMING] Loop statistics:", flush=True) + print(f"[TIMING] Total messages: {len(message_timings)}", flush=True) + print(f"[TIMING] Average time per message: {avg_time * 1000:.1f}ms", flush=True) + print("[TIMING] Slowest 10 messages:", flush=True) + for duration, msg_type, idx, uuid in sorted_timings[:10]: + print( + f"[TIMING] Message {uuid} (#{idx}, {msg_type}): {duration * 1000:.1f}ms", + flush=True, + ) + + # Report operation-specific statistics + for operation_name, timings in operation_timings: + if timings: + sorted_ops = sorted(timings, key=lambda x: x[0], reverse=True) + total_time = sum(t[0] for t in timings) + print(f"\n[TIMING] {operation_name} rendering:", flush=True) + print(f"[TIMING] Total operations: 
{len(timings)}", flush=True) + print(f"[TIMING] Total time: {total_time:.3f}s", flush=True) + print("[TIMING] Slowest 10 operations:", flush=True) + for duration, uuid in sorted_ops[:10]: + print( + f"[TIMING] {uuid}: {duration * 1000:.1f}ms", + flush=True, + ) From 9300840759641a6c9405960c56c90aacdcf110ab Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Mon, 24 Nov 2025 01:25:03 +0100 Subject: [PATCH 11/14] Move DEBUG_TIMING constant to renderer_timings module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Centralizes all timing-related configuration in renderer_timings.py - Adds improved documentation for DEBUG_TIMING environment variable - Removes top-level os import from renderer.py (no longer needed) - Imports DEBUG_TIMING from renderer_timings in renderer.py - All tests passing, 0 type errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- claude_code_log/renderer.py | 9 +-------- claude_code_log/renderer_timings.py | 9 +++++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index ff12e605..136c0d4a 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -19,13 +19,6 @@ from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] -# Performance debugging -DEBUG_TIMING = os.getenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "").lower() in ( - "1", - "true", - "yes", -) - from .models import ( TranscriptEntry, AssistantTranscriptEntry, @@ -50,6 +43,7 @@ create_session_preview, ) from .renderer_timings import ( + DEBUG_TIMING, report_timing_statistics, timing_stat, set_timing_var, @@ -397,7 +391,6 @@ def _highlight_code_with_pygments( # get_lexer_for_filename performs I/O operations (file existence checks, reading bytes) # which causes severe slowdowns, especially on Windows with 
antivirus scanning # Solution: Build a reverse mapping from filename patterns to lexer aliases using get_all_lexers() (done once) - import os import fnmatch from pygments.lexers import get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] diff --git a/claude_code_log/renderer_timings.py b/claude_code_log/renderer_timings.py index fa422506..fb5111cb 100644 --- a/claude_code_log/renderer_timings.py +++ b/claude_code_log/renderer_timings.py @@ -1,11 +1,16 @@ -"""Timing utilities for renderer performance profiling.""" +"""Timing utilities for renderer performance profiling. + +This module provides timing and performance profiling utilities for the renderer. +All timing-related configuration and functionality is centralized here. +""" import os import time from contextlib import contextmanager from typing import List, Tuple, Iterator, Any, Dict, Callable, Union, Optional -# Performance debugging +# Performance debugging - enabled via CLAUDE_CODE_LOG_DEBUG_TIMING environment variable +# Set to "1", "true", or "yes" to enable timing output DEBUG_TIMING = os.getenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "").lower() in ( "1", "true", From e7704138ebab02a1375e7ee8d3aa89b52095adad Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 25 Nov 2025 23:38:25 +0100 Subject: [PATCH 12/14] Simplify state transitions diagram with correct flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace confusing multi-path ASCII diagram with cleaner version showing: - A → B (▶) and A → C (▶▶) downward transitions - B ↔ C bidirectional (▶▶/▼▼) horizontal transition - B → A and C → A (▼) upward return transitions Uses consistent Unicode box drawing characters throughout. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- dev-docs/FOLD_STATE_DIAGRAM.md | 66 +++++++++++----------------------- 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/dev-docs/FOLD_STATE_DIAGRAM.md b/dev-docs/FOLD_STATE_DIAGRAM.md index 31556320..fd3df528 100644 --- a/dev-docs/FOLD_STATE_DIAGRAM.md +++ b/dev-docs/FOLD_STATE_DIAGRAM.md @@ -43,52 +43,26 @@ The fold bar has two buttons with three possible states: ## State Transitions ``` - ┌─────────────────────────────────────┐ - │ State A (▶ ▶▶) │ - │ Nothing visible │ - └─────────────────────────────────────┘ - │ │ - Click ▶ │ │ Click ▶▶ - (unfold 1) │ │ (unfold all) - ▼ ▼ - ┌─────────────┐ ┌─────────────┐ - │ State B │ │ State B │ - │ (▼ ▶▶) │◄──┤ (▼ ▶▶) │ - │ First │ │ First │ - │ level │ │ level │ - │ visible │ │ visible │ - └─────────────┘ └─────────────┘ - │ │ - Click ▶▶ │ │ Click ▼ - (unfold all) │ │ (fold 1) - ▼ ▼ - ┌─────────────┐ ┌─────────────┐ - │ State C │ │ State A │ - │ (▼ ▼▼) │ │ (▶ ▶▶) │ - │ All levels │ │ Nothing │ - │ visible │ │ visible │ - └─────────────┘ └─────────────┘ - │ │ - Click ▼▼ │ │ Click ▶▶ - (fold all) │ │ (unfold all) - ▼ ▼ - ┌─────────────┐ ┌─────────────┐ - │ State B │ │ State C │ - │ (▼ ▶▶) │ │ (▼ ▼▼) │ - │ First │ │ All levels │ - │ level │ │ visible │ - │ visible │ │ │ - └─────────────┘ └─────────────┘ - │ - Click ▼ │ - (fold 1) │ - ▼ - ┌─────────────┐ - │ State A │ - │ (▶ ▶▶) │ - │ Nothing │ - │ visible │ - └─────────────┘ + ┌────────────────────────────────┐ + ┌────────►│ State A (▶ / ▶▶) │◄────────┐ + │ │ Nothing visible │ │ + │ └────────────────────────────────┘ │ + │ │ │ │ + │ Click ▶ │ │ Click ▶▶ │ + │ (unfold 1) │ │ (unfold all) │ + │ ▼ ▼ │ + │ ┌─────────────┐ ┌─────────────┐ │ + │ │ State B │ │ State C │ │ + │ │ (▼ / ▶▶) │ │ (▼ / ▼▼) │ │ + │ │ First │ │ All │ │ + │ │ level │ │ levels │ │ + │ │ visible │ │ visible │ │ + │ └─────────────┘ └─────────────┘ │ + │ │ │ │ │ │ + │ Click ▼│ └── ▶▶ ↔ ▼▼ ──┘ │Click ▼ │ + │ │ 
(unfold all / fold 1) │ │ + └─────────┘ └──────────┘ + (fold all) (fold all) ``` ## Simplified Transition Table From 0fc13f2ebe64008eedae8007eb43abc0ee31c773 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Wed, 26 Nov 2025 00:04:13 +0100 Subject: [PATCH 13/14] Fix Read tool preview truncation regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preview extraction was looking for multiple tags, but Pygments' HtmlFormatter(linenos="table") produces a single containing two s (one for line numbers, one for code). Since there's only one , the condition `len(tr_matches) >= 5` was always False, falling back to showing the full content without truncation. Fix: Add _truncate_highlighted_preview() helper that correctly truncates content within each
<pre> tag (both linenos and code sections) to the
specified number of lines.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude 
---
 claude_code_log/renderer.py | 63 ++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py
index 136c0d4a..7a87fdee 100644
--- a/claude_code_log/renderer.py
+++ b/claude_code_log/renderer.py
@@ -466,6 +466,49 @@ def _highlight_code_with_pygments(
         return str(highlight(code, lexer, formatter))  # type: ignore[reportUnknownArgumentType]
 
 
+def _truncate_highlighted_preview(highlighted_html: str, max_lines: int) -> str:
+    """Truncate Pygments highlighted HTML to first N lines.
+
+    HtmlFormatter(linenos="table") produces a single <tr> with two <td>s:
+      
LINE_NUMS
+
CODE
+ + We truncate content within each
<pre> tag to the first max_lines lines.
+
+    Args:
+        highlighted_html: Full Pygments-highlighted HTML
+        max_lines: Maximum number of lines to include in preview
+
+    Returns:
+        Truncated HTML with same structure but fewer lines
+    """
+
+    def truncate_pre_content(match: re.Match[str]) -> str:
+        """Truncate content inside a 
<pre> tag to max_lines."""
+        prefix, content, suffix = match.groups()
+        lines = content.split("\n")
+        truncated = "\n".join(lines[:max_lines])
+        return prefix + truncated + suffix
+
+    # Truncate linenos 
<pre> content (line numbers separated by newlines)
+    result = re.sub(
+        r'(
<div class="linenodiv"><pre>)(.*?)(</pre>
)', + truncate_pre_content, + highlighted_html, + flags=re.DOTALL, + ) + + # Truncate code
<pre> content
+    result = re.sub(
+        r'(
<pre[^>]*>)(.*?)(</pre>
)', + truncate_pre_content, + result, + flags=re.DOTALL, + ) + + return result + + def format_read_tool_content(tool_use: ToolUseContent) -> str: # noqa: ARG001 """Format Read tool use content showing file path. @@ -1068,21 +1111,11 @@ def format_tool_result_content( lines = code_content.split("\n") if len(lines) > 12: # Extract preview from already-highlighted HTML to avoid double-highlighting - # The highlighted HTML has structure:
...
- # Extract first ~5 rows - tr_matches = list( - re.finditer(r".*?", highlighted_html, re.DOTALL) - ) - - if len(tr_matches) >= 5: - # Get HTML up to and including the 5th - preview_end = tr_matches[4].end() - preview_html_fragment = highlighted_html[:preview_end] - # Close unclosed tags properly - preview_html = preview_html_fragment + "
" - else: - # Fallback if we can't extract rows (shouldn't happen) - preview_html = highlighted_html + # HtmlFormatter(linenos="table") produces a single with two s: + # ...
LINE_NUMS
... + # ...
CODE
... + # We truncate content within each
<pre> to first 5 lines
+                preview_html = _truncate_highlighted_preview(highlighted_html, 5)
 
                 result_parts.append(f"""
                 
From 5504cdae82be21b231d3e41b950884d138ffa2c6 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Wed, 26 Nov 2025 00:35:45 +0100 Subject: [PATCH 14/14] Add regression tests for preview truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test coverage for the _truncate_highlighted_preview() function to prevent future regressions in collapsible code preview behavior. Tests verify: - Helper function correctly truncates Pygments table HTML structure - Edit tool results show truncated preview (5 lines) in collapsible blocks - Full content remains available in expanded section Test data: Real Edit tool use/result pair for renderer_timings.py 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- test/test_data/edit_tool.jsonl | 2 + test/test_preview_truncation.py | 129 ++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 test/test_data/edit_tool.jsonl create mode 100644 test/test_preview_truncation.py diff --git a/test/test_data/edit_tool.jsonl b/test/test_data/edit_tool.jsonl new file mode 100644 index 00000000..896e9d44 --- /dev/null +++ b/test/test_data/edit_tool.jsonl @@ -0,0 +1,2 @@ +{"parentUuid":"49306c7f-62f4-41ce-b0e7-b67a891148b7","isSidechain":false,"userType":"external","cwd":"e:\\Workspace\\src\\github\\claude-code-log","sessionId":"88a8d761-7b9a-4bf1-a8ca-c1febe6bf358","version":"2.0.46","gitBranch":"dev/fold-ui-horizontal","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01A76saVFjpeVZQmepohx5gy","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01PhNkTBr17qvahqhCdkUkvM","name":"Edit","input":{"file_path":"e:\\Workspace\\src\\github\\claude-code-log\\claude_code_log\\renderer_timings.py","old_string":"def set_timing_var(name: str, value: Any) -> None:\n \"\"\"Set a timing variable in the global timing data dict.\n\n Args:\n name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\")\n 
value: Value to set\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[name] = value\n\n\ndef set_current_msg_uuid(uuid: str) -> None:\n \"\"\"Set the current message UUID for timing tracking.\n\n Args:\n uuid: Message UUID\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[\"_current_msg_uuid\"] = uuid","new_string":"def set_timing_var(name: str, value: Any) -> None:\n \"\"\"Set a timing variable in the global timing data dict.\n\n Args:\n name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\", \"_current_msg_uuid\")\n value: Value to set\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[name] = value"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":0,"cache_creation_input_tokens":10391,"cache_read_input_tokens":102802,"cache_creation":{"ephemeral_5m_input_tokens":10391,"ephemeral_1h_input_tokens":0},"output_tokens":526,"service_tier":"standard"}},"requestId":"req_011CVRRU3KTqNrUhZ9K35Zd2","type":"assistant","uuid":"e450d9e3-ccb9-4595-a7cb-e13f13bab5a0","timestamp":"2025-11-23T20:06:04.851Z"} +{"parentUuid":"e450d9e3-ccb9-4595-a7cb-e13f13bab5a0","isSidechain":false,"userType":"external","cwd":"e:\\Workspace\\src\\github\\claude-code-log","sessionId":"88a8d761-7b9a-4bf1-a8ca-c1febe6bf358","version":"2.0.46","gitBranch":"dev/fold-ui-horizontal","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01PhNkTBr17qvahqhCdkUkvM","type":"tool_result","content":"The file e:\\Workspace\\src\\github\\claude-code-log\\claude_code_log\\renderer_timings.py has been updated. 
Here's the result of running `cat -n` on a snippet of the edited file:\n 15→# Global timing data storage\n 16→_timing_data: Dict[str, Any] = {}\n 17→\n 18→\n 19→def set_timing_var(name: str, value: Any) -> None:\n 20→ \"\"\"Set a timing variable in the global timing data dict.\n 21→\n 22→ Args:\n 23→ name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\", \"_current_msg_uuid\")\n 24→ value: Value to set\n 25→ \"\"\"\n 26→ if DEBUG_TIMING:\n 27→ _timing_data[name] = value\n 28→\n 29→\n 30→@contextmanager\n 31→def timing_stat(list_name: str) -> Iterator[None]:"}]},"uuid":"46b5acca-1860-4ae3-be78-ed3537a81d8b","timestamp":"2025-11-23T20:06:14.160Z","toolUseResult":{"filePath":"e:\\Workspace\\src\\github\\claude-code-log\\claude_code_log\\renderer_timings.py","oldString":"def set_timing_var(name: str, value: Any) -> None:\n \"\"\"Set a timing variable in the global timing data dict.\n\n Args:\n name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\")\n value: Value to set\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[name] = value\n\n\ndef set_current_msg_uuid(uuid: str) -> None:\n \"\"\"Set the current message UUID for timing tracking.\n\n Args:\n uuid: Message UUID\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[\"_current_msg_uuid\"] = uuid","newString":"def set_timing_var(name: str, value: Any) -> None:\n \"\"\"Set a timing variable in the global timing data dict.\n\n Args:\n name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\", \"_current_msg_uuid\")\n value: Value to set\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[name] = value","originalFile":"\"\"\"Timing utilities for renderer performance profiling.\"\"\"\n\nimport os\nimport time\nfrom contextlib import contextmanager\nfrom typing import List, Tuple, Iterator, Any, Dict\n\n# Performance debugging\nDEBUG_TIMING = os.getenv(\"CLAUDE_CODE_LOG_DEBUG_TIMING\", \"\").lower() in (\n \"1\",\n \"true\",\n \"yes\",\n)\n\n# Global timing data storage\n_timing_data: Dict[str, 
Any] = {}\n\n\ndef set_timing_var(name: str, value: Any) -> None:\n \"\"\"Set a timing variable in the global timing data dict.\n\n Args:\n name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\")\n value: Value to set\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[name] = value\n\n\ndef set_current_msg_uuid(uuid: str) -> None:\n \"\"\"Set the current message UUID for timing tracking.\n\n Args:\n uuid: Message UUID\n \"\"\"\n if DEBUG_TIMING:\n _timing_data[\"_current_msg_uuid\"] = uuid\n\n\n@contextmanager\ndef timing_stat(list_name: str) -> Iterator[None]:\n \"\"\"Context manager for tracking timing statistics.\n\n Args:\n list_name: Name of the timing list to append to\n (e.g., \"_markdown_timings\", \"_pygments_timings\")\n\n Example:\n with timing_stat(\"_pygments_timings\"):\n result = expensive_operation()\n \"\"\"\n if not DEBUG_TIMING:\n yield\n return\n\n t_start = time.time()\n try:\n yield\n finally:\n duration = time.time() - t_start\n if list_name in _timing_data:\n msg_uuid = _timing_data.get(\"_current_msg_uuid\", \"\")\n _timing_data[list_name].append((duration, msg_uuid))\n\n\ndef report_timing_statistics(\n message_timings: List[Tuple[float, str, int, str]],\n markdown_timings: List[Tuple[float, str]],\n pygments_timings: List[Tuple[float, str]],\n) -> None:\n \"\"\"Report timing statistics for message rendering.\n\n Args:\n message_timings: List of (duration, message_type, index, uuid) tuples\n markdown_timings: List of (duration, uuid) tuples for markdown rendering\n pygments_timings: List of (duration, uuid) tuples for Pygments highlighting\n \"\"\"\n if not message_timings:\n return\n\n # Sort by duration descending\n sorted_timings = sorted(message_timings, key=lambda x: x[0], reverse=True)\n\n # Calculate statistics\n total_msg_time = sum(t[0] for t in message_timings)\n avg_time = total_msg_time / len(message_timings)\n\n # Report slowest messages\n print(\"\\n[TIMING] Loop statistics:\", flush=True)\n print(f\"[TIMING] Total 
messages: {len(message_timings)}\", flush=True)\n print(f\"[TIMING] Average time per message: {avg_time * 1000:.1f}ms\", flush=True)\n print(\"[TIMING] Slowest 10 messages:\", flush=True)\n for duration, msg_type, idx, uuid in sorted_timings[:10]:\n print(\n f\"[TIMING] Message {uuid} (#{idx}, {msg_type}): {duration * 1000:.1f}ms\",\n flush=True,\n )\n\n # Report markdown rendering statistics\n if markdown_timings:\n sorted_markdown = sorted(markdown_timings, key=lambda x: x[0], reverse=True)\n total_markdown_time = sum(t[0] for t in markdown_timings)\n print(f\"\\n[TIMING] Markdown rendering:\", flush=True)\n print(f\"[TIMING] Total operations: {len(markdown_timings)}\", flush=True)\n print(f\"[TIMING] Total time: {total_markdown_time:.3f}s\", flush=True)\n print(f\"[TIMING] Slowest 10 operations:\", flush=True)\n for duration, uuid in sorted_markdown[:10]:\n print(\n f\"[TIMING] {uuid}: {duration * 1000:.1f}ms\",\n flush=True,\n )\n\n # Report Pygments highlighting statistics\n if pygments_timings:\n sorted_pygments = sorted(pygments_timings, key=lambda x: x[0], reverse=True)\n total_pygments_time = sum(t[0] for t in pygments_timings)\n print(f\"\\n[TIMING] Pygments highlighting:\", flush=True)\n print(f\"[TIMING] Total operations: {len(pygments_timings)}\", flush=True)\n print(f\"[TIMING] Total time: {total_pygments_time:.3f}s\", flush=True)\n print(f\"[TIMING] Slowest 10 operations:\", flush=True)\n for duration, uuid in sorted_pygments[:10]:\n print(\n f\"[TIMING] {uuid}: {duration * 1000:.1f}ms\",\n flush=True,\n )\n","structuredPatch":[{"oldStart":20,"oldLines":23,"newStart":20,"newLines":13,"lines":[" \"\"\"Set a timing variable in the global timing data dict."," "," Args:","- name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\")","+ name: Variable name (e.g., \"_markdown_timings\", \"_pygments_timings\", \"_current_msg_uuid\")"," value: Value to set"," \"\"\""," if DEBUG_TIMING:"," _timing_data[name] = value"," "," ","-def 
set_current_msg_uuid(uuid: str) -> None:","- \"\"\"Set the current message UUID for timing tracking.","-","- Args:","- uuid: Message UUID","- \"\"\"","- if DEBUG_TIMING:","- _timing_data[\"_current_msg_uuid\"] = uuid","-","-"," @contextmanager"," def timing_stat(list_name: str) -> Iterator[None]:"," \"\"\"Context manager for tracking timing statistics."]}],"userModified":false,"replaceAll":false}} \ No newline at end of file diff --git a/test/test_preview_truncation.py b/test/test_preview_truncation.py new file mode 100644 index 00000000..0ccb45c4 --- /dev/null +++ b/test/test_preview_truncation.py @@ -0,0 +1,129 @@ +"""Tests for code preview truncation in tool results. + +Regression test for the bug where Pygments highlighted code previews +weren't truncated because the code assumed multiple rows per line, +but HtmlFormatter(linenos="table") produces a single with two s. +""" + +from pathlib import Path + +from claude_code_log.parser import load_transcript +from claude_code_log.renderer import generate_html, _truncate_highlighted_preview + + +class TestPreviewTruncation: + """Tests for preview truncation in collapsible code blocks.""" + + def test_truncate_highlighted_preview_function(self): + """Test the _truncate_highlighted_preview helper directly.""" + # Simulate Pygments output with 10 lines + html = """
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10
+
""" + + # Truncate to 5 lines + result = _truncate_highlighted_preview(html, 5) + + # Should have lines 1-5 in linenos + assert ' 1' in result + assert ' 5' in result + # Should NOT have lines 6-10 + assert ' 6' not in result + assert '10' not in result + + # Should have lines 1-5 in code + assert "line 1" in result + assert "line 5" in result + # Should NOT have lines 6-10 + assert "line 6" not in result + assert "line 10" not in result + + def test_edit_tool_result_preview_truncation(self): + """Test that Edit tool results have truncated previews in collapsible blocks. + + Regression test for: Preview extraction was looking for multiple tags, + but Pygments produces a single with two s, so the fallback showed + full content instead of truncated preview. + """ + test_data_path = Path(__file__).parent / "test_data" / "edit_tool.jsonl" + + messages = load_transcript(test_data_path) + html = generate_html(messages, "Edit Tool Test") + + # The Edit tool result has 17 lines (>12), so should be collapsible + assert "collapsible-code" in html, "Should have collapsible code block" + + # Find the preview content section + assert "preview-content" in html, "Should have preview content" + + # The preview should only show first 5 lines, not all 17 + # Line 15 is "_timing_data: Dict[str, Any] = {}" - should be in preview + # Line 31 is 'def timing_stat(list_name: str) -> Iterator[None]:"' - should NOT be in preview + + # Extract preview content (between preview-content div tags) + import re + + preview_match = re.search( + r"
(.*?)
\s*", + html, + re.DOTALL, + ) + assert preview_match, "Should find preview-content div" + preview_html = preview_match.group(1) + + # Preview should have early lines (within first 5) + # Line 15 (line 1 of snippet): "_timing_data" + assert "_timing_data" in preview_html, "Preview should contain line 15 content" + + # Preview should NOT have later lines (beyond first 5) + # Line 26 (line 12 of snippet): "if DEBUG_TIMING:" + # Note: Pygments wraps tokens in tags, so check for identifier + assert "DEBUG_TIMING" not in preview_html, ( + "Preview should NOT contain line 26 content (beyond 5 lines)" + ) + + # Line 30-31 (line 16-17 of snippet): "@contextmanager" + assert "contextmanager" not in preview_html, ( + "Preview should NOT contain line 30 content (beyond 5 lines)" + ) + + def test_full_content_still_available(self): + """Test that full content is still available in the expanded section.""" + test_data_path = Path(__file__).parent / "test_data" / "edit_tool.jsonl" + + messages = load_transcript(test_data_path) + html = generate_html(messages, "Edit Tool Test") + + # The full content section should have all lines + import re + + full_match = re.search( + r"
(.*?)
\s*
", + html, + re.DOTALL, + ) + assert full_match, "Should find code-full div" + full_html = full_match.group(1) + + # Full content should have both early and late lines + # Note: Pygments wraps tokens in tags, so we check for the identifier + assert "_timing_data" in full_html, "Full content should contain line 15" + assert "DEBUG_TIMING" in full_html, "Full content should contain line 26" + assert "contextmanager" in full_html, "Full content should contain line 30"