daaain · daaain · Nov 8, 2025 · Nov 8, 2025 · Nov 8, 2025
diff --git a/README.md b/README.md
@@ -393,3 +393,6 @@ uv run claude-code-log
 - figure out minimum Python version and introduce a testing matrix
 - add minimalist theme and make it light + dark; animate gradient background in fancy theme
 - do we need special handling for hooks?
+- make processing parallel; currently we only use 1 CPU core, which makes it slow
+- migrate cache from JSON files to SQLite to make it faster and more versatile for downstream tasks and analytics
+- add more realistic fixtures reproducing the log directory structure for more thorough testing
diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py
@@ -299,12 +299,21 @@ def _update_cache_with_session_data(
                             usage.cache_read_input_tokens
                         )
 
-    # Update cache with session data
-    cache_manager.update_session_cache(sessions_cache_data)
+    # Filter out warmup-only sessions before caching
+    from .utils import is_warmup_only_session
+
+    filtered_sessions_cache_data = {
+        session_id: session_data
+        for session_id, session_data in sessions_cache_data.items()
+        if not is_warmup_only_session(messages, session_id)
+    }
+
+    # Update cache with filtered session data
+    cache_manager.update_session_cache(filtered_sessions_cache_data)
 
     # Update cache with working directories
     cache_manager.update_working_directories(
-        extract_working_directories(list(sessions_cache_data.values()))
+        extract_working_directories(list(filtered_sessions_cache_data.values()))
     )
 
     # Update cache with project aggregates
@@ -452,6 +461,8 @@ def _generate_individual_session_files(
     cache_was_updated: bool = False,
 ) -> None:
     """Generate individual HTML files for each session."""
+    from .utils import is_warmup_only_session
+
     # Find all unique session IDs
     session_ids: set[str] = set()
     for message in messages:
@@ -460,6 +471,13 @@ def _generate_individual_session_files(
             if session_id:
                 session_ids.add(session_id)
 
+    # Filter out warmup-only sessions
+    session_ids = {
+        session_id
+        for session_id in session_ids
+        if not is_warmup_only_session(messages, session_id)
+    }
+
     # Get session data from cache for better titles
     session_data: Dict[str, Any] = {}
     working_directories = None

diff --git a/claude_code_log/patterns.py b/claude_code_log/patterns.py
@@ -0,0 +1,46 @@
+"""Shared regex patterns for IDE tag detection and filtering.
+
+These patterns are used both for:
+1. HTML rendering (renderer.py) - extracts and renders IDE tags nicely
+2. Text filtering (utils.py) - strips IDE tags from summaries/previews
+"""
+
+import re
+
+# Pattern 1: <ide_opened_file>content</ide_opened_file> (group 1 = content; non-greedy, DOTALL so it may span lines)
+IDE_OPENED_FILE_PATTERN = re.compile(
+    r"<ide_opened_file>(.*?)</ide_opened_file>", flags=re.DOTALL
+)
+
+# Pattern 2: <ide_selection>content</ide_selection> (group 1 = content; non-greedy, DOTALL so it may span lines)
+IDE_SELECTION_PATTERN = re.compile(
+    r"<ide_selection>(.*?)</ide_selection>", flags=re.DOTALL
+)
+
+# Pattern 3: <post-tool-use-hook><ide_diagnostics>JSON</ide_diagnostics></post-tool-use-hook> (group 1 = inner payload; whitespace allowed between the outer and inner tags)
+IDE_DIAGNOSTICS_PATTERN = re.compile(
+    r"<post-tool-use-hook>\s*<ide_diagnostics>(.*?)</ide_diagnostics>\s*</post-tool-use-hook>",
+    flags=re.DOTALL,
+)
+
+
+def strip_ide_tags(text: str) -> str:
+    """Remove all IDE notification tags (and their enclosed content) from text.
+
+    This is a lightweight text-only version used for summaries and previews.
+    For HTML rendering with IDE tag extraction, see renderer.extract_ide_notifications().
+
+    Args:
+        text: User message text potentially containing IDE tags
+
+    Returns:
+        Text with all IDE tags removed and leading/trailing whitespace stripped
+    """
+    result = text
+
+    # Delete every match of each pattern in turn; text outside the tags is left untouched
+    result = IDE_OPENED_FILE_PATTERN.sub("", result)
+    result = IDE_SELECTION_PATTERN.sub("", result)
+    result = IDE_DIAGNOSTICS_PATTERN.sub("", result)
+
+    return result.strip()
diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py
@@ -1171,14 +1171,17 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
         where notifications are pre-rendered HTML divs and remaining_text
         is the message content with IDE tags removed.
     """
-    import re
+    from .patterns import (
+        IDE_DIAGNOSTICS_PATTERN,
+        IDE_OPENED_FILE_PATTERN,
+        IDE_SELECTION_PATTERN,
+    )
 
     notifications: List[str] = []
     remaining_text = text
 
     # Pattern 1: <ide_opened_file>content</ide_opened_file>
-    ide_file_pattern = r"<ide_opened_file>(.*?)</ide_opened_file>"
-    file_matches = list(re.finditer(ide_file_pattern, remaining_text, flags=re.DOTALL))
+    file_matches = list(IDE_OPENED_FILE_PATTERN.finditer(remaining_text))
 
     for match in file_matches:
         content = match.group(1).strip()
@@ -1187,13 +1190,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
         notifications.append(notification_html)
 
     # Remove ide_opened_file tags
-    remaining_text = re.sub(ide_file_pattern, "", remaining_text, flags=re.DOTALL)
+    remaining_text = IDE_OPENED_FILE_PATTERN.sub("", remaining_text)
 
     # Pattern 2: <ide_selection>content</ide_selection>
-    selection_pattern = r"<ide_selection>(.*?)</ide_selection>"
-    selection_matches = list(
-        re.finditer(selection_pattern, remaining_text, flags=re.DOTALL)
-    )
+    selection_matches = list(IDE_SELECTION_PATTERN.finditer(remaining_text))
 
     for match in selection_matches:
         content = match.group(1).strip()
@@ -1216,11 +1216,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
         notifications.append(notification_html)
 
     # Remove ide_selection tags
-    remaining_text = re.sub(selection_pattern, "", remaining_text, flags=re.DOTALL)
+    remaining_text = IDE_SELECTION_PATTERN.sub("", remaining_text)
 
     # Pattern 3: <post-tool-use-hook><ide_diagnostics>JSON</ide_diagnostics></post-tool-use-hook>
-    hook_pattern = r"<post-tool-use-hook>\s*<ide_diagnostics>(.*?)</ide_diagnostics>\s*</post-tool-use-hook>"
-    hook_matches = list(re.finditer(hook_pattern, remaining_text, flags=re.DOTALL))
+    hook_matches = list(IDE_DIAGNOSTICS_PATTERN.finditer(remaining_text))
 
     for match in hook_matches:
         json_content = match.group(1).strip()
@@ -1250,7 +1249,7 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
             notifications.append(notification_html)
 
     # Remove hook tags
-    remaining_text = re.sub(hook_pattern, "", remaining_text, flags=re.DOTALL)
+    remaining_text = IDE_DIAGNOSTICS_PATTERN.sub("", remaining_text)
 
     return notifications, remaining_text.strip()
 
@@ -2356,12 +2355,36 @@ def generate_html(
                                     "input": tool_input,
                                 }
 
+    # Filter out messages from warmup-only sessions
+    from .utils import is_warmup_only_session
+
+    # Step 1: Gather unique session IDs
+    unique_session_ids = {
+        getattr(msg, "sessionId", "")
+        for msg in messages
+        if hasattr(msg, "sessionId") and getattr(msg, "sessionId", "")
+    }
+
+    # Step 2: Check warmup status once per session and build set of warmup-only sessions
+    warmup_session_ids = {
+        session_id
+        for session_id in unique_session_ids
+        if is_warmup_only_session(messages, session_id)
+    }
+
     # Process messages into template-friendly format
     template_messages: List[TemplateMessage] = []
 
     for message in messages:
         message_type = message.type
 
+        # Skip messages from warmup-only sessions
+        if (
+            hasattr(message, "sessionId")
+            and getattr(message, "sessionId") in warmup_session_ids
+        ):
+            continue
+
         # Skip summary messages - they should already be attached to their sessions
         if isinstance(message, SummaryTranscriptEntry):
             continue
@@ -2765,9 +2788,16 @@ def generate_html(
             )
             template_messages.append(tool_template_message)
 
+    # Filter out warmup-only sessions from navigation (reuse warmup_session_ids)
+    filtered_session_order = [
+        session_id
+        for session_id in session_order
+        if session_id not in warmup_session_ids
+    ]
+
     # Prepare session navigation data
     session_nav: List[Dict[str, Any]] = []
-    for session_id in session_order:
+    for session_id in filtered_session_order:
         session_info = sessions[session_id]
 
         # Format timestamp range

diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py
@@ -491,7 +491,12 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non
 
     def _update_selected_session_from_cursor(self) -> None:
         """Update the selected session based on the current cursor position."""
-        table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
+        try:
+            table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
+        except Exception:
+            # Table not mounted yet, skip update
+            return
+
         try:
             row_data = table.get_row_at(table.cursor_row)
             if row_data:

diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py
@@ -67,9 +67,13 @@ def should_use_as_session_starter(text_content: str) -> bool:
     """
     Determine if a user message should be used as a session starter preview.
 
-    This filters out system messages and most command messages, except for 'init' commands
-    which are typically the start of a new session.
+    This filters out system messages, warmup messages, and most command messages,
+    except for 'init' commands which are typically the start of a new session.
     """
+    # Skip warmup messages
+    if text_content.strip() == "Warmup":
+        return False
+
     # Skip system messages
     if is_system_message(text_content):
         return False
@@ -93,9 +97,18 @@ def create_session_preview(text_content: str) -> str:
 
     Returns:
         A preview string, truncated to FIRST_USER_MESSAGE_PREVIEW_LENGTH with
-        ellipsis if needed, and with init commands converted to friendly descriptions.
+        ellipsis if needed, with init commands converted to friendly descriptions,
+        and with IDE notification tags stripped out.
     """
-    preview_content = extract_init_command_description(text_content)
+    from .patterns import strip_ide_tags
+
+    # First strip IDE tags to get clean user content
+    clean_content = strip_ide_tags(text_content)
+
+    # Then apply init command description transformation
+    preview_content = extract_init_command_description(clean_content)
+
+    # Finally truncate if needed
     if len(preview_content) > FIRST_USER_MESSAGE_PREVIEW_LENGTH:
         return preview_content[:FIRST_USER_MESSAGE_PREVIEW_LENGTH] + "..."
     return preview_content
@@ -149,3 +162,47 @@ def extract_working_directories(
     # Sort by timestamp (most recent first) and return just the paths
     sorted_dirs = sorted(working_directories.items(), key=lambda x: x[1], reverse=True)
     return [path for path, _ in sorted_dirs]
+
+
+def is_warmup_only_session(messages: List[TranscriptEntry], session_id: str) -> bool:
+    """Check if a session contains only warmup messages (no real user content).
+
+    A warmup-only session is one where there are user messages AND every one of them
+    is exactly "Warmup" once surrounding whitespace is stripped. Sessions with no user
+    messages at all are NOT considered warmup-only as they may contain other content.
+
+    Args:
+        messages: List of all transcript messages (every entry is scanned on each call)
+        session_id: The session ID to check
+
+    Returns:
+        True if the session only contains warmup user messages, False otherwise
+    """
+    from .models import UserTranscriptEntry
+    from .parser import extract_text_content
+
+    # Collect this session's user entries; all other entry types are ignored
+    user_messages: List[UserTranscriptEntry] = []
+    for message in messages:
+        if (
+            isinstance(message, UserTranscriptEntry)
+            and hasattr(message, "sessionId")
+            and getattr(message, "sessionId") == session_id
+            and hasattr(message, "message")
+        ):
+            user_messages.append(message)
+
+    # If no user messages at all, NOT warmup-only (may have system messages, etc.)
+    if not user_messages:
+        return False
+
+    # Every user message must be "Warmup"; the first one that is not settles the answer
+    for user_msg in user_messages:
+        text_content = extract_text_content(user_msg.message.content)
+        # Only check for exact "Warmup" match, not general session starter filter
+        if text_content.strip() != "Warmup":
+            # Found a non-warmup user message, so this is NOT a warmup-only session
+            return False
+
+    # All user messages are "Warmup", so this is warmup-only
+    return True