diff --git a/README.md b/README.md
index 65c7ca7a..f36afa31 100644
--- a/README.md
+++ b/README.md
@@ -393,3 +393,6 @@ uv run claude-code-log
- figure out minimum Python version and introduce a testing matrix
- add minimalist theme and make it light + dark; animate gradient background in fancy theme
- do we need special handling for hooks?
+- make processing parallel; currently we only use 1 CPU core, which makes it slow
+- migrate cache from JSON files to SQLite to make it faster and more versatile for downstream tasks and analytics
+- add more realistic fixtures reproducing the log directory structure for more thorough testing
diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py
index 4dad595d..c5daa663 100644
--- a/claude_code_log/converter.py
+++ b/claude_code_log/converter.py
@@ -299,12 +299,21 @@ def _update_cache_with_session_data(
usage.cache_read_input_tokens
)
- # Update cache with session data
- cache_manager.update_session_cache(sessions_cache_data)
+ # Filter out warmup-only sessions before caching
+ from .utils import is_warmup_only_session
+
+ filtered_sessions_cache_data = {
+ session_id: session_data
+ for session_id, session_data in sessions_cache_data.items()
+ if not is_warmup_only_session(messages, session_id)
+ }
+
+ # Update cache with filtered session data
+ cache_manager.update_session_cache(filtered_sessions_cache_data)
# Update cache with working directories
cache_manager.update_working_directories(
- extract_working_directories(list(sessions_cache_data.values()))
+ extract_working_directories(list(filtered_sessions_cache_data.values()))
)
# Update cache with project aggregates
@@ -452,6 +461,8 @@ def _generate_individual_session_files(
cache_was_updated: bool = False,
) -> None:
"""Generate individual HTML files for each session."""
+ from .utils import is_warmup_only_session
+
# Find all unique session IDs
session_ids: set[str] = set()
for message in messages:
@@ -460,6 +471,13 @@ def _generate_individual_session_files(
if session_id:
session_ids.add(session_id)
+ # Filter out warmup-only sessions
+ session_ids = {
+ session_id
+ for session_id in session_ids
+ if not is_warmup_only_session(messages, session_id)
+ }
+
# Get session data from cache for better titles
session_data: Dict[str, Any] = {}
working_directories = None
diff --git a/claude_code_log/patterns.py b/claude_code_log/patterns.py
new file mode 100644
index 00000000..2a6315e7
--- /dev/null
+++ b/claude_code_log/patterns.py
@@ -0,0 +1,46 @@
+"""Shared regex patterns for IDE tag detection and filtering.
+
+These patterns are used both for:
+1. HTML rendering (renderer.py) - extracts and renders IDE tags nicely
+2. Text filtering (utils.py) - strips IDE tags from summaries/previews
+"""
+
+import re
+
+# Pattern 1: <ide_opened_file>content</ide_opened_file> (group 1 = inner content)
+IDE_OPENED_FILE_PATTERN = re.compile(
+    r"<ide_opened_file>(.*?)</ide_opened_file>", flags=re.DOTALL
+)
+
+# Pattern 2: <ide_selection>content</ide_selection> (group 1 = inner content)
+IDE_SELECTION_PATTERN = re.compile(
+    r"<ide_selection>(.*?)</ide_selection>", flags=re.DOTALL
+)
+
+# Pattern 3: <post-tool-use-hook><ide_diagnostics>JSON</ide_diagnostics></post-tool-use-hook>
+IDE_DIAGNOSTICS_PATTERN = re.compile(
+    r"<post-tool-use-hook>\s*<ide_diagnostics>(.*?)</ide_diagnostics>\s*</post-tool-use-hook>",
+    flags=re.DOTALL,
+)
+
+
+def strip_ide_tags(text: str) -> str:
+    """Strip every IDE notification tag from ``text``, keeping only user content.
+
+    Lightweight text-only helper for summaries and previews; to render the tags
+    as HTML instead of discarding them, see renderer.extract_ide_notifications().
+
+    Args:
+        text: User message text potentially containing IDE tags
+
+    Returns:
+        Text with all IDE tag matches removed and surrounding whitespace stripped
+    """
+    cleaned = text
+    for pattern in (
+        IDE_OPENED_FILE_PATTERN,
+        IDE_SELECTION_PATTERN,
+        IDE_DIAGNOSTICS_PATTERN,
+    ):
+        cleaned = pattern.sub("", cleaned)
+    return cleaned.strip()
diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py
index 1a674919..88acf4d7 100644
--- a/claude_code_log/renderer.py
+++ b/claude_code_log/renderer.py
@@ -1171,14 +1171,17 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
where notifications are pre-rendered HTML divs and remaining_text
is the message content with IDE tags removed.
"""
- import re
+ from .patterns import (
+ IDE_DIAGNOSTICS_PATTERN,
+ IDE_OPENED_FILE_PATTERN,
+ IDE_SELECTION_PATTERN,
+ )
notifications: List[str] = []
remaining_text = text
# Pattern 1: content
- ide_file_pattern = r"(.*?)"
- file_matches = list(re.finditer(ide_file_pattern, remaining_text, flags=re.DOTALL))
+ file_matches = list(IDE_OPENED_FILE_PATTERN.finditer(remaining_text))
for match in file_matches:
content = match.group(1).strip()
@@ -1187,13 +1190,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)
# Remove ide_opened_file tags
- remaining_text = re.sub(ide_file_pattern, "", remaining_text, flags=re.DOTALL)
+ remaining_text = IDE_OPENED_FILE_PATTERN.sub("", remaining_text)
# Pattern 2: content
- selection_pattern = r"(.*?)"
- selection_matches = list(
- re.finditer(selection_pattern, remaining_text, flags=re.DOTALL)
- )
+ selection_matches = list(IDE_SELECTION_PATTERN.finditer(remaining_text))
for match in selection_matches:
content = match.group(1).strip()
@@ -1216,11 +1216,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)
# Remove ide_selection tags
- remaining_text = re.sub(selection_pattern, "", remaining_text, flags=re.DOTALL)
+ remaining_text = IDE_SELECTION_PATTERN.sub("", remaining_text)
# Pattern 3: JSON
- hook_pattern = r"\s*(.*?)\s*"
- hook_matches = list(re.finditer(hook_pattern, remaining_text, flags=re.DOTALL))
+ hook_matches = list(IDE_DIAGNOSTICS_PATTERN.finditer(remaining_text))
for match in hook_matches:
json_content = match.group(1).strip()
@@ -1250,7 +1249,7 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)
# Remove hook tags
- remaining_text = re.sub(hook_pattern, "", remaining_text, flags=re.DOTALL)
+ remaining_text = IDE_DIAGNOSTICS_PATTERN.sub("", remaining_text)
return notifications, remaining_text.strip()
@@ -2356,12 +2355,36 @@ def generate_html(
"input": tool_input,
}
+ # Filter out messages from warmup-only sessions
+ from .utils import is_warmup_only_session
+
+ # Step 1: Gather unique session IDs
+ unique_session_ids = {
+ getattr(msg, "sessionId", "")
+ for msg in messages
+ if hasattr(msg, "sessionId") and getattr(msg, "sessionId", "")
+ }
+
+ # Step 2: Check warmup status once per session and build set of warmup-only sessions
+ warmup_session_ids = {
+ session_id
+ for session_id in unique_session_ids
+ if is_warmup_only_session(messages, session_id)
+ }
+
# Process messages into template-friendly format
template_messages: List[TemplateMessage] = []
for message in messages:
message_type = message.type
+ # Skip messages from warmup-only sessions
+ if (
+ hasattr(message, "sessionId")
+ and getattr(message, "sessionId") in warmup_session_ids
+ ):
+ continue
+
# Skip summary messages - they should already be attached to their sessions
if isinstance(message, SummaryTranscriptEntry):
continue
@@ -2765,9 +2788,16 @@ def generate_html(
)
template_messages.append(tool_template_message)
+ # Filter out warmup-only sessions from navigation (reuse warmup_session_ids)
+ filtered_session_order = [
+ session_id
+ for session_id in session_order
+ if session_id not in warmup_session_ids
+ ]
+
# Prepare session navigation data
session_nav: List[Dict[str, Any]] = []
- for session_id in session_order:
+ for session_id in filtered_session_order:
session_info = sessions[session_id]
# Format timestamp range
diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py
index f222d90f..dcbc5e28 100644
--- a/claude_code_log/tui.py
+++ b/claude_code_log/tui.py
@@ -491,7 +491,12 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non
def _update_selected_session_from_cursor(self) -> None:
"""Update the selected session based on the current cursor position."""
- table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
+ try:
+ table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
+ except Exception:
+ # Table not mounted yet, skip update
+ return
+
try:
row_data = table.get_row_at(table.cursor_row)
if row_data:
diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py
index dcbc01f4..0de12b2a 100644
--- a/claude_code_log/utils.py
+++ b/claude_code_log/utils.py
@@ -67,9 +67,13 @@ def should_use_as_session_starter(text_content: str) -> bool:
"""
Determine if a user message should be used as a session starter preview.
- This filters out system messages and most command messages, except for 'init' commands
- which are typically the start of a new session.
+ This filters out system messages, warmup messages, and most command messages,
+ except for 'init' commands which are typically the start of a new session.
"""
+ # Skip warmup messages
+ if text_content.strip() == "Warmup":
+ return False
+
# Skip system messages
if is_system_message(text_content):
return False
@@ -93,9 +97,18 @@ def create_session_preview(text_content: str) -> str:
Returns:
A preview string, truncated to FIRST_USER_MESSAGE_PREVIEW_LENGTH with
- ellipsis if needed, and with init commands converted to friendly descriptions.
+ ellipsis if needed, with init commands converted to friendly descriptions,
+ and with IDE notification tags stripped out.
"""
- preview_content = extract_init_command_description(text_content)
+ from .patterns import strip_ide_tags
+
+ # First strip IDE tags to get clean user content
+ clean_content = strip_ide_tags(text_content)
+
+ # Then apply init command description transformation
+ preview_content = extract_init_command_description(clean_content)
+
+ # Finally truncate if needed
if len(preview_content) > FIRST_USER_MESSAGE_PREVIEW_LENGTH:
return preview_content[:FIRST_USER_MESSAGE_PREVIEW_LENGTH] + "..."
return preview_content
@@ -149,3 +162,47 @@ def extract_working_directories(
# Sort by timestamp (most recent first) and return just the paths
sorted_dirs = sorted(working_directories.items(), key=lambda x: x[1], reverse=True)
return [path for path, _ in sorted_dirs]
+
+
+def is_warmup_only_session(messages: List[TranscriptEntry], session_id: str) -> bool:
+    """Report whether every user message in a session is just the text "Warmup".
+
+    Only user entries belonging to ``session_id`` are inspected. The session is
+    warmup-only when it has at least one such entry and each one's text, after
+    stripping surrounding whitespace, is exactly "Warmup". A session without any
+    user messages is deliberately NOT treated as warmup-only, since it may still
+    hold system messages or other important content.
+
+    Args:
+        messages: List of all transcript messages
+        session_id: The session ID to check
+
+    Returns:
+        True if the session has user messages and all of them are "Warmup",
+        False otherwise
+    """
+    # Function-local imports (presumably to avoid an import cycle - confirm)
+    from .models import UserTranscriptEntry
+    from .parser import extract_text_content
+
+    # Collect this session's user entries, preserving transcript order
+    session_user_entries: List[UserTranscriptEntry] = [
+        entry
+        for entry in messages
+        if isinstance(entry, UserTranscriptEntry)
+        and hasattr(entry, "sessionId")
+        and getattr(entry, "sessionId") == session_id
+        and hasattr(entry, "message")
+    ]
+
+    # No user messages at all: NOT warmup-only (may have system messages, etc.)
+    if not session_user_entries:
+        return False
+
+    # Exact "Warmup" match only (not the general session starter filter); all()
+    # stops at the first real user message
+    only_warmup = all(
+        extract_text_content(entry.message.content).strip() == "Warmup"
+        for entry in session_user_entries
+    )
+    return only_warmup
diff --git a/test/test_utils.py b/test/test_utils.py
index ec018f6c..73a25a2b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -11,8 +11,18 @@
should_skip_message,
should_use_as_session_starter,
extract_text_content_length,
+ create_session_preview,
+ is_warmup_only_session,
)
-from claude_code_log.models import TextContent, ToolUseContent
+from claude_code_log.models import (
+ TextContent,
+ ToolUseContent,
+ UserTranscriptEntry,
+ UserMessage,
+ AssistantTranscriptEntry,
+ AssistantMessage,
+)
+from claude_code_log.patterns import strip_ide_tags
class TestSystemMessageDetection:
@@ -361,3 +371,263 @@ def test_session_starter_edge_cases(self):
# Test with init in the middle of command name
init_middle = "reinitReinitialize"
assert should_use_as_session_starter(init_middle) is False
+
+
+class TestWarmupMessageFiltering:
+    """Session-starter selection must ignore bare "Warmup" messages."""
+
+    def test_should_not_use_warmup_as_starter(self):
+        """A message that is exactly 'Warmup' is rejected as a session starter."""
+        assert should_use_as_session_starter("Warmup") is False
+
+    def test_should_not_use_warmup_with_whitespace_as_starter(self):
+        """Surrounding whitespace does not rescue a 'Warmup' message."""
+        for padded in (" Warmup ", "\nWarmup\n", "\t Warmup \t"):
+            assert should_use_as_session_starter(padded) is False
+
+    def test_should_use_warmup_in_sentence_as_starter(self):
+        """Sentences that merely mention warmup are still valid starters."""
+        sentences = (
+            "Let's warmup with a simple example",
+            "Warmup exercises are important",
+        )
+        for sentence in sentences:
+            assert should_use_as_session_starter(sentence) is True
+
+    def test_should_not_use_case_sensitive_warmup(self):
+        """Matching is case-sensitive: only the exact spelling "Warmup" is filtered."""
+        # Other capitalizations are ordinary text and remain usable as starters
+        for variant in ("warmup", "WARMUP", "WarmUp"):
+            assert should_use_as_session_starter(variant) is True
+
+
+class TestIDETagStripping:
+    """Test IDE tag stripping from preview text."""
+
+    def test_strip_ide_opened_file_tag(self):
+        """Test that <ide_opened_file> tags are stripped."""
+        text = "<ide_opened_file>The user opened the file /path/to/file.py in the IDE.</ide_opened_file>User message here"
+        result = strip_ide_tags(text)
+        assert result == "User message here"
+        assert "<ide_opened_file>" not in result
+
+    def test_strip_ide_selection_tag(self):
+        """Test that <ide_selection> tags are stripped."""
+        text = "<ide_selection>The user selected lines 42 to 46 from /path/to/file.py</ide_selection>Can you help with this code?"
+        result = strip_ide_tags(text)
+        assert result == "Can you help with this code?"
+        assert "<ide_selection>" not in result
+
+    def test_strip_ide_diagnostics_tag(self):
+        """Test that <ide_diagnostics> hook tags are stripped."""
+        text = '<post-tool-use-hook><ide_diagnostics>[{"severity": "error"}]</ide_diagnostics></post-tool-use-hook>Fix this error please'
+        result = strip_ide_tags(text)
+        assert result == "Fix this error please"
+        assert "<ide_diagnostics>" not in result
+
+    def test_strip_multiple_ide_tags(self):
+        """Test stripping multiple IDE tags from the same text."""
+        text = (
+            "<ide_opened_file>File opened</ide_opened_file>"
+            "<ide_selection>Lines selected</ide_selection>"
+            "Actual user message"
+        )
+        result = strip_ide_tags(text)
+        assert result == "Actual user message"
+        assert "<ide_opened_file>" not in result
+        assert "<ide_selection>" not in result
+
+    def test_strip_ide_tags_with_no_remaining_text(self):
+        """Test stripping IDE tags when there's no other text."""
+        text = "<ide_selection>The user selected some code</ide_selection>"
+        result = strip_ide_tags(text)
+        assert result == ""
+
+    def test_strip_ide_tags_from_text_without_tags(self):
+        """Test that normal text is unchanged when no IDE tags present."""
+        text = "This is a normal message without any IDE tags"
+        result = strip_ide_tags(text)
+        assert result == text
+
+    def test_strip_ide_tags_preserves_whitespace(self):
+        """Test that whitespace left around the remaining text is stripped."""
+        text = "<ide_selection>Selection info</ide_selection> \n Actual message with spacing "
+        result = strip_ide_tags(text)
+        # strip_ide_tags calls .strip() at the end
+        assert result == "Actual message with spacing"
+
+    def test_create_session_preview_strips_ide_tags(self):
+        """Test that create_session_preview strips IDE tags."""
+        text = "<ide_selection>User selected code</ide_selection>Can you refactor this function?"
+        preview = create_session_preview(text)
+        assert preview == "Can you refactor this function?"
+        assert "<ide_selection>" not in preview
+
+    def test_create_session_preview_handles_ide_tags_and_truncation(self):
+        """Test that IDE tags are stripped before truncation."""
+        # Create a message with IDE tag + long content
+        long_message = "x" * 1100  # Exceeds FIRST_USER_MESSAGE_PREVIEW_LENGTH (1000)
+        text = f"<ide_selection>Selection info</ide_selection>{long_message}"
+        preview = create_session_preview(text)
+
+        # Should strip IDE tag first, then truncate
+        assert "<ide_selection>" not in preview
+        assert preview.endswith("...")
+        assert len(preview) == 1003  # 1000 chars + "..."
+
+    def test_create_session_preview_multiple_ide_tags(self):
+        """Test preview creation with multiple IDE tags in content blocks."""
+        text = (
+            "<ide_opened_file>File: test.py</ide_opened_file>"
+            "<ide_selection>Lines 1-10</ide_selection>"
+            "Please review this code for bugs"
+        )
+        preview = create_session_preview(text)
+        assert preview == "Please review this code for bugs"
+        assert "<ide_opened_file>" not in preview
+        assert "<ide_selection>" not in preview
+
+
+class TestWarmupAndIDETagsCombined:
+    """Test combined warmup + IDE tag scenarios."""
+
+    def test_warmup_with_ide_tag_filtered(self):
+        """Test that warmup messages with IDE tags are still filtered."""
+        text = "<ide_selection>Some selection</ide_selection>Warmup"
+        # First check if it would be used as starter (should be False)
+        clean_text = strip_ide_tags(text)
+        assert should_use_as_session_starter(clean_text) is False
+
+    def test_ide_tag_only_message_not_used_as_starter(self):
+        """Test that IDE-tag-only messages result in empty preview."""
+        text = "<ide_selection>The user selected lines 42 to 46</ide_selection>"
+        preview = create_session_preview(text)
+        assert preview == ""
+        # Empty string is technically valid as a starter, but won't be shown in UI
+        assert should_use_as_session_starter(preview) is True
+
+
+class TestWarmupOnlySessionDetection:
+    """Exercise is_warmup_only_session against small hand-built transcripts."""
+
+    def _create_user_entry(
+        self, session_id: str, content: str, uuid: str, timestamp: str
+    ):
+        """Build a UserTranscriptEntry for the session with fixed boilerplate fields."""
+        return UserTranscriptEntry(
+            type="user",
+            sessionId=session_id,
+            parentUuid=None,
+            isSidechain=False,
+            userType="external",
+            cwd="/test",
+            version="1.0.0",
+            message=UserMessage(role="user", content=content),
+            uuid=uuid,
+            timestamp=timestamp,
+        )
+
+    def _create_assistant_entry(
+        self, session_id: str, content: str, uuid: str, timestamp: str, parent_uuid: str
+    ):
+        """Build an AssistantTranscriptEntry holding a single text content item."""
+        return AssistantTranscriptEntry(
+            type="assistant",
+            sessionId=session_id,
+            parentUuid=parent_uuid,
+            isSidechain=False,
+            userType="external",
+            cwd="/test",
+            version="1.0.0",
+            message=AssistantMessage(
+                id="msg-id",
+                type="message",
+                role="assistant",
+                model="claude-3-5-sonnet",
+                content=[TextContent(type="text", text=content)],
+            ),
+            uuid=uuid,
+            timestamp=timestamp,
+        )
+
+    def test_session_with_only_warmup_messages(self):
+        """A lone "Warmup" user message plus a reply counts as warmup-only."""
+        sid = "test-session-1"
+        transcript = [
+            self._create_user_entry(
+                sid, "Warmup", "msg-1", "2025-01-01T10:00:00Z"
+            ),
+            self._create_assistant_entry(
+                sid,
+                "I'm ready to help!",
+                "msg-2",
+                "2025-01-01T10:00:01Z",
+                "msg-1",
+            ),
+        ]
+
+        assert is_warmup_only_session(transcript, sid) is True
+
+    def test_session_with_real_messages(self):
+        """A session whose user message is real content is not warmup-only."""
+        sid = "test-session-2"
+        transcript = [
+            self._create_user_entry(
+                sid, "Hello, can you help me?", "msg-1", "2025-01-01T10:00:00Z"
+            ),
+            self._create_assistant_entry(
+                sid, "Sure!", "msg-2", "2025-01-01T10:00:01Z", "msg-1"
+            ),
+        ]
+
+        assert is_warmup_only_session(transcript, sid) is False
+
+    def test_session_with_warmup_and_real_messages(self):
+        """One real user message after a warmup makes the session not warmup-only."""
+        sid = "test-session-3"
+        transcript = [
+            self._create_user_entry(
+                sid, "Warmup", "msg-1", "2025-01-01T10:00:00Z"
+            ),
+            self._create_assistant_entry(
+                sid, "Ready!", "msg-2", "2025-01-01T10:00:01Z", "msg-1"
+            ),
+            self._create_user_entry(
+                sid,
+                "Now help me debug this code",
+                "msg-3",
+                "2025-01-01T10:00:02Z",
+            ),
+        ]
+
+        assert is_warmup_only_session(transcript, sid) is False
+
+    def test_session_with_multiple_warmup_messages(self):
+        """Several warmup messages, one padded with whitespace, are still warmup-only."""
+        sid = "test-session-4"
+        transcript = [
+            self._create_user_entry(
+                sid, " Warmup ", "msg-1", "2025-01-01T10:00:00Z"
+            ),
+            self._create_user_entry(
+                sid, "Warmup", "msg-2", "2025-01-01T10:00:01Z"
+            ),
+        ]
+
+        assert is_warmup_only_session(transcript, sid) is True
+
+    def test_nonexistent_session(self):
+        """A session ID absent from the transcript is not warmup-only."""
+        transcript = [
+            self._create_user_entry(
+                "different-session", "Hello", "msg-1", "2025-01-01T10:00:00Z"
+            ),
+        ]
+
+        # Should return False (no user messages may mean system messages exist)
+        assert is_warmup_only_session(transcript, "nonexistent-session") is False
+
+    def test_empty_messages_list(self):
+        """An empty transcript is never warmup-only."""
+        # Should return False (no user messages may mean system messages exist)
+        assert is_warmup_only_session([], "any-session") is False