Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,6 @@ uv run claude-code-log
- figure out minimum Python version and introduce a testing matrix
- add minimalist theme and make it light + dark; animate gradient background in fancy theme
- do we need special handling for hooks?
- make processing parallel; currently we only use one CPU core, which makes it slow
- migrate cache from JSON files to SQLite to make it faster and more versatile for downstream tasks and analytics
- add more realistic fixtures reproducing the log directory structure for more thorough testing
24 changes: 21 additions & 3 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,21 @@ def _update_cache_with_session_data(
usage.cache_read_input_tokens
)

# Update cache with session data
cache_manager.update_session_cache(sessions_cache_data)
# Filter out warmup-only sessions before caching
from .utils import is_warmup_only_session

filtered_sessions_cache_data = {
session_id: session_data
for session_id, session_data in sessions_cache_data.items()
if not is_warmup_only_session(messages, session_id)
}

# Update cache with filtered session data
cache_manager.update_session_cache(filtered_sessions_cache_data)

# Update cache with working directories
cache_manager.update_working_directories(
extract_working_directories(list(sessions_cache_data.values()))
extract_working_directories(list(filtered_sessions_cache_data.values()))
)

# Update cache with project aggregates
Expand Down Expand Up @@ -452,6 +461,8 @@ def _generate_individual_session_files(
cache_was_updated: bool = False,
) -> None:
"""Generate individual HTML files for each session."""
from .utils import is_warmup_only_session

# Find all unique session IDs
session_ids: set[str] = set()
for message in messages:
Expand All @@ -460,6 +471,13 @@ def _generate_individual_session_files(
if session_id:
session_ids.add(session_id)

# Filter out warmup-only sessions
session_ids = {
session_id
for session_id in session_ids
if not is_warmup_only_session(messages, session_id)
}

# Get session data from cache for better titles
session_data: Dict[str, Any] = {}
working_directories = None
Expand Down
46 changes: 46 additions & 0 deletions claude_code_log/patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Shared regex patterns for IDE tag detection and filtering.

These patterns are used both for:
1. HTML rendering (renderer.py) - extracts and renders IDE tags nicely
2. Text filtering (utils.py) - strips IDE tags from summaries/previews
"""

import re

# Every pattern below captures the tag body in group 1 and is compiled with
# DOTALL so that the non-greedy body may span several lines.

# Opened-file notification: <ide_opened_file>content</ide_opened_file>
IDE_OPENED_FILE_PATTERN = re.compile(
    r"<ide_opened_file>(.*?)</ide_opened_file>",
    re.DOTALL,
)

# Selection notification: <ide_selection>content</ide_selection>
IDE_SELECTION_PATTERN = re.compile(
    r"<ide_selection>(.*?)</ide_selection>",
    re.DOTALL,
)

# Diagnostics wrapped in a post-tool-use hook; optional whitespace is allowed
# between the outer hook tag and the inner diagnostics tag on both sides:
# <post-tool-use-hook><ide_diagnostics>JSON</ide_diagnostics></post-tool-use-hook>
IDE_DIAGNOSTICS_PATTERN = re.compile(
    r"<post-tool-use-hook>\s*<ide_diagnostics>(.*?)</ide_diagnostics>\s*</post-tool-use-hook>",
    re.DOTALL,
)


def strip_ide_tags(text: str) -> str:
    """Drop IDE notification tags from ``text`` and return what is left.

    Plain-text counterpart used for summaries and previews. For HTML rendering
    that also extracts the tags, see renderer.extract_ide_notifications().

    Args:
        text: User message text that may contain IDE notification tags.

    Returns:
        The text with every match of the three IDE tag patterns removed and
        leading/trailing whitespace stripped. Whitespace inside the remaining
        text is left untouched.
    """
    # Apply the patterns in their numbered order: opened file, selection,
    # then the diagnostics hook.
    for tag_pattern in (
        IDE_OPENED_FILE_PATTERN,
        IDE_SELECTION_PATTERN,
        IDE_DIAGNOSTICS_PATTERN,
    ):
        text = tag_pattern.sub("", text)

    return text.strip()
56 changes: 43 additions & 13 deletions claude_code_log/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,14 +1171,17 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
where notifications are pre-rendered HTML divs and remaining_text
is the message content with IDE tags removed.
"""
import re
from .patterns import (
IDE_DIAGNOSTICS_PATTERN,
IDE_OPENED_FILE_PATTERN,
IDE_SELECTION_PATTERN,
)

notifications: List[str] = []
remaining_text = text

# Pattern 1: <ide_opened_file>content</ide_opened_file>
ide_file_pattern = r"<ide_opened_file>(.*?)</ide_opened_file>"
file_matches = list(re.finditer(ide_file_pattern, remaining_text, flags=re.DOTALL))
file_matches = list(IDE_OPENED_FILE_PATTERN.finditer(remaining_text))

for match in file_matches:
content = match.group(1).strip()
Expand All @@ -1187,13 +1190,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)

# Remove ide_opened_file tags
remaining_text = re.sub(ide_file_pattern, "", remaining_text, flags=re.DOTALL)
remaining_text = IDE_OPENED_FILE_PATTERN.sub("", remaining_text)

# Pattern 2: <ide_selection>content</ide_selection>
selection_pattern = r"<ide_selection>(.*?)</ide_selection>"
selection_matches = list(
re.finditer(selection_pattern, remaining_text, flags=re.DOTALL)
)
selection_matches = list(IDE_SELECTION_PATTERN.finditer(remaining_text))

for match in selection_matches:
content = match.group(1).strip()
Expand All @@ -1216,11 +1216,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)

# Remove ide_selection tags
remaining_text = re.sub(selection_pattern, "", remaining_text, flags=re.DOTALL)
remaining_text = IDE_SELECTION_PATTERN.sub("", remaining_text)

# Pattern 3: <post-tool-use-hook><ide_diagnostics>JSON</ide_diagnostics></post-tool-use-hook>
hook_pattern = r"<post-tool-use-hook>\s*<ide_diagnostics>(.*?)</ide_diagnostics>\s*</post-tool-use-hook>"
hook_matches = list(re.finditer(hook_pattern, remaining_text, flags=re.DOTALL))
hook_matches = list(IDE_DIAGNOSTICS_PATTERN.finditer(remaining_text))

for match in hook_matches:
json_content = match.group(1).strip()
Expand Down Expand Up @@ -1250,7 +1249,7 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]:
notifications.append(notification_html)

# Remove hook tags
remaining_text = re.sub(hook_pattern, "", remaining_text, flags=re.DOTALL)
remaining_text = IDE_DIAGNOSTICS_PATTERN.sub("", remaining_text)

return notifications, remaining_text.strip()

Expand Down Expand Up @@ -2356,12 +2355,36 @@ def generate_html(
"input": tool_input,
}

# Filter out messages from warmup-only sessions
from .utils import is_warmup_only_session

# Step 1: Gather unique session IDs
unique_session_ids = {
getattr(msg, "sessionId", "")
for msg in messages
if hasattr(msg, "sessionId") and getattr(msg, "sessionId", "")
}

# Step 2: Check warmup status once per session and build set of warmup-only sessions
warmup_session_ids = {
session_id
for session_id in unique_session_ids
if is_warmup_only_session(messages, session_id)
}

# Process messages into template-friendly format
template_messages: List[TemplateMessage] = []

for message in messages:
message_type = message.type

# Skip messages from warmup-only sessions
if (
hasattr(message, "sessionId")
and getattr(message, "sessionId") in warmup_session_ids
):
continue
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Skip summary messages - they should already be attached to their sessions
if isinstance(message, SummaryTranscriptEntry):
continue
Expand Down Expand Up @@ -2765,9 +2788,16 @@ def generate_html(
)
template_messages.append(tool_template_message)

# Filter out warmup-only sessions from navigation (reuse warmup_session_ids)
filtered_session_order = [
session_id
for session_id in session_order
if session_id not in warmup_session_ids
]

# Prepare session navigation data
session_nav: List[Dict[str, Any]] = []
for session_id in session_order:
for session_id in filtered_session_order:
session_info = sessions[session_id]

# Format timestamp range
Expand Down
7 changes: 6 additions & 1 deletion claude_code_log/tui.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,12 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non

def _update_selected_session_from_cursor(self) -> None:
"""Update the selected session based on the current cursor position."""
table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
try:
table = cast(DataTable[str], self.query_one("#sessions-table", DataTable))
except Exception:
# Table not mounted yet, skip update
return

try:
row_data = table.get_row_at(table.cursor_row)
if row_data:
Expand Down
65 changes: 61 additions & 4 deletions claude_code_log/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,13 @@ def should_use_as_session_starter(text_content: str) -> bool:
"""
Determine if a user message should be used as a session starter preview.

This filters out system messages and most command messages, except for 'init' commands
which are typically the start of a new session.
This filters out system messages, warmup messages, and most command messages,
except for 'init' commands which are typically the start of a new session.
"""
# Skip warmup messages
if text_content.strip() == "Warmup":
return False

# Skip system messages
if is_system_message(text_content):
return False
Expand All @@ -93,9 +97,18 @@ def create_session_preview(text_content: str) -> str:

Returns:
A preview string, truncated to FIRST_USER_MESSAGE_PREVIEW_LENGTH with
ellipsis if needed, and with init commands converted to friendly descriptions.
ellipsis if needed, with init commands converted to friendly descriptions,
and with IDE notification tags stripped out.
"""
preview_content = extract_init_command_description(text_content)
from .patterns import strip_ide_tags

# First strip IDE tags to get clean user content
clean_content = strip_ide_tags(text_content)

# Then apply init command description transformation
preview_content = extract_init_command_description(clean_content)

# Finally truncate if needed
if len(preview_content) > FIRST_USER_MESSAGE_PREVIEW_LENGTH:
return preview_content[:FIRST_USER_MESSAGE_PREVIEW_LENGTH] + "..."
return preview_content
Expand Down Expand Up @@ -149,3 +162,47 @@ def extract_working_directories(
# Sort by timestamp (most recent first) and return just the paths
sorted_dirs = sorted(working_directories.items(), key=lambda x: x[1], reverse=True)
return [path for path, _ in sorted_dirs]


def is_warmup_only_session(messages: List[TranscriptEntry], session_id: str) -> bool:
    """Tell whether a session's user messages are all the literal text "Warmup".

    Only user transcript entries belonging to ``session_id`` are inspected.
    A session with no such entries is deliberately NOT treated as warmup-only,
    since it may still hold system messages or other content worth keeping.

    Args:
        messages: All transcript entries, possibly spanning many sessions.
        session_id: ID of the session to examine.

    Returns:
        True when the session has at least one user message and the stripped
        text of every one of them equals "Warmup"; False otherwise.
    """
    from .models import UserTranscriptEntry
    from .parser import extract_text_content

    # Gather this session's user entries up front so the "no user messages"
    # case can be told apart from the "all warmup" case.
    session_user_entries: List[UserTranscriptEntry] = [
        entry
        for entry in messages
        if isinstance(entry, UserTranscriptEntry)
        and hasattr(entry, "sessionId")
        and getattr(entry, "sessionId") == session_id
        and hasattr(entry, "message")
    ]

    if not session_user_entries:
        return False

    # Exact match on "Warmup" only; the broader session-starter filter is
    # intentionally not applied here.
    return all(
        extract_text_content(entry.message.content).strip() == "Warmup"
        for entry in session_user_entries
    )
Loading
Loading