diff --git a/README.md b/README.md index 65c7ca7a..f36afa31 100644 --- a/README.md +++ b/README.md @@ -393,3 +393,6 @@ uv run claude-code-log - figure out minimum Python version and introduce a testing matrix - add minimalist theme and make it light + dark; animate gradient background in fancy theme - do we need special handling for hooks? +- make processing parallel, currently we only use 1 CPU (core) and it's slow +- migrate cache from JSON files to SQLite to make it faster and more versatile for downstream tasks and analytics +- add more realistic fixtures reproducing the log directory structure for more thorough testing diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 4dad595d..c5daa663 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -299,12 +299,21 @@ def _update_cache_with_session_data( usage.cache_read_input_tokens ) - # Update cache with session data - cache_manager.update_session_cache(sessions_cache_data) + # Filter out warmup-only sessions before caching + from .utils import is_warmup_only_session + + filtered_sessions_cache_data = { + session_id: session_data + for session_id, session_data in sessions_cache_data.items() + if not is_warmup_only_session(messages, session_id) + } + + # Update cache with filtered session data + cache_manager.update_session_cache(filtered_sessions_cache_data) # Update cache with working directories cache_manager.update_working_directories( - extract_working_directories(list(sessions_cache_data.values())) + extract_working_directories(list(filtered_sessions_cache_data.values())) ) # Update cache with project aggregates @@ -452,6 +461,8 @@ def _generate_individual_session_files( cache_was_updated: bool = False, ) -> None: """Generate individual HTML files for each session.""" + from .utils import is_warmup_only_session + # Find all unique session IDs session_ids: set[str] = set() for message in messages: @@ -460,6 +471,13 @@ def _generate_individual_session_files( if session_id: session_ids.add(session_id) + # Filter out warmup-only sessions + session_ids = { + session_id + for session_id in session_ids + if not is_warmup_only_session(messages, session_id) + } + # Get session data from cache for better titles session_data: Dict[str, Any] = {} working_directories = None diff --git a/claude_code_log/patterns.py b/claude_code_log/patterns.py new file mode 100644 index 00000000..2a6315e7 --- /dev/null +++ b/claude_code_log/patterns.py @@ -0,0 +1,46 @@ +"""Shared regex patterns for IDE tag detection and filtering. + +These patterns are used both for: +1. HTML rendering (renderer.py) - extracts and renders IDE tags nicely +2. Text filtering (utils.py) - strips IDE tags from summaries/previews +""" + +import re + +# Pattern 1: content +IDE_OPENED_FILE_PATTERN = re.compile( + r"(.*?)", flags=re.DOTALL +) + +# Pattern 2: content +IDE_SELECTION_PATTERN = re.compile( + r"(.*?)", flags=re.DOTALL +) + +# Pattern 3: JSON +IDE_DIAGNOSTICS_PATTERN = re.compile( + r"\s*(.*?)\s*", + flags=re.DOTALL, +) + + +def strip_ide_tags(text: str) -> str: + """Remove all IDE notification tags from text, keeping only user content. + + This is a lightweight text-only version used for summaries and previews. + For HTML rendering with IDE tag extraction, see renderer.extract_ide_notifications(). + + Args: + text: User message text potentially containing IDE tags + + Returns: + Text with all IDE tags removed and whitespace normalized + """ + result = text + + # Remove all IDE tag patterns + result = IDE_OPENED_FILE_PATTERN.sub("", result) + result = IDE_SELECTION_PATTERN.sub("", result) + result = IDE_DIAGNOSTICS_PATTERN.sub("", result) + + return result.strip() diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 1a674919..88acf4d7 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -1171,14 +1171,17 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]: where notifications are pre-rendered HTML divs and remaining_text is the message content with IDE tags removed. """ - import re + from .patterns import ( + IDE_DIAGNOSTICS_PATTERN, + IDE_OPENED_FILE_PATTERN, + IDE_SELECTION_PATTERN, + ) notifications: List[str] = [] remaining_text = text # Pattern 1: content - ide_file_pattern = r"(.*?)" - file_matches = list(re.finditer(ide_file_pattern, remaining_text, flags=re.DOTALL)) + file_matches = list(IDE_OPENED_FILE_PATTERN.finditer(remaining_text)) for match in file_matches: content = match.group(1).strip() @@ -1187,13 +1190,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]: notifications.append(notification_html) # Remove ide_opened_file tags - remaining_text = re.sub(ide_file_pattern, "", remaining_text, flags=re.DOTALL) + remaining_text = IDE_OPENED_FILE_PATTERN.sub("", remaining_text) # Pattern 2: content - selection_pattern = r"(.*?)" - selection_matches = list( - re.finditer(selection_pattern, remaining_text, flags=re.DOTALL) - ) + selection_matches = list(IDE_SELECTION_PATTERN.finditer(remaining_text)) for match in selection_matches: content = match.group(1).strip() @@ -1216,11 +1216,10 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]: notifications.append(notification_html) # Remove ide_selection tags - remaining_text = re.sub(selection_pattern, "", remaining_text, flags=re.DOTALL) + remaining_text = IDE_SELECTION_PATTERN.sub("", remaining_text) # Pattern 3: JSON - hook_pattern = r"\s*(.*?)\s*" - hook_matches = list(re.finditer(hook_pattern, remaining_text, flags=re.DOTALL)) + hook_matches = list(IDE_DIAGNOSTICS_PATTERN.finditer(remaining_text)) for match in hook_matches: json_content = match.group(1).strip() @@ -1250,7 +1249,7 @@ def extract_ide_notifications(text: str) -> tuple[List[str], str]: notifications.append(notification_html) # Remove hook tags - remaining_text = re.sub(hook_pattern, "", remaining_text, flags=re.DOTALL) + remaining_text = IDE_DIAGNOSTICS_PATTERN.sub("", remaining_text) return notifications, remaining_text.strip() @@ -2356,12 +2355,36 @@ def generate_html( "input": tool_input, } + # Filter out messages from warmup-only sessions + from .utils import is_warmup_only_session + + # Step 1: Gather unique session IDs + unique_session_ids = { + getattr(msg, "sessionId", "") + for msg in messages + if hasattr(msg, "sessionId") and getattr(msg, "sessionId", "") + } + + # Step 2: Check warmup status once per session and build set of warmup-only sessions + warmup_session_ids = { + session_id + for session_id in unique_session_ids + if is_warmup_only_session(messages, session_id) + } + # Process messages into template-friendly format template_messages: List[TemplateMessage] = [] for message in messages: message_type = message.type + # Skip messages from warmup-only sessions + if ( + hasattr(message, "sessionId") + and getattr(message, "sessionId") in warmup_session_ids + ): + continue + # Skip summary messages - they should already be attached to their sessions if isinstance(message, SummaryTranscriptEntry): continue @@ -2765,9 +2788,16 @@ def generate_html( ) template_messages.append(tool_template_message) + # Filter out warmup-only sessions from navigation (reuse warmup_session_ids) + filtered_session_order = [ + session_id + for session_id in session_order + if session_id not in warmup_session_ids + ] + # Prepare session navigation data session_nav: List[Dict[str, Any]] = [] - for session_id in session_order: + for session_id in filtered_session_order: session_info = sessions[session_id] # Format timestamp range diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index f222d90f..dcbc5e28 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -491,7 +491,12 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non def _update_selected_session_from_cursor(self) -> None: """Update the selected session based on the current cursor position.""" - table = cast(DataTable[str], self.query_one("#sessions-table", DataTable)) + try: + table = cast(DataTable[str], self.query_one("#sessions-table", DataTable)) + except Exception: + # Table not mounted yet, skip update + return + try: row_data = table.get_row_at(table.cursor_row) if row_data: diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index dcbc01f4..0de12b2a 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -67,9 +67,13 @@ def should_use_as_session_starter(text_content: str) -> bool: """ Determine if a user message should be used as a session starter preview. - This filters out system messages and most command messages, except for 'init' commands - which are typically the start of a new session. + This filters out system messages, warmup messages, and most command messages, + except for 'init' commands which are typically the start of a new session. """ + # Skip warmup messages + if text_content.strip() == "Warmup": + return False + # Skip system messages if is_system_message(text_content): return False @@ -93,9 +97,18 @@ def create_session_preview(text_content: str) -> str: Returns: A preview string, truncated to FIRST_USER_MESSAGE_PREVIEW_LENGTH with - ellipsis if needed, and with init commands converted to friendly descriptions. + ellipsis if needed, with init commands converted to friendly descriptions, + and with IDE notification tags stripped out. """ - preview_content = extract_init_command_description(text_content) + from .patterns import strip_ide_tags + + # First strip IDE tags to get clean user content + clean_content = strip_ide_tags(text_content) + + # Then apply init command description transformation + preview_content = extract_init_command_description(clean_content) + + # Finally truncate if needed if len(preview_content) > FIRST_USER_MESSAGE_PREVIEW_LENGTH: return preview_content[:FIRST_USER_MESSAGE_PREVIEW_LENGTH] + "..." return preview_content @@ -149,3 +162,47 @@ def extract_working_directories( # Sort by timestamp (most recent first) and return just the paths sorted_dirs = sorted(working_directories.items(), key=lambda x: x[1], reverse=True) return [path for path, _ in sorted_dirs] + + +def is_warmup_only_session(messages: List[TranscriptEntry], session_id: str) -> bool: + """Check if a session contains only warmup messages (no real user content). + + A warmup-only session is one where there are user messages AND all of them are + literally "Warmup" text. Sessions with no user messages at all are NOT considered + warmup-only as they may contain system messages or other important content. + + Args: + messages: List of all transcript messages + session_id: The session ID to check + + Returns: + True if the session only contains warmup user messages, False otherwise + """ + from .models import UserTranscriptEntry + from .parser import extract_text_content + + # Find all user messages for this session + user_messages: List[UserTranscriptEntry] = [] + for message in messages: + if ( + isinstance(message, UserTranscriptEntry) + and hasattr(message, "sessionId") + and getattr(message, "sessionId") == session_id + and hasattr(message, "message") + ): + user_messages.append(message) + + # If no user messages at all, NOT warmup-only (may have system messages, etc.) + if not user_messages: + return False + + # Check if ALL user messages are literally "Warmup" text + for user_msg in user_messages: + text_content = extract_text_content(user_msg.message.content) + # Only check for exact "Warmup" match, not general session starter filter + if text_content.strip() != "Warmup": + # Found a non-warmup user message, so this is NOT a warmup-only session + return False + + # All user messages are "Warmup", so this is warmup-only + return True diff --git a/test/test_utils.py b/test/test_utils.py index ec018f6c..73a25a2b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -11,8 +11,18 @@ should_skip_message, should_use_as_session_starter, extract_text_content_length, + create_session_preview, + is_warmup_only_session, ) -from claude_code_log.models import TextContent, ToolUseContent +from claude_code_log.models import ( + TextContent, + ToolUseContent, + UserTranscriptEntry, + UserMessage, + AssistantTranscriptEntry, + AssistantMessage, +) +from claude_code_log.patterns import strip_ide_tags class TestSystemMessageDetection: @@ -361,3 +371,263 @@ def test_session_starter_edge_cases(self): # Test with init in the middle of command name init_middle = "reinitReinitialize" assert should_use_as_session_starter(init_middle) is False + + +class TestWarmupMessageFiltering: + """Test warmup message filtering in session starters.""" + + def test_should_not_use_warmup_as_starter(self): + """Test that 'Warmup' messages are filtered out from session starters.""" + assert should_use_as_session_starter("Warmup") is False + + def test_should_not_use_warmup_with_whitespace_as_starter(self): + """Test that 'Warmup' with whitespace is filtered out.""" + assert should_use_as_session_starter(" Warmup ") is False + assert should_use_as_session_starter("\nWarmup\n") is False + assert should_use_as_session_starter("\t Warmup \t") is False + + def test_should_use_warmup_in_sentence_as_starter(self): + """Test that messages containing 'Warmup' in a sentence are not filtered.""" + assert ( + should_use_as_session_starter("Let's warmup with a simple example") is True + ) + assert should_use_as_session_starter("Warmup exercises are important") is True + + def test_should_not_use_case_sensitive_warmup(self): + """Test that warmup filtering is case-sensitive.""" + # Only exact "Warmup" is filtered, not "warmup" or "WARMUP" + assert should_use_as_session_starter("warmup") is True + assert should_use_as_session_starter("WARMUP") is True + assert should_use_as_session_starter("WarmUp") is True + + +class TestIDETagStripping: + """Test IDE tag stripping from preview text.""" + + def test_strip_ide_opened_file_tag(self): + """Test that tags are stripped.""" + text = "The user opened the file /path/to/file.py in the IDE.User message here" + result = strip_ide_tags(text) + assert result == "User message here" + assert "" not in result + + def test_strip_ide_selection_tag(self): + """Test that tags are stripped.""" + text = "The user selected lines 42 to 46 from /path/to/file.pyCan you help with this code?" + result = strip_ide_tags(text) + assert result == "Can you help with this code?" + assert "" not in result + + def test_strip_ide_diagnostics_tag(self): + """Test that tags are stripped.""" + text = '[{"severity": "error"}]Fix this error please' + result = strip_ide_tags(text) + assert result == "Fix this error please" + assert "" not in result + + def test_strip_multiple_ide_tags(self): + """Test stripping multiple IDE tags from the same text.""" + text = ( + "File opened" + "Lines selected" + "Actual user message" + ) + result = strip_ide_tags(text) + assert result == "Actual user message" + assert "" not in result + assert "" not in result + + def test_strip_ide_tags_with_no_remaining_text(self): + """Test stripping IDE tags when there's no other text.""" + text = "The user selected some code" + result = strip_ide_tags(text) + assert result == "" + + def test_strip_ide_tags_from_text_without_tags(self): + """Test that normal text is unchanged when no IDE tags present.""" + text = "This is a normal message without any IDE tags" + result = strip_ide_tags(text) + assert result == text + + def test_strip_ide_tags_preserves_whitespace(self): + """Test that whitespace in remaining text is preserved.""" + text = "Selection info \n Actual message with spacing " + result = strip_ide_tags(text) + # strip_ide_tags calls .strip() at the end + assert result == "Actual message with spacing" + + def test_create_session_preview_strips_ide_tags(self): + """Test that create_session_preview strips IDE tags.""" + text = "User selected codeCan you refactor this function?" + preview = create_session_preview(text) + assert preview == "Can you refactor this function?" + assert "" not in preview + + def test_create_session_preview_handles_ide_tags_and_truncation(self): + """Test that IDE tags are stripped before truncation.""" + # Create a message with IDE tag + long content + long_message = "x" * 1100 # Exceeds FIRST_USER_MESSAGE_PREVIEW_LENGTH (1000) + text = f"Selection info{long_message}" + preview = create_session_preview(text) + + # Should strip IDE tag first, then truncate + assert "" not in preview + assert preview.endswith("...") + assert len(preview) == 1003 # 1000 chars + "..." + + def test_create_session_preview_multiple_ide_tags(self): + """Test preview creation with multiple IDE tags in content blocks.""" + text = ( + "File: test.py" + "Lines 1-10" + "Please review this code for bugs" + ) + preview = create_session_preview(text) + assert preview == "Please review this code for bugs" + assert "" not in preview + assert "" not in preview + + +class TestWarmupAndIDETagsCombined: + """Test combined warmup + IDE tag scenarios.""" + + def test_warmup_with_ide_tag_filtered(self): + """Test that warmup messages with IDE tags are still filtered.""" + text = "Some selectionWarmup" + # First check if it would be used as starter (should be False) + clean_text = strip_ide_tags(text) + assert should_use_as_session_starter(clean_text) is False + + def test_ide_tag_only_message_not_used_as_starter(self): + """Test that IDE-tag-only messages result in empty preview.""" + text = "The user selected lines 42 to 46" + preview = create_session_preview(text) + assert preview == "" + # Empty string is technically valid as a starter, but won't be shown in UI + assert should_use_as_session_starter(preview) is True + + +class TestWarmupOnlySessionDetection: + """Test detection of warmup-only sessions.""" + + def _create_user_entry( + self, session_id: str, content: str, uuid: str, timestamp: str + ): + """Helper to create a UserTranscriptEntry with all required fields.""" + return UserTranscriptEntry( + type="user", + sessionId=session_id, + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + version="1.0.0", + message=UserMessage(role="user", content=content), + uuid=uuid, + timestamp=timestamp, + ) + + def _create_assistant_entry( + self, session_id: str, content: str, uuid: str, timestamp: str, parent_uuid: str + ): + """Helper to create an AssistantTranscriptEntry with all required fields.""" + return AssistantTranscriptEntry( + type="assistant", + sessionId=session_id, + parentUuid=parent_uuid, + isSidechain=False, + userType="external", + cwd="/test", + version="1.0.0", + message=AssistantMessage( + id="msg-id", + type="message", + role="assistant", + model="claude-3-5-sonnet", + content=[TextContent(type="text", text=content)], + ), + uuid=uuid, + timestamp=timestamp, + ) + + def test_session_with_only_warmup_messages(self): + """Test that a session with only warmup messages is detected.""" + session_id = "test-session-1" + messages = [ + self._create_user_entry( + session_id, "Warmup", "msg-1", "2025-01-01T10:00:00Z" + ), + self._create_assistant_entry( + session_id, + "I'm ready to help!", + "msg-2", + "2025-01-01T10:00:01Z", + "msg-1", + ), + ] + + assert is_warmup_only_session(messages, session_id) is True + + def test_session_with_real_messages(self): + """Test that a session with real messages is not detected as warmup-only.""" + session_id = "test-session-2" + messages = [ + self._create_user_entry( + session_id, "Hello, can you help me?", "msg-1", "2025-01-01T10:00:00Z" + ), + self._create_assistant_entry( + session_id, "Sure!", "msg-2", "2025-01-01T10:00:01Z", "msg-1" + ), + ] + + assert is_warmup_only_session(messages, session_id) is False + + def test_session_with_warmup_and_real_messages(self): + """Test that a session with both warmup and real messages is not warmup-only.""" + session_id = "test-session-3" + messages = [ + self._create_user_entry( + session_id, "Warmup", "msg-1", "2025-01-01T10:00:00Z" + ), + self._create_assistant_entry( + session_id, "Ready!", "msg-2", "2025-01-01T10:00:01Z", "msg-1" + ), + self._create_user_entry( + session_id, + "Now help me debug this code", + "msg-3", + "2025-01-01T10:00:02Z", + ), + ] + + assert is_warmup_only_session(messages, session_id) is False + + def test_session_with_multiple_warmup_messages(self): + """Test session with multiple warmup messages.""" + session_id = "test-session-4" + messages = [ + self._create_user_entry( + session_id, " Warmup ", "msg-1", "2025-01-01T10:00:00Z" + ), + self._create_user_entry( + session_id, "Warmup", "msg-2", "2025-01-01T10:00:01Z" + ), + ] + + assert is_warmup_only_session(messages, session_id) is True + + def test_nonexistent_session(self): + """Test checking a session ID that doesn't exist.""" + messages = [ + self._create_user_entry( + "different-session", "Hello", "msg-1", "2025-01-01T10:00:00Z" + ), + ] + + # Should return False (no user messages may mean system messages exist) + assert is_warmup_only_session(messages, "nonexistent-session") is False + + def test_empty_messages_list(self): + """Test with empty messages list.""" + # Should return False (no user messages may mean system messages exist) + assert is_warmup_only_session([], "any-session") is False