diff --git a/pkg/cli/copilot_agent.go b/pkg/cli/copilot_agent.go index d2dcd2e370..18a7194388 100644 --- a/pkg/cli/copilot_agent.go +++ b/pkg/cli/copilot_agent.go @@ -252,7 +252,7 @@ func ParseCopilotAgentLogMetrics(logContent string, verbose bool) workflow.LogMe // Collect errors and warnings lowerLine := strings.ToLower(line) if errorPattern.MatchString(lowerLine) && !strings.Contains(lowerLine, "no error") { - message := extractErrorMessage(line) + message := logger.ExtractErrorMessage(line) if message != "" { metrics.Errors = append(metrics.Errors, workflow.LogError{ Line: lineNum + 1, @@ -262,7 +262,7 @@ func ParseCopilotAgentLogMetrics(logContent string, verbose bool) workflow.LogMe } } if warningPattern.MatchString(lowerLine) { - message := extractErrorMessage(line) + message := logger.ExtractErrorMessage(line) if message != "" { metrics.Errors = append(metrics.Errors, workflow.LogError{ Line: lineNum + 1, @@ -292,22 +292,6 @@ func ParseCopilotAgentLogMetrics(logContent string, verbose bool) workflow.LogMe return metrics } -// extractErrorMessage extracts a clean error message from a log line -// This is a simplified version for the copilot agent parser -func extractErrorMessage(line string) string { - // Remove common timestamp patterns - line = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?\s*`).ReplaceAllString(line, "") - line = regexp.MustCompile(`^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s*`).ReplaceAllString(line, "") - - // Remove common log level prefixes - line = regexp.MustCompile(`^(ERROR|WARN|WARNING|INFO|DEBUG):\s*`).ReplaceAllString(line, "") - - // Trim whitespace - line = strings.TrimSpace(line) - - return line -} - // extractToolName extracts a tool name from a log line func extractToolName(line string) string { // Try to extract tool name from various patterns diff --git a/pkg/cli/copilot_agent_test.go b/pkg/cli/copilot_agent_test.go index b9030c5884..50ab4e52cb 100644 --- a/pkg/cli/copilot_agent_test.go +++ b/pkg/cli/copilot_agent_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/githubnext/gh-aw/pkg/logger" "github.com/githubnext/gh-aw/pkg/workflow" ) @@ -282,7 +283,7 @@ func TestExtractErrorMessage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := extractErrorMessage(tt.line) + result := logger.ExtractErrorMessage(tt.line) if result != tt.expected { t.Errorf("Expected '%s', got '%s'", tt.expected, result) } diff --git a/pkg/logger/error_formatting.go b/pkg/logger/error_formatting.go new file mode 100644 index 0000000000..9d77f6c9f3 --- /dev/null +++ b/pkg/logger/error_formatting.go @@ -0,0 +1,47 @@ +package logger + +import ( + "regexp" + "strings" +) + +// Pre-compiled regexes for performance (avoid recompiling in hot paths) +var ( + // Timestamp patterns for log cleanup + // Pattern 1: ISO 8601 with T or space separator (e.g., "2024-01-01T12:00:00.123Z " or "2024-01-01 12:00:00 ") + timestampPattern1 = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?\s*`) + // Pattern 2: Bracketed date-time (e.g., "[2024-01-01 12:00:00] ") + timestampPattern2 = regexp.MustCompile(`^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s*`) + // Pattern 3: Bracketed time only (e.g., "[12:00:00] ") + timestampPattern3 = regexp.MustCompile(`^\[\d{2}:\d{2}:\d{2}\]\s+`) + // Pattern 4: Time only with optional milliseconds (e.g., "12:00:00.123 ") + timestampPattern4 = regexp.MustCompile(`^\d{2}:\d{2}:\d{2}(\.\d+)?\s+`) + + // Log level pattern for message cleanup (case-insensitive) + logLevelPattern = regexp.MustCompile(`(?i)^\[?(ERROR|WARNING|WARN|INFO|DEBUG)\]?\s*[:-]?\s*`) +) + +// ExtractErrorMessage extracts a clean error message from a log line. +// It removes timestamps, log level prefixes, and other common noise. +// If the message is longer than 200 characters, it will be truncated. +func ExtractErrorMessage(line string) string { + // Remove common timestamp patterns using pre-compiled regexes + cleanedLine := line + cleanedLine = timestampPattern1.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern2.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern3.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern4.ReplaceAllString(cleanedLine, "") + + // Remove common log level prefixes using pre-compiled regex + cleanedLine = logLevelPattern.ReplaceAllString(cleanedLine, "") + + // Trim whitespace + cleanedLine = strings.TrimSpace(cleanedLine) + + // If the line is too long (>200 chars), truncate it + if len(cleanedLine) > 200 { + cleanedLine = cleanedLine[:197] + "..." + } + + return cleanedLine +} diff --git a/pkg/logger/error_formatting_test.go b/pkg/logger/error_formatting_test.go new file mode 100644 index 0000000000..8f48ccee27 --- /dev/null +++ b/pkg/logger/error_formatting_test.go @@ -0,0 +1,175 @@ +package logger + +import ( + "strings" + "testing" +) + +func TestExtractErrorMessage(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "ISO 8601 timestamp with T separator and Z", + input: "2024-01-01T12:00:00.123Z Error: connection failed", + expected: "connection failed", + }, + { + name: "ISO 8601 timestamp with T separator and timezone offset", + input: "2024-01-01T12:00:00.123+00:00 Error: connection failed", + expected: "connection failed", + }, + { + name: "Date-time with space separator", + input: "2024-01-01 12:00:00 Error: connection failed", + expected: "connection failed", + }, + { + name: "Date-time with space separator and milliseconds", + input: "2024-01-01 12:00:00.456 Error: connection failed", + expected: "connection failed", + }, + { + name: "Bracketed date-time", + input: "[2024-01-01 12:00:00] Error: connection failed", + expected: "connection failed", + }, + { + name: "Bracketed time only", + input: "[12:00:00] Error: connection failed", + expected: "connection failed", + }, + { + name: "Time only with milliseconds", + input: "12:00:00.123 Error: connection failed", + expected: "connection failed", + }, + { + name: "Time only without milliseconds", + input: "12:00:00 Error: connection failed", + expected: "connection failed", + }, + { + name: "ERROR prefix with colon", + input: "ERROR: connection failed", + expected: "connection failed", + }, + { + name: "ERROR prefix without colon", + input: "ERROR connection failed", + expected: "connection failed", + }, + { + name: "Bracketed ERROR prefix", + input: "[ERROR] connection failed", + expected: "connection failed", + }, + { + name: "Bracketed ERROR prefix with colon", + input: "[ERROR]: connection failed", + expected: "connection failed", + }, + { + name: "WARNING prefix", + input: "WARNING: disk space low", + expected: "disk space low", + }, + { + name: "WARN prefix", + input: "WARN: deprecated API used", + expected: "deprecated API used", + }, + { + name: "INFO prefix", + input: "INFO: service started", + expected: "service started", + }, + { + name: "DEBUG prefix", + input: "DEBUG: processing request", + expected: "processing request", + }, + { + name: "Case insensitive log level", + input: "error: connection failed", + expected: "connection failed", + }, + { + name: "Combined timestamp and log level", + input: "2024-01-01 12:00:00 ERROR: connection failed", + expected: "connection failed", + }, + { + name: "Combined ISO timestamp with Z and log level", + input: "2024-01-01T12:00:00Z ERROR: connection failed", + expected: "connection failed", + }, + { + name: "Multiple timestamps - only first is removed", + input: "[12:00:00] 2024-01-01 12:00:00 ERROR: connection failed", + expected: "2024-01-01 12:00:00 ERROR: connection failed", + }, + { + name: "No timestamp or log level", + input: "connection failed", + expected: "connection failed", + }, + { + name: "Empty string", + input: "", + expected: "", + }, + { + name: "Only whitespace", + input: " ", + expected: "", + }, + { + name: "Truncation at 200 chars", + input: "ERROR: " + strings.Repeat("a", 250), + expected: strings.Repeat("a", 197) + "...", + }, + { + name: "Exactly 200 chars - no truncation", + input: "ERROR: " + strings.Repeat("a", 193), + expected: strings.Repeat("a", 193), + }, + { + name: "Real world example from metrics.go", + input: "2024-01-15 14:30:22 ERROR: Failed to connect to database", + expected: "Failed to connect to database", + }, + { + name: "Real world example from copilot_agent.go", + input: "2024-01-15T14:30:22.123Z ERROR: API request failed", + expected: "API request failed", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ExtractErrorMessage(tt.input) + if result != tt.expected { + t.Errorf("ExtractErrorMessage(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} + +func BenchmarkExtractErrorMessage(b *testing.B) { + testLine := "2024-01-01T12:00:00.123Z ERROR: connection failed to remote server" + b.ResetTimer() + for i := 0; i < b.N; i++ { + ExtractErrorMessage(testLine) + } +} + +func BenchmarkExtractErrorMessageLong(b *testing.B) { + testLine := "2024-01-01T12:00:00.123Z ERROR: " + strings.Repeat("very long error message ", 20) + b.ResetTimer() + for i := 0; i < b.N; i++ { + ExtractErrorMessage(testLine) + } +} diff --git a/pkg/workflow/claude_logs.go b/pkg/workflow/claude_logs.go index 933115a30e..dd86ad0aa4 100644 --- a/pkg/workflow/claude_logs.go +++ b/pkg/workflow/claude_logs.go @@ -66,7 +66,7 @@ func (e *ClaudeEngine) ParseLogMetrics(logContent string, verbose bool) LogMetri lowerLine := strings.ToLower(line) if strings.Contains(lowerLine, "error") { // Extract error message (remove timestamp and common prefixes) - message := extractErrorMessage(line) + message := logger.ExtractErrorMessage(line) if message != "" { metrics.Errors = append(metrics.Errors, LogError{ Line: lineNum + 1, // 1-based line numbering @@ -77,7 +77,7 @@ func (e *ClaudeEngine) ParseLogMetrics(logContent string, verbose bool) LogMetri } if strings.Contains(lowerLine, "warning") { // Extract warning message (remove timestamp and common prefixes) - message := extractErrorMessage(line) + message := logger.ExtractErrorMessage(line) if message != "" { metrics.Errors = append(metrics.Errors, LogError{ Line: lineNum + 1, // 1-based line numbering diff --git a/pkg/workflow/metrics.go b/pkg/workflow/metrics.go index 63f8cd3ef4..3ff241bbcf 100644 --- a/pkg/workflow/metrics.go +++ b/pkg/workflow/metrics.go @@ -14,17 +14,6 @@ import ( var metricsLog = logger.New("workflow:metrics") -// Pre-compiled regexes for performance (avoid recompiling in hot paths) -var ( - // Timestamp patterns for log cleanup - timestampPattern1 = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(\.\d+)?\s+`) - timestampPattern2 = regexp.MustCompile(`^\[\d{2}:\d{2}:\d{2}\]\s+`) - timestampPattern3 = regexp.MustCompile(`^\d{2}:\d{2}:\d{2}(\.\d+)?\s+`) - - // Log level pattern for message cleanup - logLevelPattern = regexp.MustCompile(`(?i)^\[?(ERROR|WARNING|WARN|INFO|DEBUG)\]?\s*[:-]?\s*`) -) - // ToolCallInfo represents statistics for a single tool type ToolCallInfo struct { Name string // Prettified tool name (e.g., "github::search_issues", "bash") @@ -382,7 +371,7 @@ func CountErrorsAndWarningsWithPatterns(logContent string, patterns []ErrorPatte } // Clean up the message - message = extractErrorMessage(message) + message = logger.ExtractErrorMessage(message) if strings.ToLower(level) == "error" { if message != "" { @@ -457,29 +446,6 @@ func extractLevelFromMatchCompiled(match []string, cp compiledPattern) string { return "unknown" } -// extractErrorMessage extracts a clean error message from a log line -// Removes timestamps, log level prefixes, and other common noise -func extractErrorMessage(line string) string { - // Remove common timestamp patterns using pre-compiled regexes - cleanedLine := line - cleanedLine = timestampPattern1.ReplaceAllString(cleanedLine, "") - cleanedLine = timestampPattern2.ReplaceAllString(cleanedLine, "") - cleanedLine = timestampPattern3.ReplaceAllString(cleanedLine, "") - - // Remove common log level prefixes using pre-compiled regex - cleanedLine = logLevelPattern.ReplaceAllString(cleanedLine, "") - - // Trim whitespace - cleanedLine = strings.TrimSpace(cleanedLine) - - // If the line is too long (>200 chars), truncate it - if len(cleanedLine) > 200 { - cleanedLine = cleanedLine[:197] + "..." - } - - return cleanedLine -} - // FinalizeToolMetrics completes the metric collection process by finalizing sequences, // converting tool call maps to sorted slices, and optionally counting errors using patterns. // This function is called by engine-specific ParseLogMetrics implementations to avoid code duplication. diff --git a/pkg/workflow/metrics_test.go b/pkg/workflow/metrics_test.go index f00420543b..b20a35af54 100644 --- a/pkg/workflow/metrics_test.go +++ b/pkg/workflow/metrics_test.go @@ -3,6 +3,8 @@ package workflow import ( "encoding/json" "testing" + + "github.com/githubnext/gh-aw/pkg/logger" ) func TestExtractFirstMatch(t *testing.T) { @@ -744,9 +746,9 @@ func TestExtractErrorMessage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := extractErrorMessage(tt.input) + result := logger.ExtractErrorMessage(tt.input) if result != tt.expected { - t.Errorf("extractErrorMessage(%q) = %q, want %q", tt.input, result, tt.expected) + t.Errorf("logger.ExtractErrorMessage(%q) = %q, want %q", tt.input, result, tt.expected) } }) }