From fab538cce8809128e34607e26e6c6be436ce4f2e Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Sun, 15 Mar 2026 15:19:18 +0200 Subject: [PATCH 1/7] feat: unified JSON logging with per-record JSONL handler (#1030) Add JsonLogHandler that writes every log record as JSONL to the same date-based file used by webhook summaries. Each entry is enriched with webhook context (hook_id, repository, event_type) from ContextVar. - Add type field ("log_entry" vs "webhook_summary") to distinguish entries - Add level field derived from execution context (_derive_level) - Add type-based routing in log parser for mixed-type JSONL files - Filter log_entry records in log viewer to prevent duplicates - Capture exception tracebacks in exc_info field --- webhook_server/libs/log_parser.py | 74 +++- webhook_server/tests/test_context.py | 30 ++ webhook_server/tests/test_json_log_handler.py | 320 ++++++++++++++++++ webhook_server/tests/test_log_parser.py | 127 ++++++- .../tests/test_structured_logger.py | 3 + webhook_server/utils/context.py | 23 ++ webhook_server/utils/helpers.py | 15 +- webhook_server/utils/json_log_handler.py | 148 ++++++++ webhook_server/utils/structured_logger.py | 3 + webhook_server/web/log_viewer.py | 26 +- 10 files changed, 752 insertions(+), 17 deletions(-) create mode 100644 webhook_server/tests/test_json_log_handler.py create mode 100644 webhook_server/utils/json_log_handler.py diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index ff0f494b..f9139648 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -313,7 +313,9 @@ def parse_log_file(self, file_path: Path) -> list[LogEntry]: def parse_json_log_entry(self, json_line: str) -> LogEntry | None: """Parse a JSONL log entry into a LogEntry object. - Parses JSONL format (one compact JSON object per line). + Routes JSON entries by their ``type`` field: + - ``"log_entry"`` -> individual structured log line + - ``"webhook_summary"`` (or missing) -> legacy webhook summary Args: json_line: Raw JSON string from webhooks_*.json files (single line) @@ -329,6 +331,74 @@ def parse_json_log_entry(self, json_line: str) -> LogEntry | None: except json.JSONDecodeError: return None + entry_type = data.get("type", "webhook_summary") # backward compat + if entry_type == "log_entry": + return self._parse_json_log_line(data) + return self._parse_json_webhook_summary(data) + + def _parse_json_log_line(self, data: dict[str, Any]) -> LogEntry | None: + """Parse an individual JSON log line entry (type="log_entry") into a LogEntry. 
+ + Expected JSON format:: + + { + "type": "log_entry", + "timestamp": "ISO8601", + "level": "INFO", + "logger_name": "GithubWebhook", + "message": "Processing webhook", + "hook_id": "abc123", + "event_type": "pull_request", + "repository": "org/repo", + "pr_number": 123, + "api_user": "bot-user" + } + + Args: + data: Parsed JSON dictionary with type="log_entry" + + Returns: + LogEntry object if parsing successful, None otherwise + """ + # Parse timestamp + timestamp_str = data.get("timestamp", "") + if not timestamp_str: + return None + + try: + timestamp = datetime.datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=datetime.UTC) + except (ValueError, TypeError): + return None + + return LogEntry( + timestamp=timestamp, + level=data.get("level", "INFO"), + logger_name=data.get("logger_name", "GithubWebhook"), + message=data.get("message", ""), + hook_id=data.get("hook_id"), + event_type=data.get("event_type"), + repository=data.get("repository"), + pr_number=data.get("pr_number"), + github_user=data.get("api_user"), + task_id=None, + task_type=None, + task_status=None, + token_spend=None, + ) + + def _parse_json_webhook_summary(self, data: dict[str, Any]) -> LogEntry | None: + """Parse a webhook summary JSON entry into a LogEntry. + + Handles legacy format where ``type`` is ``"webhook_summary"`` or missing. + + Args: + data: Parsed JSON dictionary with type="webhook_summary" (or no type) + + Returns: + LogEntry object if parsing successful, None otherwise + """ # Parse timestamp from timing.started_at try: timing = data.get("timing", {}) @@ -359,7 +429,7 @@ def parse_json_log_entry(self, json_line: str) -> LogEntry | None: return LogEntry( timestamp=timestamp, - level="INFO", # JSON logs don't have levels, default to INFO + level=data.get("level", "INFO"), logger_name="GithubWebhook", message=message, hook_id=data.get("hook_id"), diff --git a/webhook_server/tests/test_context.py b/webhook_server/tests/test_context.py index 155af5e7..7e8e584c 100644 --- a/webhook_server/tests/test_context.py +++ b/webhook_server/tests/test_context.py @@ -325,6 +325,9 @@ def test_to_dict_returns_correct_structure(self, mock_datetime): assert result["initial_rate_limit"] == 5000 assert result["final_rate_limit"] == 4985 + # Level + assert result["level"] == "INFO" + # Status assert result["success"] is True assert result["error"] is None @@ -391,11 +394,38 @@ def test_to_dict_with_error(self, mock_datetime): result = ctx.to_dict() + assert result["level"] == "ERROR" assert result["success"] is False assert result["error"] is not None assert result["error"]["type"] == "ValueError" assert result["error"]["message"] == "Something went wrong" + def test_derive_level_warning_on_failed_step(self, mock_datetime): + """Test _derive_level returns WARNING when success=True but a step has status=failed.""" + ctx = WebhookContext( + hook_id="hook-warning-1", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + # Context is successful overall, but a step explicitly failed + ctx.workflow_steps["some_step"] = {"status": "failed"} + assert ctx.success is True + assert ctx._derive_level() == "WARNING" + + def test_derive_level_warning_on_failed_indicator(self, mock_datetime): + """Test _derive_level returns WARNING when a completed step has a _failed=True indicator.""" + ctx = WebhookContext( + hook_id="hook-warning-2", + event_type="pull_request", + repository="owner/repo", + 
repository_full_name="owner/repo", + ) + # Context is successful overall, but a step has a failure indicator + ctx.workflow_steps["compare_step"] = {"status": "completed", "compare_api_failed": True} + assert ctx.success is True + assert ctx._derive_level() == "WARNING" + class TestContextManagement: """Tests for module-level context management functions.""" diff --git a/webhook_server/tests/test_json_log_handler.py b/webhook_server/tests/test_json_log_handler.py new file mode 100644 index 00000000..af5bde50 --- /dev/null +++ b/webhook_server/tests/test_json_log_handler.py @@ -0,0 +1,320 @@ +"""Tests for webhook_server/utils/json_log_handler.py. + +Tests JsonLogHandler which writes log records as JSONL to date-based webhook log files, +enriched with WebhookContext data when available. +""" + +from __future__ import annotations + +import json +import logging +from datetime import UTC, datetime +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from webhook_server.utils.context import clear_context, create_context +from webhook_server.utils.json_log_handler import JsonLogHandler + + +@pytest.fixture(autouse=True) +def cleanup_context() -> None: + """Clean up context after each test.""" + yield # type: ignore[misc] + clear_context() + + +@pytest.fixture +def log_dir(tmp_path: Path) -> Path: + """Return a temporary log directory path.""" + return tmp_path / "logs" + + +@pytest.fixture +def handler(log_dir: Path) -> JsonLogHandler: + """Create a JsonLogHandler pointing at a temporary directory.""" + return JsonLogHandler(log_dir=str(log_dir)) + + +@pytest.fixture +def logger_with_handler(handler: JsonLogHandler) -> logging.Logger: + """Create a stdlib logger wired to the JsonLogHandler.""" + logger = logging.getLogger("test_json_log_handler") + logger.setLevel(logging.DEBUG) + logger.handlers.clear() + logger.addHandler(handler) + return logger + + +def _read_log_lines(log_dir: Path) -> list[dict]: + """Read all JSONL lines from the current-date log file in *log_dir*.""" + date_str = datetime.now(UTC).strftime("%Y-%m-%d") + log_file = log_dir / f"webhooks_{date_str}.json" + assert log_file.exists(), f"Expected log file {log_file} to exist" + lines = log_file.read_text(encoding="utf-8").strip().splitlines() + return [json.loads(line) for line in lines] + + +class TestJsonLogHandlerInit: + """Tests for handler initialisation.""" + + def test_creates_log_dir_if_missing(self, tmp_path: Path) -> None: + """Handler __init__ creates the log directory when it does not exist.""" + new_dir = tmp_path / "nonexistent" / "logs" + assert not new_dir.exists() + JsonLogHandler(log_dir=str(new_dir)) + assert new_dir.is_dir() + + def test_existing_log_dir_is_fine(self, tmp_path: Path) -> None: + """Handler __init__ succeeds when the log directory already exists.""" + existing = tmp_path / "logs" + existing.mkdir() + handler = JsonLogHandler(log_dir=str(existing)) + assert handler.log_dir == existing + + +class TestEmitBasic: + """Tests for basic emit behaviour and entry format.""" + + def test_emit_writes_valid_jsonl(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """A single emit produces a valid JSONL line in the correct file.""" + logger_with_handler.info("hello world") + entries = _read_log_lines(log_dir) + assert len(entries) == 1 + + def test_entry_format_fields(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Each entry contains the mandatory fields.""" + logger_with_handler.warning("something happened") + entry = 
_read_log_lines(log_dir)[0] + + assert entry["type"] == "log_entry" + assert "timestamp" in entry + # Verify timestamp is valid ISO format + datetime.fromisoformat(entry["timestamp"]) + assert entry["level"] == "WARNING" + assert entry["logger_name"] == "test_json_log_handler" + assert entry["message"] == "something happened" + + def test_date_based_file_naming(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Log file follows webhooks_YYYY-MM-DD.json naming convention.""" + logger_with_handler.info("test") + date_str = datetime.now(UTC).strftime("%Y-%m-%d") + expected = log_dir / f"webhooks_{date_str}.json" + assert expected.exists() + + def test_append_behaviour(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Multiple emits append to the same file.""" + logger_with_handler.info("first") + logger_with_handler.info("second") + logger_with_handler.info("third") + + entries = _read_log_lines(log_dir) + assert len(entries) == 3 + assert entries[0]["message"] == "first" + assert entries[1]["message"] == "second" + assert entries[2]["message"] == "third" + + +class TestLogLevels: + """Tests for different log levels.""" + + @pytest.mark.parametrize( + ("method", "expected_level"), + [ + ("debug", "DEBUG"), + ("info", "INFO"), + ("warning", "WARNING"), + ("error", "ERROR"), + ], + ) + def test_level_is_recorded( + self, + logger_with_handler: logging.Logger, + log_dir: Path, + method: str, + expected_level: str, + ) -> None: + """Log level is correctly captured in the entry.""" + getattr(logger_with_handler, method)("test message") + entry = _read_log_lines(log_dir)[0] + assert entry["level"] == expected_level + + +class TestAnsiStripping: + """Tests for ANSI escape code removal.""" + + def test_ansi_codes_stripped_from_message(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """ANSI colour codes are removed from the message before writing.""" + logger_with_handler.info("\x1b[31mRed text\x1b[0m and \x1b[1;32mbold green\x1b[0m") + entry = _read_log_lines(log_dir)[0] + assert "\x1b" not in entry["message"] + assert "Red text" in entry["message"] + assert "bold green" in entry["message"] + + def test_message_without_ansi_unchanged(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Plain messages pass through without modification.""" + logger_with_handler.info("plain message") + entry = _read_log_lines(log_dir)[0] + assert entry["message"] == "plain message" + + +class TestContextEnrichment: + """Tests for WebhookContext enrichment.""" + + def test_context_fields_added_when_context_set(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """When WebhookContext is active, its fields appear in the entry.""" + ctx = create_context( + hook_id="delivery-abc", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + action="opened", + sender="user1", + api_user="bot-user", + ) + ctx.pr_number = 42 + + logger_with_handler.info("processing webhook") + entry = _read_log_lines(log_dir)[0] + + assert entry["hook_id"] == "delivery-abc" + assert entry["event_type"] == "pull_request" + assert entry["repository"] == "org/repo" + assert entry["pr_number"] == 42 + assert entry["api_user"] == "bot-user" + + def test_no_context_fields_when_context_absent(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Without WebhookContext, context-specific fields are absent.""" + logger_with_handler.info("no context here") + entry = _read_log_lines(log_dir)[0] + + assert 
"hook_id" not in entry + assert "event_type" not in entry + assert "repository" not in entry + assert "pr_number" not in entry + assert "api_user" not in entry + + def test_basic_fields_present_without_context(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Basic fields (type, timestamp, level, logger_name, message) always present.""" + logger_with_handler.info("basic info") + entry = _read_log_lines(log_dir)[0] + + assert entry["type"] == "log_entry" + assert "timestamp" in entry + assert entry["level"] == "INFO" + assert entry["logger_name"] == "test_json_log_handler" + assert entry["message"] == "basic info" + + +class TestExceptionTraceback: + """Tests for exception traceback capture.""" + + def test_emit_with_exception_includes_traceback(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Test that exception info is included in JSON entry.""" + try: + raise ValueError("test error") + except ValueError: + logger_with_handler.exception("Something failed") + + entries = _read_log_lines(log_dir) + assert len(entries) == 1 + entry = entries[0] + + assert entry["level"] == "ERROR" + assert "Something failed" in entry["message"] + assert "exc_info" in entry + assert "ValueError" in entry["exc_info"] + assert "test error" in entry["exc_info"] + assert "Traceback" in entry["exc_info"] + + def test_emit_without_exception_has_no_exc_info(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Normal log entries do not contain exc_info field.""" + logger_with_handler.info("no error here") + entry = _read_log_lines(log_dir)[0] + assert "exc_info" not in entry + + +class TestErrorHandling: + """Tests for error resilience.""" + + def test_emit_does_not_crash_on_write_failure(self, handler: JsonLogHandler) -> None: + """Handler silently handles write failures via handleError.""" + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="test message", + args=(), + exc_info=None, + ) + + with patch.object(handler, "_append_to_file", side_effect=OSError("Disk full")): + # Must not raise + handler.emit(record) + + def test_emit_does_not_crash_on_open_failure( + self, + handler: JsonLogHandler, + ) -> None: + """Handler silently handles open() failures.""" + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="test message", + args=(), + exc_info=None, + ) + + with patch("builtins.open", side_effect=OSError("Permission denied")): + handler.emit(record) + + +class TestFileLocking: + """Tests for fcntl file locking behaviour.""" + + @patch("webhook_server.utils.json_log_handler.HAS_FCNTL", new=True) + @patch("fcntl.flock") + def test_flock_called_when_available( + self, mock_flock: Mock, logger_with_handler: logging.Logger, log_dir: Path + ) -> None: + """When HAS_FCNTL is True, fcntl.flock is called for lock/unlock.""" + logger_with_handler.info("locked write") + assert mock_flock.call_count >= 2 # At least LOCK_EX + LOCK_UN + + @patch("webhook_server.utils.json_log_handler.HAS_FCNTL", new=False) + def test_works_without_fcntl(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """When HAS_FCNTL is False, writing still works without locking.""" + logger_with_handler.info("no lock write") + entries = _read_log_lines(log_dir) + assert len(entries) == 1 + assert entries[0]["message"] == "no lock write" + + +class TestUnicodeContent: + """Tests for Unicode content handling.""" + + def test_unicode_message_written_correctly(self, 
logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Messages with Unicode characters are preserved.""" + logger_with_handler.info("测试 emoji 🚀 and accents: éàü") + entry = _read_log_lines(log_dir)[0] + assert "测试" in entry["message"] + assert "🚀" in entry["message"] + assert "éàü" in entry["message"] + + def test_unicode_in_context_fields(self, logger_with_handler: logging.Logger, log_dir: Path) -> None: + """Unicode in WebhookContext fields is preserved.""" + create_context( + hook_id="hook-unicode", + event_type="pull_request", + repository="org/日本語リポ", + repository_full_name="org/日本語リポ", + api_user="用户", + ) + logger_with_handler.info("ctx test") + entry = _read_log_lines(log_dir)[0] + assert entry["repository"] == "org/日本語リポ" + assert entry["api_user"] == "用户" diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index e5bf7868..a8acde44 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -3,6 +3,7 @@ import asyncio import contextlib import datetime +import json import logging import tempfile import unittest.mock @@ -781,6 +782,7 @@ def test_parse_json_log_entry_valid_json(self) -> None: "action": "opened", "repository": "org/test-repo", "api_user": "test-user", + "level": "INFO", "success": true, "token_spend": 35, "timing": { @@ -819,6 +821,7 @@ def test_parse_json_log_entry_failed_webhook(self) -> None: "action": "created", "repository": "org/repo", "api_user": "user1", + "level": "ERROR", "success": false, "token_spend": 10, "timing": { @@ -835,6 +838,7 @@ def test_parse_json_log_entry_failed_webhook(self) -> None: entry = parser.parse_json_log_entry(json_line) assert entry is not None + assert entry.level == "ERROR" assert entry.hook_id == "failed-hook" assert entry.task_status == "failed" assert entry.pr_number == 456 @@ -849,6 +853,7 @@ def test_parse_json_log_entry_without_pr(self) -> None: "event_type": "push", "repository": "org/repo", "api_user": "user2", + "level": "INFO", "success": true, "timing": { "started_at": "2025-07-31T12:00:00Z" @@ -918,6 +923,7 @@ def test_parse_json_log_entry_timezone_handling(self) -> None: "hook_id": "z-time", "event_type": "push", "repository": "org/repo", + "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -930,6 +936,7 @@ def test_parse_json_log_entry_timezone_handling(self) -> None: "hook_id": "plus-time", "event_type": "push", "repository": "org/repo", + "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00+00:00"} }""" @@ -946,6 +953,7 @@ def test_parse_json_log_entry_extracts_all_fields(self) -> None: "action": "synchronize", "repository": "owner/repo-name", "api_user": "github-user", + "level": "INFO", "success": true, "token_spend": 42, "timing": { @@ -982,12 +990,13 @@ def test_parse_json_log_file_multiple_entries(self, tmp_path: Path) -> None: # Each JSON object must be on a single line (JSON lines format) json_content = ( '{"hook_id": "hook1", "event_type": "push", "repository": "org/repo1", ' - '"api_user": "user1", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + '"api_user": "user1", "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' '{"hook_id": "hook2", "event_type": "pull_request", "action": "opened", ' - '"repository": "org/repo2", "api_user": "user2", "success": true, ' + '"repository": "org/repo2", "api_user": "user2", "level": "INFO", "success": true, ' '"timing": {"started_at": 
"2025-07-31T10:01:00Z"}, "pr": {"number": 123}}\n' '{"hook_id": "hook3", "event_type": "issue_comment", "repository": "org/repo3", ' - '"api_user": "user3", "success": false, "timing": {"started_at": "2025-07-31T10:02:00Z"}}\n' + '"api_user": "user3", "level": "ERROR", "success": false, ' + '"timing": {"started_at": "2025-07-31T10:02:00Z"}}\n' ) log_file = tmp_path / "webhooks_test.json" log_file.write_text(json_content) @@ -1020,11 +1029,11 @@ def test_parse_json_log_file_skips_invalid_lines(self, tmp_path: Path) -> None: # Each JSON object must be on a single line (JSON lines format) json_content = ( '{"hook_id": "valid1", "event_type": "push", "repository": "org/repo", ' - '"success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + '"level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' "this is not valid json\n" "{incomplete json\n" '{"hook_id": "valid2", "event_type": "pull_request", "repository": "org/repo", ' - '"success": true, "timing": {"started_at": "2025-07-31T10:01:00Z"}}\n' + '"level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:01:00Z"}}\n' '{"missing_timestamp": true}\n' ) log_file = tmp_path / "mixed.json" @@ -1133,6 +1142,7 @@ def test_json_summary_message_format_with_action(self) -> None: "event_type": "pull_request", "action": "synchronize", "repository": "org/repo", + "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -1149,6 +1159,7 @@ def test_json_summary_message_format_without_action(self) -> None: "hook_id": "hook1", "event_type": "push", "repository": "org/repo", + "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -1166,6 +1177,7 @@ def test_parse_json_log_entry_null_pr_field(self) -> None: "hook_id": "null-pr", "event_type": "push", "repository": "org/repo", + "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}, "pr": null @@ -1190,6 +1202,111 @@ def test_parse_json_log_entry_empty_timing_object(self) -> None: entry = parser.parse_json_log_entry(json_line) assert entry is None + def test_parse_json_log_entry_type_log_entry(self) -> None: + """Test parsing a JSON entry with type='log_entry' routes to _parse_json_log_line.""" + parser = LogParser() + json_line = json.dumps({ + "type": "log_entry", + "timestamp": "2026-03-15T12:00:00+00:00", + "level": "WARNING", + "logger_name": "PullRequestHandler", + "message": "PR labels updated", + "hook_id": "delivery-999", + "event_type": "pull_request", + "repository": "org/my-repo", + "pr_number": 77, + "api_user": "bot-user", + }) + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.level == "WARNING" + assert entry.logger_name == "PullRequestHandler" + assert entry.message == "PR labels updated" + assert entry.hook_id == "delivery-999" + assert entry.event_type == "pull_request" + assert entry.repository == "org/my-repo" + assert entry.pr_number == 77 + assert entry.github_user == "bot-user" + # Fields not present in log_entry type + assert entry.task_id is None + assert entry.task_type is None + assert entry.task_status is None + assert entry.token_spend is None + + def test_parse_json_log_entry_type_routing_default(self) -> None: + """Test that entries without a 'type' field default to webhook_summary parsing.""" + parser = LogParser() + json_line = json.dumps({ + "hook_id": "no-type-field", + "event_type": "push", + "action": "completed", + "repository": "org/repo", + "success": True, + "timing": { + 
"started_at": "2026-03-15T08:00:00+00:00", + }, + "pr": None, + }) + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + # Should have been routed through _parse_json_webhook_summary + assert entry.hook_id == "no-type-field" + assert entry.event_type == "push" + assert entry.repository == "org/repo" + assert entry.task_status == "completed" # derived from success=True + + def test_parse_json_log_entry_mixed_types(self, tmp_path: Path) -> None: + """Test parsing a file containing both log_entry and webhook_summary entries.""" + parser = LogParser() + + log_entry_line = json.dumps({ + "type": "log_entry", + "timestamp": "2026-03-15T09:00:00+00:00", + "level": "INFO", + "logger_name": "GithubWebhook", + "message": "Started processing", + "hook_id": "mixed-hook-1", + "event_type": "pull_request", + "repository": "org/repo", + "pr_number": 10, + "api_user": "bot", + }) + + summary_line = json.dumps({ + "type": "webhook_summary", + "hook_id": "mixed-hook-1", + "event_type": "pull_request", + "action": "opened", + "repository": "org/repo", + "success": True, + "timing": { + "started_at": "2026-03-15T09:00:00+00:00", + "completed_at": "2026-03-15T09:00:05+00:00", + "duration_ms": 5000, + }, + "pr": {"number": 10, "title": "Test PR", "author": "dev"}, + "level": "INFO", + }) + + log_file = tmp_path / "webhooks_2026-03-15.json" + log_file.write_text(f"{log_entry_line}\n{summary_line}\n") + + entries = parser.parse_json_log_file(log_file) + + assert len(entries) == 2 + # First entry is a log_entry + assert entries[0].logger_name == "GithubWebhook" + assert entries[0].message == "Started processing" + assert entries[0].hook_id == "mixed-hook-1" + # Second entry is a webhook_summary + assert entries[1].hook_id == "mixed-hook-1" + assert entries[1].pr_number == 10 + assert entries[1].task_status == "completed" # derived from success=True + class TestAdditionalCoverageTests: """Additional tests for edge cases to reach 90%+ coverage.""" diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py index b087caab..6d47647e 100644 --- a/webhook_server/tests/test_structured_logger.py +++ b/webhook_server/tests/test_structured_logger.py @@ -126,6 +126,7 @@ def test_write_log_writes_valid_json( content = f.read().strip() # Pretty-printed JSON is multi-line, parse the entire content as one JSON object log_entry = json.loads(content) + assert log_entry["type"] == "webhook_summary" assert log_entry["hook_id"] == "test-hook-123" assert log_entry["event_type"] == "pull_request" assert log_entry["repository"] == "org/repo" @@ -377,6 +378,7 @@ def test_write_error_log_with_partial_context( with open(log_file) as f: log_entry = json.loads(f.read().strip()) + assert log_entry["type"] == "webhook_summary" assert log_entry["success"] is False assert log_entry["error"] is not None assert log_entry["error"]["message"] == "Early failure" @@ -397,6 +399,7 @@ def test_write_error_log_without_context(self, log_writer: StructuredLogWriter, with open(log_file) as f: log_entry = json.loads(f.read().strip()) + assert log_entry["type"] == "webhook_summary" assert log_entry["hook_id"] == "test-hook-error" assert log_entry["event_type"] == "push" assert log_entry["repository"] == "org/repo" diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py index e4d82f3e..18441a02 100644 --- a/webhook_server/utils/context.py +++ b/webhook_server/utils/context.py @@ -331,6 +331,28 @@ def _build_summary(self) -> str | None: 
f"[{_format_duration(duration_ms)}{token_info}] steps=[{steps_str}]" ) + def _derive_level(self) -> str: + """Derive log level from execution context. + + Returns: + Log level string: ERROR, WARNING, or INFO + """ + if not self.success: + return "ERROR" + + for step_data in self.workflow_steps.values(): + # Explicit failure status + if step_data.get("status") == "failed": + return "WARNING" + + # Steps completed with error indicators (e.g., compare_api_failed=True) + if step_data.get("status") == "completed" and any( + key.endswith("_failed") and value is True for key, value in step_data.items() if isinstance(value, bool) + ): + return "WARNING" + + return "INFO" + def to_dict(self) -> dict[str, Any]: """Convert context to dictionary for JSON serialization. @@ -342,6 +364,7 @@ def to_dict(self) -> dict[str, Any]: """ return { "hook_id": self.hook_id, + "level": self._derive_level(), "event_type": self.event_type, "action": self.action, "sender": self.sender, diff --git a/webhook_server/utils/helpers.py b/webhook_server/utils/helpers.py index 5a0521cf..96c11b3e 100644 --- a/webhook_server/utils/helpers.py +++ b/webhook_server/utils/helpers.py @@ -4,6 +4,7 @@ import contextlib import datetime import json +import logging import os import random import re @@ -27,6 +28,7 @@ from webhook_server.libs.config import Config from webhook_server.libs.exceptions import NoApiTokenError +from webhook_server.utils.json_log_handler import JsonLogHandler from webhook_server.utils.safe_rotating_handler import SafeRotatingFileHandler # Patch simple_logger to use SafeRotatingFileHandler to prevent crashes @@ -87,7 +89,7 @@ def get_logger_with_params( # The original 'name' parameter is preserved in log records via the logger name. logger_cache_key = os.path.basename(log_file_path_resolved) if log_file_path_resolved else "console" - return get_logger( + logger = get_logger( name=logger_cache_key, filename=log_file_path_resolved, level=log_level, @@ -97,6 +99,17 @@ def get_logger_with_params( console=True, # Enable console output for docker logs with FORCE_COLOR support ) + # Attach JsonLogHandler for writing log records to the webhook JSONL file. + # Only attach when a log file path is configured (skip console-only loggers) + # and only once per logger instance to avoid duplicate handlers. + if log_file_path_resolved: + log_dir = os.path.dirname(log_file_path_resolved) + if not any(isinstance(h, JsonLogHandler) for h in logger.handlers): + json_handler = JsonLogHandler(log_dir=log_dir, level=getattr(logging, log_level.upper(), logging.DEBUG)) + logger.addHandler(json_handler) + + return logger + def get_log_file_path(config: Config, log_file_name: str | None) -> str | None: """ diff --git a/webhook_server/utils/json_log_handler.py b/webhook_server/utils/json_log_handler.py new file mode 100644 index 00000000..fc34772a --- /dev/null +++ b/webhook_server/utils/json_log_handler.py @@ -0,0 +1,148 @@ +"""JSON log handler that writes log records to the webhook JSONL log file. + +Intercepts every log record and writes it as a JSON entry to the same +date-based JSONL file used by webhook summaries. Enriches entries with +webhook context (hook_id, repository, event_type, etc.) from the +ContextVar when available. 
+ +Architecture: +- Subclasses logging.Handler for standard library integration +- Reads WebhookContext from ContextVar for per-request enrichment +- Atomic append with fcntl file locking (same pattern as StructuredLogWriter) +- Never crashes the application — uses handleError() on failures + +Entry format: + {"type": "log_entry", "timestamp": "ISO8601", "level": "INFO", + "logger_name": "...", "message": "...", "hook_id": "...", ...} +""" + +import json +import logging +import os +import re +import traceback +from datetime import UTC, datetime +from pathlib import Path + +# Platform-specific imports for file locking +try: + import fcntl + + HAS_FCNTL = True +except ImportError: + HAS_FCNTL = False + +from webhook_server.utils.context import get_context + +# Pre-compiled regex for stripping ANSI escape codes +_ANSI_ESCAPE_RE: re.Pattern[str] = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + + +class JsonLogHandler(logging.Handler): + """Logging handler that writes JSON entries to the webhook JSONL log file. + + Each log record is serialized as a compact JSON object and appended to + the date-based log file (webhooks_YYYY-MM-DD.json). The handler enriches + entries with webhook context data when available. + + Attributes: + log_dir: Directory path for log files + """ + + def __init__(self, log_dir: str, level: int = logging.NOTSET) -> None: + """Initialize the JSON log handler. + + Args: + log_dir: Directory path where JSONL log files are written + level: Minimum log level to handle (default: NOTSET, handles all) + """ + super().__init__(level) + self.log_dir: Path = Path(log_dir) + self.log_dir.mkdir(parents=True, exist_ok=True) + + def _get_log_file_path(self) -> Path: + """Get log file path for the current UTC date. + + Returns: + Path to the log file (e.g., {log_dir}/webhooks_2026-01-05.json) + """ + date_str = datetime.now(UTC).strftime("%Y-%m-%d") + return self.log_dir / f"webhooks_{date_str}.json" + + def emit(self, record: logging.LogRecord) -> None: + """Write a log record as JSON to the JSONL log file. + + Builds a JSON dict from the LogRecord and enriches it with webhook + context data (hook_id, repository, event_type, etc.) when available. + + Args: + record: The log record to write + """ + try: + entry = self._build_entry(record) + log_line = json.dumps(entry, ensure_ascii=False) + self._append_to_file(log_line) + except Exception: + self.handleError(record) + + def _build_entry(self, record: logging.LogRecord) -> dict[str, object]: + """Build a JSON-serializable dict from a LogRecord. + + Strips ANSI codes from the message and enriches with webhook + context when available. 
+ + Args: + record: The log record to convert + + Returns: + Dict ready for JSON serialization + """ + message = record.getMessage() + message = _ANSI_ESCAPE_RE.sub("", message) + + exc_text: str | None = None + if record.exc_info and record.exc_info[0] is not None: + exc_text = "".join(traceback.format_exception(*record.exc_info)) + + entry: dict[str, object] = { + "type": "log_entry", + "timestamp": datetime.fromtimestamp(record.created, tz=UTC).isoformat(), + "level": record.levelname, + "logger_name": record.name, + "message": message, + } + + if exc_text: + entry["exc_info"] = exc_text + + # Enrich with webhook context when available + ctx = get_context() + if ctx is not None: + entry["hook_id"] = ctx.hook_id + entry["event_type"] = ctx.event_type + entry["repository"] = ctx.repository + entry["pr_number"] = ctx.pr_number + entry["api_user"] = ctx.api_user + + return entry + + def _append_to_file(self, log_line: str) -> None: + """Atomically append a JSON line to the log file. + + Uses fcntl file locking when available for safe concurrent access. + + Args: + log_line: JSON string to append (without trailing newline) + """ + log_file = self._get_log_file_path() + + with open(log_file, "a") as fd: + if HAS_FCNTL: + fcntl.flock(fd.fileno(), fcntl.LOCK_EX) + try: + fd.write(f"{log_line}\n") + fd.flush() + os.fsync(fd.fileno()) + finally: + if HAS_FCNTL: + fcntl.flock(fd.fileno(), fcntl.LOCK_UN) diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py index 9de73b22..5965fbbb 100644 --- a/webhook_server/utils/structured_logger.py +++ b/webhook_server/utils/structured_logger.py @@ -108,6 +108,7 @@ def write_log(self, context: WebhookContext) -> None: # Get context dict and update timing locally (without mutating context) context_dict = context.to_dict() + context_dict["type"] = "webhook_summary" if "timing" in context_dict: context_dict["timing"]["completed_at"] = completed_at.isoformat() if context.started_at: @@ -213,6 +214,7 @@ def write_error_log( else: # No context - create minimal error entry error_entry = { + "type": "webhook_summary", "hook_id": hook_id, "event_type": event_type, "action": None, @@ -230,6 +232,7 @@ def write_error_log( "token_spend": None, "initial_rate_limit": None, "final_rate_limit": None, + "level": "ERROR", "success": False, "error": { "type": "WebhookProcessingError", diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 37a566c9..00344743 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -1089,8 +1089,14 @@ def sort_key(f: Path) -> tuple[int, float]: # Use appropriate parser based on file type if log_file.suffix == ".json": # JSONL files: one compact JSON object per line + # Skip "log_entry" records - those same events are already + # in the *.log text files; only include "webhook_summary" + # (or entries without a type field, for backward compat). 
async for line in f: - entry = self.log_parser.parse_json_log_entry(line) + raw = self.log_parser.get_raw_json_entry(line) + if not raw or raw.get("type") == "log_entry": + continue + entry = self.log_parser._parse_json_webhook_summary(raw) if entry: buffer.append(entry) else: @@ -1213,22 +1219,24 @@ async def _stream_json_log_entries( try: # Stream JSONL entries incrementally without loading entire file remaining = max_entries - total_yielded - line_buffer: deque[str] = deque(maxlen=remaining) + entry_buffer: deque[dict[str, Any]] = deque(maxlen=remaining) async with aiofiles.open(log_file, encoding="utf-8") as f: # JSONL format: one JSON object per line + # Filter during buffering so log_entry records don't evict + # valid webhook_summary entries from the bounded deque. async for line in f: - line_buffer.append(line.rstrip("\n")) + data = self.log_parser.get_raw_json_entry(line.rstrip("\n")) + if data is not None and data.get("type") != "log_entry": + entry_buffer.append(data) - # Process lines in reverse order (newest first) - for line in reversed(line_buffer): + # Yield entries in reverse order (newest first) + for entry in reversed(entry_buffer): if total_yielded >= max_entries: break - data = self.log_parser.get_raw_json_entry(line) - if data: - yield data - total_yielded += 1 + yield entry + total_yielded += 1 except asyncio.CancelledError: self.logger.debug("Operation cancelled") raise # Always re-raise CancelledError From 5146135f0fc6f594e33c37e37600e6df69f7a51b Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Sun, 15 Mar 2026 15:44:59 +0200 Subject: [PATCH 2/7] fix: log viewer shows all levels and fixes realtime streaming - Stop filtering out log_entry records in _stream_log_entries() and _stream_json_log_entries() so DEBUG/WARNING/ERROR entries appear - Change monitor_log_directory() default pattern to webhooks_*.json so WebSocket streams from the JSONL file containing all log records - Add tail_json_log_file() for tailing JSONL files in real-time - Fix realtime button toggle in log_viewer.js --- webhook_server/libs/log_parser.py | 41 +++++++++++++++++++--- webhook_server/web/log_viewer.py | 13 ++----- webhook_server/web/static/js/log_viewer.js | 6 ++++ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index f9139648..acc875d9 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -571,13 +571,41 @@ async def tail_log_file(self, file_path: Path, follow: bool = True) -> AsyncGene # Not following, exit when no more data break - async def monitor_log_directory(self, log_dir: Path, pattern: str = "*.log") -> AsyncGenerator[LogEntry]: + async def tail_json_log_file(self, file_path: Path, follow: bool = True) -> AsyncGenerator[LogEntry]: + """Tail a JSON log file and yield new LogEntry objects as they are added. 
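+
+        Illustrative sketch (``log_dir`` here is a stand-in for the
+        directory that holds the JSONL files)::
+
+            parser = LogParser()
+            json_file = log_dir / "webhooks_2026-03-15.json"
+            async for entry in parser.tail_json_log_file(json_file, follow=True):
+                print(entry.level, entry.message)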
+ + Args: + file_path: Path to the JSONL file to monitor + follow: Whether to continue monitoring for new entries + + Yields: + LogEntry objects for new JSON log lines + """ + if not file_path.exists(): + return + + with open(file_path, encoding="utf-8") as f: + # Move to end of file + f.seek(0, 2) + + while True: + line = f.readline() + if line: + entry = self.parse_json_log_entry(line) + if entry: + yield entry + elif follow: + await asyncio.sleep(0.1) + else: + break + + async def monitor_log_directory(self, log_dir: Path, pattern: str = "webhooks_*.json") -> AsyncGenerator[LogEntry]: """ Monitor a directory for log files and yield new entries from all files. Args: log_dir: Directory path containing log files - pattern: Glob pattern for log files (default: "*.log") + pattern: Glob pattern for log files (default: "webhooks_*.json") Yields: LogEntry objects from all monitored log files @@ -601,8 +629,13 @@ async def monitor_log_directory(self, log_dir: Path, pattern: str = "*.log") -> current_log_files.sort(key=lambda f: f.stat().st_mtime, reverse=True) most_recent_file = current_log_files[0] - async for entry in self.tail_log_file(most_recent_file, follow=True): - yield entry + # Use appropriate tailer based on file type + if most_recent_file.suffix == ".json": + async for entry in self.tail_json_log_file(most_recent_file, follow=True): + yield entry + else: + async for entry in self.tail_log_file(most_recent_file, follow=True): + yield entry class LogFilter: diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 00344743..d3b1173e 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -1089,14 +1089,9 @@ def sort_key(f: Path) -> tuple[int, float]: # Use appropriate parser based on file type if log_file.suffix == ".json": # JSONL files: one compact JSON object per line - # Skip "log_entry" records - those same events are already - # in the *.log text files; only include "webhook_summary" - # (or entries without a type field, for backward compat). + # Process both "log_entry" and "webhook_summary" entries async for line in f: - raw = self.log_parser.get_raw_json_entry(line) - if not raw or raw.get("type") == "log_entry": - continue - entry = self.log_parser._parse_json_webhook_summary(raw) + entry = self.log_parser.parse_json_log_entry(line) if entry: buffer.append(entry) else: @@ -1223,11 +1218,9 @@ async def _stream_json_log_entries( async with aiofiles.open(log_file, encoding="utf-8") as f: # JSONL format: one JSON object per line - # Filter during buffering so log_entry records don't evict - # valid webhook_summary entries from the bounded deque. 
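+            # Keep both "log_entry" and "webhook_summary" records in the
+            # bounded buffer so DEBUG/WARNING/ERROR lines reach the viewer.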
async for line in f: data = self.log_parser.get_raw_json_entry(line.rstrip("\n")) - if data is not None and data.get("type") != "log_entry": + if data is not None: entry_buffer.append(data) # Yield entries in reverse order (newest first) diff --git a/webhook_server/web/static/js/log_viewer.js b/webhook_server/web/static/js/log_viewer.js index a04e484d..10199390 100644 --- a/webhook_server/web/static/js/log_viewer.js +++ b/webhook_server/web/static/js/log_viewer.js @@ -11,13 +11,19 @@ const CONFIG = { function updateConnectionStatus(connected) { const status = document.getElementById("connectionStatus"); const statusText = document.getElementById("statusText"); + const connectBtn = document.getElementById("connectBtn"); + const disconnectBtn = document.getElementById("disconnectBtn"); if (connected) { status.className = "status connected"; statusText.textContent = "Connected - Real-time updates active"; + if (connectBtn) connectBtn.style.display = "none"; + if (disconnectBtn) disconnectBtn.style.display = ""; } else { status.className = "status disconnected"; statusText.textContent = "Disconnected - Real-time updates inactive"; + if (connectBtn) connectBtn.style.display = ""; + if (disconnectBtn) disconnectBtn.style.display = "none"; } } From a92ad862192621886840aac49dbb4011c27342c2 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Sun, 15 Mar 2026 15:59:04 +0200 Subject: [PATCH 3/7] fix: address review findings from Qodo and CodeRabbit - Use config.data_dir/logs for JsonLogHandler directory (Qodo #1) - Add encoding="utf-8" to JSONL file writes (Qodo #2) - Thread-safe handler attachment with log_dir match check (CodeRabbit) - Use _detect_success() in _derive_level() for all failure indicators (CodeRabbit) - Filter log_entry type in get_workflow_steps_json() (CodeRabbit) - Remove unused mock_datetime fixture params (CodeRabbit) - Add regression tests for can_merge=False and success=False --- webhook_server/tests/test_context.py | 28 ++++++++++++++++++++++-- webhook_server/utils/context.py | 6 ++--- webhook_server/utils/helpers.py | 19 ++++++++++++---- webhook_server/utils/json_log_handler.py | 2 +- webhook_server/web/log_viewer.py | 2 ++ 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/webhook_server/tests/test_context.py b/webhook_server/tests/test_context.py index 7e8e584c..3dae473a 100644 --- a/webhook_server/tests/test_context.py +++ b/webhook_server/tests/test_context.py @@ -400,7 +400,7 @@ def test_to_dict_with_error(self, mock_datetime): assert result["error"]["type"] == "ValueError" assert result["error"]["message"] == "Something went wrong" - def test_derive_level_warning_on_failed_step(self, mock_datetime): + def test_derive_level_warning_on_failed_step(self): """Test _derive_level returns WARNING when success=True but a step has status=failed.""" ctx = WebhookContext( hook_id="hook-warning-1", @@ -413,7 +413,7 @@ def test_derive_level_warning_on_failed_step(self, mock_datetime): assert ctx.success is True assert ctx._derive_level() == "WARNING" - def test_derive_level_warning_on_failed_indicator(self, mock_datetime): + def test_derive_level_warning_on_failed_indicator(self): """Test _derive_level returns WARNING when a completed step has a _failed=True indicator.""" ctx = WebhookContext( hook_id="hook-warning-2", @@ -426,6 +426,30 @@ def test_derive_level_warning_on_failed_indicator(self, mock_datetime): assert ctx.success is True assert ctx._derive_level() == "WARNING" + def test_derive_level_warning_on_can_merge_false(self): + """Test _derive_level returns WARNING 
when a step has can_merge=False.""" + ctx = WebhookContext( + hook_id="hook-warning-3", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + ctx.workflow_steps["merge_check"] = {"status": "completed", "can_merge": False, "reason": "Missing approver"} + assert ctx.success is True + assert ctx._derive_level() == "WARNING" + + def test_derive_level_warning_on_step_success_false(self): + """Test _derive_level returns WARNING when a step has success=False.""" + ctx = WebhookContext( + hook_id="hook-warning-4", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + ctx.workflow_steps["validation"] = {"status": "completed", "success": False} + assert ctx.success is True + assert ctx._derive_level() == "WARNING" + class TestContextManagement: """Tests for module-level context management functions.""" diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py index 18441a02..15194179 100644 --- a/webhook_server/utils/context.py +++ b/webhook_server/utils/context.py @@ -345,10 +345,8 @@ def _derive_level(self) -> str: if step_data.get("status") == "failed": return "WARNING" - # Steps completed with error indicators (e.g., compare_api_failed=True) - if step_data.get("status") == "completed" and any( - key.endswith("_failed") and value is True for key, value in step_data.items() if isinstance(value, bool) - ): + # Steps completed with non-fatal failure indicators + if step_data.get("status") == "completed" and not self._detect_success(step_data): return "WARNING" return "INFO" diff --git a/webhook_server/utils/helpers.py b/webhook_server/utils/helpers.py index 96c11b3e..4c45e0bf 100644 --- a/webhook_server/utils/helpers.py +++ b/webhook_server/utils/helpers.py @@ -11,9 +11,11 @@ import shlex import shutil import subprocess +import threading from collections.abc import AsyncGenerator from concurrent.futures import Future, as_completed from logging import Logger +from pathlib import Path from typing import Any from uuid import uuid4 @@ -35,6 +37,8 @@ # when backup log files are missing during rollover simple_logger.logger.RotatingFileHandler = SafeRotatingFileHandler +_JSON_HANDLER_LOCK = threading.Lock() + def get_logger_with_params( repository_name: str = "", @@ -102,11 +106,18 @@ def get_logger_with_params( # Attach JsonLogHandler for writing log records to the webhook JSONL file. # Only attach when a log file path is configured (skip console-only loggers) # and only once per logger instance to avoid duplicate handlers. + # Uses _config.data_dir/logs (same directory as StructuredLogWriter) instead + # of deriving from the text log file path, which may differ for absolute paths. 
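+    # Illustrative example (paths are placeholders, not defaults): with
+    # data_dir="/opt/webhook/data", entries land in
+    # /opt/webhook/data/logs/webhooks_<YYYY-MM-DD>.json.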
if log_file_path_resolved: - log_dir = os.path.dirname(log_file_path_resolved) - if not any(isinstance(h, JsonLogHandler) for h in logger.handlers): - json_handler = JsonLogHandler(log_dir=log_dir, level=getattr(logging, log_level.upper(), logging.DEBUG)) - logger.addHandler(json_handler) + log_dir = os.path.join(_config.data_dir, "logs") + with _JSON_HANDLER_LOCK: + if not any(isinstance(h, JsonLogHandler) and h.log_dir == Path(log_dir) for h in logger.handlers): + logger.addHandler( + JsonLogHandler( + log_dir=log_dir, + level=getattr(logging, log_level.upper(), logging.DEBUG), + ) + ) return logger diff --git a/webhook_server/utils/json_log_handler.py b/webhook_server/utils/json_log_handler.py index fc34772a..505c9d4d 100644 --- a/webhook_server/utils/json_log_handler.py +++ b/webhook_server/utils/json_log_handler.py @@ -136,7 +136,7 @@ def _append_to_file(self, log_line: str) -> None: """ log_file = self._get_log_file_path() - with open(log_file, "a") as fd: + with open(log_file, "a", encoding="utf-8") as fd: if HAS_FCNTL: fcntl.flock(fd.fileno(), fcntl.LOCK_EX) try: diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index d3b1173e..bac10c0b 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -696,6 +696,8 @@ async def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: """ # Search JSON logs for this hook_id async for entry in self._stream_json_log_entries(max_files=25, max_entries=50000): + if entry.get("type", "webhook_summary") != "webhook_summary": + continue if entry.get("hook_id") == hook_id: # Found the entry - transform to frontend-expected format try: From 6d392a6c2d1bfb532a0f6848be6401818ecb9730 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Sun, 15 Mar 2026 16:28:58 +0200 Subject: [PATCH 4/7] fix: add COMPLETED level to UI filter and force COMPLETED for all summaries --- webhook_server/libs/log_parser.py | 2 +- webhook_server/tests/test_context.py | 59 ++--- webhook_server/tests/test_log_parser.py | 235 +++++++++++++++++-- webhook_server/utils/context.py | 20 +- webhook_server/utils/structured_logger.py | 2 +- webhook_server/web/static/css/log_viewer.css | 2 + webhook_server/web/static/js/log_viewer.js | 1 + webhook_server/web/templates/log_viewer.html | 1 + 8 files changed, 245 insertions(+), 77 deletions(-) diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index acc875d9..0182770e 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -429,7 +429,7 @@ def _parse_json_webhook_summary(self, data: dict[str, Any]) -> LogEntry | None: return LogEntry( timestamp=timestamp, - level=data.get("level", "INFO"), + level="COMPLETED", logger_name="GithubWebhook", message=message, hook_id=data.get("hook_id"), diff --git a/webhook_server/tests/test_context.py b/webhook_server/tests/test_context.py index 3dae473a..1e0dcf40 100644 --- a/webhook_server/tests/test_context.py +++ b/webhook_server/tests/test_context.py @@ -326,7 +326,7 @@ def test_to_dict_returns_correct_structure(self, mock_datetime): assert result["final_rate_limit"] == 4985 # Level - assert result["level"] == "INFO" + assert result["level"] == "COMPLETED" # Status assert result["success"] is True @@ -394,61 +394,34 @@ def test_to_dict_with_error(self, mock_datetime): result = ctx.to_dict() - assert result["level"] == "ERROR" + assert result["level"] == "COMPLETED" assert result["success"] is False assert result["error"] is not None assert result["error"]["type"] == "ValueError" 
assert result["error"]["message"] == "Something went wrong" - def test_derive_level_warning_on_failed_step(self): - """Test _derive_level returns WARNING when success=True but a step has status=failed.""" + def test_derive_level_always_returns_completed(self): + """Test _derive_level always returns COMPLETED regardless of step or context state.""" ctx = WebhookContext( - hook_id="hook-warning-1", + hook_id="hook-level-1", event_type="pull_request", repository="owner/repo", repository_full_name="owner/repo", ) - # Context is successful overall, but a step explicitly failed + # Default state (success=True, no steps) + assert ctx._derive_level() == "COMPLETED" + + # With a failed step ctx.workflow_steps["some_step"] = {"status": "failed"} - assert ctx.success is True - assert ctx._derive_level() == "WARNING" + assert ctx._derive_level() == "COMPLETED" - def test_derive_level_warning_on_failed_indicator(self): - """Test _derive_level returns WARNING when a completed step has a _failed=True indicator.""" - ctx = WebhookContext( - hook_id="hook-warning-2", - event_type="pull_request", - repository="owner/repo", - repository_full_name="owner/repo", - ) - # Context is successful overall, but a step has a failure indicator - ctx.workflow_steps["compare_step"] = {"status": "completed", "compare_api_failed": True} - assert ctx.success is True - assert ctx._derive_level() == "WARNING" + # With success=False + ctx.success = False + assert ctx._derive_level() == "COMPLETED" - def test_derive_level_warning_on_can_merge_false(self): - """Test _derive_level returns WARNING when a step has can_merge=False.""" - ctx = WebhookContext( - hook_id="hook-warning-3", - event_type="pull_request", - repository="owner/repo", - repository_full_name="owner/repo", - ) - ctx.workflow_steps["merge_check"] = {"status": "completed", "can_merge": False, "reason": "Missing approver"} - assert ctx.success is True - assert ctx._derive_level() == "WARNING" - - def test_derive_level_warning_on_step_success_false(self): - """Test _derive_level returns WARNING when a step has success=False.""" - ctx = WebhookContext( - hook_id="hook-warning-4", - event_type="pull_request", - repository="owner/repo", - repository_full_name="owner/repo", - ) - ctx.workflow_steps["validation"] = {"status": "completed", "success": False} - assert ctx.success is True - assert ctx._derive_level() == "WARNING" + # With can_merge=False step + ctx.workflow_steps["merge_check"] = {"status": "completed", "can_merge": False} + assert ctx._derive_level() == "COMPLETED" class TestContextManagement: diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index a8acde44..589d6df2 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -782,7 +782,7 @@ def test_parse_json_log_entry_valid_json(self) -> None: "action": "opened", "repository": "org/test-repo", "api_user": "test-user", - "level": "INFO", + "level": "COMPLETED", "success": true, "token_spend": 35, "timing": { @@ -798,7 +798,7 @@ def test_parse_json_log_entry_valid_json(self) -> None: assert entry is not None assert entry.timestamp == datetime.datetime(2025, 7, 31, 10, 30, 0, 123000, tzinfo=datetime.UTC) - assert entry.level == "INFO" + assert entry.level == "COMPLETED" assert entry.logger_name == "GithubWebhook" assert entry.hook_id == "abc123-def456" assert entry.event_type == "pull_request" @@ -821,7 +821,7 @@ def test_parse_json_log_entry_failed_webhook(self) -> None: "action": "created", "repository": "org/repo", 
"api_user": "user1", - "level": "ERROR", + "level": "COMPLETED", "success": false, "token_spend": 10, "timing": { @@ -838,7 +838,7 @@ def test_parse_json_log_entry_failed_webhook(self) -> None: entry = parser.parse_json_log_entry(json_line) assert entry is not None - assert entry.level == "ERROR" + assert entry.level == "COMPLETED" assert entry.hook_id == "failed-hook" assert entry.task_status == "failed" assert entry.pr_number == 456 @@ -853,7 +853,7 @@ def test_parse_json_log_entry_without_pr(self) -> None: "event_type": "push", "repository": "org/repo", "api_user": "user2", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": { "started_at": "2025-07-31T12:00:00Z" @@ -923,7 +923,7 @@ def test_parse_json_log_entry_timezone_handling(self) -> None: "hook_id": "z-time", "event_type": "push", "repository": "org/repo", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -936,7 +936,7 @@ def test_parse_json_log_entry_timezone_handling(self) -> None: "hook_id": "plus-time", "event_type": "push", "repository": "org/repo", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00+00:00"} }""" @@ -953,7 +953,7 @@ def test_parse_json_log_entry_extracts_all_fields(self) -> None: "action": "synchronize", "repository": "owner/repo-name", "api_user": "github-user", - "level": "INFO", + "level": "COMPLETED", "success": true, "token_spend": 42, "timing": { @@ -971,7 +971,7 @@ def test_parse_json_log_entry_extracts_all_fields(self) -> None: assert entry is not None assert entry.timestamp == datetime.datetime(2025, 8, 1, 14, 30, 45, 678000, tzinfo=datetime.UTC) - assert entry.level == "INFO" + assert entry.level == "COMPLETED" assert entry.logger_name == "GithubWebhook" assert entry.message is not None assert entry.hook_id == "complete-hook" @@ -990,12 +990,13 @@ def test_parse_json_log_file_multiple_entries(self, tmp_path: Path) -> None: # Each JSON object must be on a single line (JSON lines format) json_content = ( '{"hook_id": "hook1", "event_type": "push", "repository": "org/repo1", ' - '"api_user": "user1", "level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + '"api_user": "user1", "level": "COMPLETED", "success": true, ' + '"timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' '{"hook_id": "hook2", "event_type": "pull_request", "action": "opened", ' - '"repository": "org/repo2", "api_user": "user2", "level": "INFO", "success": true, ' + '"repository": "org/repo2", "api_user": "user2", "level": "COMPLETED", "success": true, ' '"timing": {"started_at": "2025-07-31T10:01:00Z"}, "pr": {"number": 123}}\n' '{"hook_id": "hook3", "event_type": "issue_comment", "repository": "org/repo3", ' - '"api_user": "user3", "level": "ERROR", "success": false, ' + '"api_user": "user3", "level": "COMPLETED", "success": false, ' '"timing": {"started_at": "2025-07-31T10:02:00Z"}}\n' ) log_file = tmp_path / "webhooks_test.json" @@ -1029,11 +1030,11 @@ def test_parse_json_log_file_skips_invalid_lines(self, tmp_path: Path) -> None: # Each JSON object must be on a single line (JSON lines format) json_content = ( '{"hook_id": "valid1", "event_type": "push", "repository": "org/repo", ' - '"level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + '"level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' "this is not valid json\n" "{incomplete json\n" '{"hook_id": "valid2", "event_type": "pull_request", 
"repository": "org/repo", ' - '"level": "INFO", "success": true, "timing": {"started_at": "2025-07-31T10:01:00Z"}}\n' + '"level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:01:00Z"}}\n' '{"missing_timestamp": true}\n' ) log_file = tmp_path / "mixed.json" @@ -1142,7 +1143,7 @@ def test_json_summary_message_format_with_action(self) -> None: "event_type": "pull_request", "action": "synchronize", "repository": "org/repo", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -1159,7 +1160,7 @@ def test_json_summary_message_format_without_action(self) -> None: "hook_id": "hook1", "event_type": "push", "repository": "org/repo", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"} }""" @@ -1177,7 +1178,7 @@ def test_parse_json_log_entry_null_pr_field(self) -> None: "hook_id": "null-pr", "event_type": "push", "repository": "org/repo", - "level": "INFO", + "level": "COMPLETED", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}, "pr": null @@ -1289,7 +1290,7 @@ def test_parse_json_log_entry_mixed_types(self, tmp_path: Path) -> None: "duration_ms": 5000, }, "pr": {"number": 10, "title": "Test PR", "author": "dev"}, - "level": "INFO", + "level": "COMPLETED", }) log_file = tmp_path / "webhooks_2026-03-15.json" @@ -1494,3 +1495,201 @@ def _append_log() -> None: # Should have collected exactly 1 entry from the new content (not from rotated files) assert len(entries) == 1 assert entries[0].message == "New entry after monitoring started" + + def test_parse_json_log_line_empty_timestamp(self) -> None: + """Test _parse_json_log_line returns None when timestamp is empty.""" + parser = LogParser() + json_line = json.dumps({ + "type": "log_entry", + "timestamp": "", + "level": "INFO", + "logger_name": "GithubWebhook", + "message": "test", + }) + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + def test_parse_json_log_line_naive_timestamp(self) -> None: + """Test _parse_json_log_line adds UTC to naive timestamp.""" + parser = LogParser() + json_line = json.dumps({ + "type": "log_entry", + "timestamp": "2025-07-31T10:00:00", + "level": "INFO", + "logger_name": "GithubWebhook", + "message": "naive tz test", + }) + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.timestamp.tzinfo == datetime.UTC + assert entry.message == "naive tz test" + + def test_parse_json_log_line_invalid_timestamp(self) -> None: + """Test _parse_json_log_line returns None for invalid timestamp.""" + parser = LogParser() + json_line = json.dumps({ + "type": "log_entry", + "timestamp": "not-a-date", + "level": "INFO", + "logger_name": "GithubWebhook", + "message": "bad ts", + }) + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + def test_parse_json_webhook_summary_naive_timestamp(self) -> None: + """Test _parse_json_webhook_summary adds UTC to naive timestamp.""" + parser = LogParser() + json_line = json.dumps({ + "hook_id": "naive-tz", + "event_type": "push", + "repository": "org/repo", + "success": True, + "timing": {"started_at": "2025-07-31T10:00:00"}, + }) + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.timestamp.tzinfo == datetime.UTC + + def test_parse_json_webhook_summary_success_none(self) -> None: + """Test _parse_json_webhook_summary sets task_status to None when success is None.""" + parser = LogParser() + json_line = json.dumps({ 
+    def test_parse_json_webhook_summary_success_none(self) -> None:
+        """Test _parse_json_webhook_summary sets task_status to None when success is None."""
+        parser = LogParser()
+        json_line = json.dumps({
+            "hook_id": "null-success",
+            "event_type": "push",
+            "repository": "org/repo",
+            "timing": {"started_at": "2025-07-31T10:00:00Z"},
+        })
+
+        entry = parser.parse_json_log_entry(json_line)
+
+        assert entry is not None
+        assert entry.task_status is None
+
+    def test_parse_json_log_file_pretty_printed_format(self, tmp_path: Path) -> None:
+        """Test parsing JSON log file with pretty-printed entries separated by blank lines."""
+        parser = LogParser()
+        entry1 = json.dumps(
+            {
+                "hook_id": "pretty1",
+                "event_type": "push",
+                "repository": "org/repo",
+                "level": "COMPLETED",
+                "success": True,
+                "timing": {"started_at": "2025-07-31T10:00:00Z"},
+            },
+            indent=2,
+        )
+        entry2 = json.dumps(
+            {
+                "hook_id": "pretty2",
+                "event_type": "pull_request",
+                "action": "opened",
+                "repository": "org/repo",
+                "level": "COMPLETED",
+                "success": False,
+                "timing": {"started_at": "2025-07-31T10:01:00Z"},
+            },
+            indent=2,
+        )
+        # Separate entries with blank lines (pretty-printed format)
+        log_file = tmp_path / "webhooks_pretty.json"
+        log_file.write_text(f"{entry1}\n\n{entry2}")
+
+        entries = parser.parse_json_log_file(log_file)
+
+        assert len(entries) == 2
+        assert entries[0].hook_id == "pretty1"
+        assert entries[1].hook_id == "pretty2"
+
+    @pytest.mark.asyncio
+    async def test_tail_log_file_nonexistent(self, tmp_path: Path) -> None:
+        """Test tail_log_file returns immediately for non-existent file."""
+        parser = LogParser()
+        nonexistent = tmp_path / "does_not_exist.log"
+
+        entries = [entry async for entry in parser.tail_log_file(nonexistent, follow=False)]
+
+        assert entries == []
+
+    @pytest.mark.asyncio
+    async def test_tail_json_log_file_nonexistent(self, tmp_path: Path) -> None:
+        """Test tail_json_log_file returns immediately for non-existent file."""
+        parser = LogParser()
+        nonexistent = tmp_path / "does_not_exist.json"
+
+        entries = [entry async for entry in parser.tail_json_log_file(nonexistent, follow=False)]
+
+        assert entries == []
+
+    @pytest.mark.asyncio
+    async def test_tail_json_log_file_follow_false_breaks(self, tmp_path: Path) -> None:
+        """Test tail_json_log_file with follow=False breaks when no new data."""
+        parser = LogParser()
+        log_file = tmp_path / "webhooks_tail.json"
+        # Create file with some existing content (tail seeks past it)
+        log_file.write_text('{"hook_id":"old"}\n')
+
+        # With follow=False, after seeking to end, readline returns "" and it breaks
+        entries = [entry async for entry in parser.tail_json_log_file(log_file, follow=False)]
+
+        assert entries == []
+
+    @pytest.mark.asyncio
+    async def test_monitor_log_directory_json_file(self, tmp_path: Path) -> None:
+        """Test monitor_log_directory selects .json file and uses tail_json_log_file."""
+        parser = LogParser()
+
+        # Create a .json log file
+        json_log = tmp_path / "webhooks_2025-07-31.json"
+        json_log.write_text("")
+
+        entries: list[LogEntry] = []
+
+        async def collect_entries(async_gen: AsyncIterator[LogEntry], max_entries: int = 1) -> None:
+            count = 0
+            async for entry in async_gen:
+                entries.append(entry)
+                count += 1
+                if count >= max_entries:
+                    break
+
+        # Start monitoring
+        monitor_task = asyncio.create_task(
+            collect_entries(parser.monitor_log_directory(tmp_path, pattern="webhooks_*.json"), max_entries=1)
+        )
+
+        await asyncio.sleep(0.1)
+
+        # Append a valid JSON entry
+        json_entry = json.dumps({
+            "hook_id": "monitor-json",
+            "event_type": "push",
+            "repository": "org/repo",
+            "level": "COMPLETED",
+            "success": True,
+            "timing": {"started_at": "2025-07-31T10:00:00Z"},
+        })
+
+        def _append() -> None:
+            with open(json_log, "a") as f:
+                f.write(f"{json_entry}\n")
+                f.flush()
+
+        await asyncio.to_thread(_append)
+
+        try:
+            await asyncio.wait_for(monitor_task, timeout=2.0)
+        except TimeoutError:
+            monitor_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await monitor_task
+
+        assert len(entries) == 1
+        assert entries[0].hook_id == "monitor-json"
diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py
index 15194179..77f0849a 100644
--- a/webhook_server/utils/context.py
+++ b/webhook_server/utils/context.py
@@ -334,22 +334,14 @@ def _build_summary(self) -> str | None:
     def _derive_level(self) -> str:
         """Derive log level from execution context.
 
+        Webhook summaries always return COMPLETED since the webhook
+        finished processing. Success/failure is tracked separately
+        in the success field and error field.
+
         Returns:
-            Log level string: ERROR, WARNING, or INFO
+            "COMPLETED" for webhook summary entries
         """
-        if not self.success:
-            return "ERROR"
-
-        for step_data in self.workflow_steps.values():
-            # Explicit failure status
-            if step_data.get("status") == "failed":
-                return "WARNING"
-
-            # Steps completed with non-fatal failure indicators
-            if step_data.get("status") == "completed" and not self._detect_success(step_data):
-                return "WARNING"
-
-        return "INFO"
+        return "COMPLETED"
 
     def to_dict(self) -> dict[str, Any]:
         """Convert context to dictionary for JSON serialization.
diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py
index 5965fbbb..ab1029b0 100644
--- a/webhook_server/utils/structured_logger.py
+++ b/webhook_server/utils/structured_logger.py
@@ -232,7 +232,7 @@ def write_error_log(
             "token_spend": None,
             "initial_rate_limit": None,
             "final_rate_limit": None,
-            "level": "ERROR",
+            "level": "COMPLETED",
             "success": False,
             "error": {
                 "type": "WebhookProcessingError",
diff --git a/webhook_server/web/static/css/log_viewer.css b/webhook_server/web/static/css/log_viewer.css
index ad337980..12f1790f 100644
--- a/webhook_server/web/static/css/log_viewer.css
+++ b/webhook_server/web/static/css/log_viewer.css
@@ -570,6 +570,7 @@ body {
   overflow-wrap: anywhere;
 }
 
+.log-entry.COMPLETED .level { background-color: var(--log-success-bg); color: #155724; }
 .log-entry.INFO .level { background-color: var(--level-info-bg); color: var(--level-info-border); }
 .log-entry.ERROR .level { background-color: var(--log-error-bg); color: #dc3545; }
 .log-entry.WARNING .level { background-color: var(--log-warning-bg); color: #856404; }
@@ -577,6 +578,7 @@ body {
 .log-entry.STEP .level { background-color: var(--log-step-bg); color: #0056b3; }
 .log-entry.SUCCESS .level { background-color: var(--log-success-bg); color: #155724; }
 
+.log-entry.COMPLETED { background-color: rgba(25, 135, 84, 0.05); }
 .log-entry.INFO { background-color: transparent; }
 .log-entry.ERROR { background-color: rgba(220, 53, 69, 0.05); }
 .log-entry.WARNING { background-color: rgba(255, 193, 7, 0.05); }
diff --git a/webhook_server/web/static/js/log_viewer.js b/webhook_server/web/static/js/log_viewer.js
index 10199390..530590ce 100644
--- a/webhook_server/web/static/js/log_viewer.js
+++ b/webhook_server/web/static/js/log_viewer.js
@@ -180,6 +180,7 @@ function createLogEntryElement(entry) {
   // Whitelist of allowed log levels to prevent class-name injection
   const allowedLevels = [
+    "COMPLETED",
     "DEBUG",
     "INFO",
     "WARNING",
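[Editor's note] The pieces above read most clearly end to end: the context
serializer emits one compact JSON object per line with level "COMPLETED",
and the parser maps it back to a LogEntry that the viewer can then style and
whitelist. A minimal round-trip sketch (assumes LogParser is importable from
webhook_server.libs.log_parser; field names are taken from the tests in this
patch):

    import json

    from webhook_server.libs.log_parser import LogParser

    # One webhook summary, as a single JSONL line.
    summary_line = json.dumps({
        "hook_id": "demo-hook",
        "event_type": "pull_request",
        "action": "opened",
        "repository": "org/repo",
        "level": "COMPLETED",
        "success": True,
        "timing": {"started_at": "2025-07-31T10:00:00Z"},
    })

    entry = LogParser().parse_json_log_entry(summary_line)
    assert entry is not None
    assert entry.level == "COMPLETED"  # level comes straight from the record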
diff --git a/webhook_server/web/templates/log_viewer.html b/webhook_server/web/templates/log_viewer.html
index 50b0eff5..e48c692f 100644
--- a/webhook_server/web/templates/log_viewer.html
+++ b/webhook_server/web/templates/log_viewer.html
@@ -68,6 +68,7 @@
 
Filters & Controls
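[Editor's endnote] For orientation, the dispatch that lets webhook summaries
and per-record log lines share one JSONL file, as exercised by the parser
tests in this patch (the helper functions below are illustrative stand-ins;
the real routing lives in LogParser.parse_json_log_entry):

    # Illustrative stand-ins for the parser's two branches.
    def parse_log_line(data: dict) -> str:
        return f"log_entry from {data.get('logger_name', 'unknown')}"

    def parse_webhook_summary(data: dict) -> str:
        return f"summary for {data.get('hook_id', 'unknown')}"

    def route(data: dict) -> str:
        # Records without a "type" field are treated as legacy summaries.
        if data.get("type", "webhook_summary") == "log_entry":
            return parse_log_line(data)
        return parse_webhook_summary(data)

    assert route({"type": "log_entry", "logger_name": "GithubWebhook"}).startswith("log_entry")
    assert route({"hook_id": "abc123"}).startswith("summary")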