diff --git a/webhook_server/tests/test_logging_separation.py b/webhook_server/tests/test_logging_separation.py index 6169f04f..302037a9 100644 --- a/webhook_server/tests/test_logging_separation.py +++ b/webhook_server/tests/test_logging_separation.py @@ -1,3 +1,4 @@ +import datetime from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -5,7 +6,10 @@ import webhook_server.app from webhook_server.app import get_log_viewer_controller, lifespan from webhook_server.libs.config import Config -from webhook_server.utils.helpers import get_log_file_path +from webhook_server.libs.log_parser import LogEntry +from webhook_server.utils.helpers import get_log_file_path, get_logger_with_params +from webhook_server.utils.json_log_handler import JsonLogHandler +from webhook_server.web.log_viewer import LogViewerController def test_get_log_file_path_absolute(): @@ -18,8 +22,8 @@ def test_get_log_file_path_absolute(): def test_get_log_file_path_relative(): config = MagicMock(spec=Config) config.data_dir = "/tmp/data" - # Mock os.makedirs to avoid filesystem side effects - with patch("os.makedirs") as mock_makedirs: + # Mock os.path.isdir to force makedirs path, and os.makedirs to avoid filesystem side effects + with patch("os.path.isdir", return_value=False), patch("os.makedirs") as mock_makedirs: path = get_log_file_path(config, "server.log") assert path == "/tmp/data/logs/server.log" mock_makedirs.assert_called_once_with("/tmp/data/logs", exist_ok=True) @@ -114,3 +118,102 @@ def test_log_viewer_controller_logging_separation(): # Verify mock_get_logger_params.assert_called_with(log_file_name="logs_server.log") MockController.assert_called_with(logger=mock_logger) + + +class TestJsonLogHandlerNotAttachedForInfrastructureLoggers: + """Verify that JsonLogHandler is NOT attached when log_file_name is explicit. + + Infrastructure loggers (MCP server, log viewer) use explicit log_file_name + parameters. They must not write to webhooks_*.json to avoid polluting + webhook log queries with noise entries. + """ + + def test_no_json_handler_when_log_file_name_explicit(self) -> None: + """get_logger_with_params with explicit log_file_name must skip JsonLogHandler.""" + with ( + patch("webhook_server.utils.helpers.Config") as MockConfig, + patch("webhook_server.utils.helpers.get_logger") as mock_get_logger, + ): + mock_config_instance = MockConfig.return_value + mock_config_instance.data_dir = "/tmp/data" + mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: return_on_none + + mock_logger = MagicMock() + mock_logger.handlers = [] + mock_get_logger.return_value = mock_logger + + get_logger_with_params(log_file_name="mcp_server.log") + + # JsonLogHandler must NOT be added + mock_logger.addHandler.assert_not_called() + + def test_json_handler_attached_when_log_file_name_default(self) -> None: + """get_logger_with_params without log_file_name must attach JsonLogHandler.""" + with ( + patch("webhook_server.utils.helpers.Config") as MockConfig, + patch("webhook_server.utils.helpers.get_logger") as mock_get_logger, + patch("os.path.isdir", return_value=False), + patch("os.makedirs"), + ): + mock_config_instance = MockConfig.return_value + mock_config_instance.data_dir = "/tmp/data" + # Return a log file config so a file handler exists + mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: ( + "webhook_server.log" if value == "log-file" else return_on_none + ) + + mock_logger = MagicMock() + mock_logger.handlers = [] # No existing handlers + mock_get_logger.return_value = mock_logger + + get_logger_with_params() # No explicit log_file_name + + # JsonLogHandler MUST be added + mock_logger.addHandler.assert_called_once() + + added_handler = mock_logger.addHandler.call_args[0][0] + assert isinstance(added_handler, JsonLogHandler) + + +class TestIsInfrastructureNoise: + """Verify _is_infrastructure_noise correctly identifies noise entries.""" + + @staticmethod + def _make_entry(logger_name: str, hook_id: str | None = None) -> LogEntry: + return LogEntry( + timestamp=datetime.datetime.now(tz=datetime.UTC), + level="INFO", + logger_name=logger_name, + message="test message", + hook_id=hook_id, + ) + + def test_mcp_logger_without_hook_id_is_noise(self) -> None: + entry = self._make_entry("mcp.server.streamable_http", hook_id=None) + assert LogViewerController._is_infrastructure_noise(entry) is True + + def test_logs_server_logger_without_hook_id_is_noise(self) -> None: + entry = self._make_entry("logs_server.log", hook_id=None) + assert LogViewerController._is_infrastructure_noise(entry) is True + + def test_mcp_server_log_without_hook_id_is_noise(self) -> None: + entry = self._make_entry("mcp_server.log", hook_id=None) + assert LogViewerController._is_infrastructure_noise(entry) is True + + def test_log_parser_without_hook_id_is_noise(self) -> None: + entry = self._make_entry("log_parser", hook_id=None) + assert LogViewerController._is_infrastructure_noise(entry) is True + + def test_infra_logger_with_hook_id_is_not_noise(self) -> None: + """Infrastructure logger entry WITH hook_id should be preserved.""" + entry = self._make_entry("mcp.server.streamable_http", hook_id="abc-123") + assert LogViewerController._is_infrastructure_noise(entry) is False + + def test_webhook_logger_without_hook_id_is_not_noise(self) -> None: + """Non-infrastructure logger without hook_id is kept (could be startup log).""" + entry = self._make_entry("GithubWebhook", hook_id=None) + assert LogViewerController._is_infrastructure_noise(entry) is False + + def test_webhook_logger_with_hook_id_is_not_noise(self) -> None: + entry = self._make_entry("GithubWebhook", hook_id="abc-123") + assert LogViewerController._is_infrastructure_noise(entry) is False diff --git a/webhook_server/utils/helpers.py b/webhook_server/utils/helpers.py index 4c45e0bf..92ee0107 100644 --- a/webhook_server/utils/helpers.py +++ b/webhook_server/utils/helpers.py @@ -104,11 +104,16 @@ def get_logger_with_params( ) # Attach JsonLogHandler for writing log records to the webhook JSONL file. - # Only attach when a log file path is configured (skip console-only loggers) - # and only once per logger instance to avoid duplicate handlers. + # Only attach when: + # - A log file path is configured (skip console-only loggers) + # - The logger is for the main webhook log (log_file_name not explicitly set) + # Infrastructure loggers (mcp_server.log, logs_server.log) must NOT write + # to webhooks_*.json because their entries lack webhook context (hook_id, + # event_type, etc.) and pollute the webhook log with noise entries. + # - Only once per logger instance to avoid duplicate handlers. # Uses _config.data_dir/logs (same directory as StructuredLogWriter) instead # of deriving from the text log file path, which may differ for absolute paths. - if log_file_path_resolved: + if log_file_path_resolved and not log_file_name: log_dir = os.path.join(_config.data_dir, "logs") with _JSON_HANDLER_LOCK: if not any(isinstance(h, JsonLogHandler) and h.log_dir == Path(log_dir) for h in logger.handlers): diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index bac10c0b..daed2196 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -29,6 +29,17 @@ class LogViewerController: # 60 seconds provides a reasonable maximum window for log correlation _DEFAULT_STEP_DURATION_MS = 60000 + # Logger names from infrastructure components (MCP server, log viewer itself) that + # should be excluded from webhook log queries. These loggers produce high-frequency + # noise entries without webhook context (hook_id, event_type are None) and would + # drown out actual webhook processing entries in unfiltered queries. + _INFRASTRUCTURE_LOGGERS: frozenset[str] = frozenset({ + "mcp.server.streamable_http", + "logs_server.log", + "log_parser", + "mcp_server.log", + }) + # Workflow stage patterns for PR flow analysis # These patterns match log messages to identify workflow stages and can be updated # when log message formats change without modifying the analysis logic @@ -288,6 +299,26 @@ def _entry_matches_filters( return True + @staticmethod + def _is_infrastructure_noise(entry: LogEntry) -> bool: + """Check if a log entry is infrastructure noise that should be excluded. + + Infrastructure loggers (MCP server, log viewer) produce high-frequency + entries without webhook context. These are filtered out to prevent them + from drowning actual webhook processing entries in unfiltered queries. + + Only excludes entries that have NO webhook context (hook_id is None), + preserving any infrastructure log that happens to correlate with a webhook. + + Args: + entry: LogEntry to check + + Returns: + True if the entry is infrastructure noise and should be excluded + + """ + return entry.logger_name in LogViewerController._INFRASTRUCTURE_LOGGERS and entry.hook_id is None + async def export_logs( self, format_type: str, @@ -1092,15 +1123,17 @@ def sort_key(f: Path) -> tuple[int, float]: if log_file.suffix == ".json": # JSONL files: one compact JSON object per line # Process both "log_entry" and "webhook_summary" entries + # Skip infrastructure logger entries that lack webhook context async for line in f: entry = self.log_parser.parse_json_log_entry(line) - if entry: + if entry and not LogViewerController._is_infrastructure_noise(entry): buffer.append(entry) else: # Text log files: parse line by line + # Skip infrastructure logger entries that lack webhook context async for line in f: entry = self.log_parser.parse_log_entry(line) - if entry: + if entry and not LogViewerController._is_infrastructure_noise(entry): buffer.append(entry) for entry in reversed(buffer): @@ -1162,9 +1195,10 @@ async def _stream_text_log_entries(self, max_files: int = 25, max_entries: int = buffer: deque[LogEntry] = deque(maxlen=remaining_capacity) async with aiofiles.open(log_file, encoding="utf-8") as f: + # Skip infrastructure logger entries that lack webhook context async for line in f: entry = self.log_parser.parse_log_entry(line) - if entry: + if entry and not LogViewerController._is_infrastructure_noise(entry): buffer.append(entry) for entry in reversed(buffer):