Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 106 additions & 3 deletions webhook_server/tests/test_logging_separation.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import datetime
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

import webhook_server.app
from webhook_server.app import get_log_viewer_controller, lifespan
from webhook_server.libs.config import Config
from webhook_server.utils.helpers import get_log_file_path
from webhook_server.libs.log_parser import LogEntry
from webhook_server.utils.helpers import get_log_file_path, get_logger_with_params
from webhook_server.utils.json_log_handler import JsonLogHandler
from webhook_server.web.log_viewer import LogViewerController


def test_get_log_file_path_absolute():
Expand All @@ -18,8 +22,8 @@ def test_get_log_file_path_absolute():
def test_get_log_file_path_relative():
config = MagicMock(spec=Config)
config.data_dir = "/tmp/data"
# Mock os.makedirs to avoid filesystem side effects
with patch("os.makedirs") as mock_makedirs:
# Mock os.path.isdir to force makedirs path, and os.makedirs to avoid filesystem side effects
with patch("os.path.isdir", return_value=False), patch("os.makedirs") as mock_makedirs:
path = get_log_file_path(config, "server.log")
assert path == "/tmp/data/logs/server.log"
mock_makedirs.assert_called_once_with("/tmp/data/logs", exist_ok=True)
Expand Down Expand Up @@ -114,3 +118,102 @@ def test_log_viewer_controller_logging_separation():
# Verify
mock_get_logger_params.assert_called_with(log_file_name="logs_server.log")
MockController.assert_called_with(logger=mock_logger)


class TestJsonLogHandlerNotAttachedForInfrastructureLoggers:
"""Verify that JsonLogHandler is NOT attached when log_file_name is explicit.

Infrastructure loggers (MCP server, log viewer) use explicit log_file_name
parameters. They must not write to webhooks_*.json to avoid polluting
webhook log queries with noise entries.
"""

def test_no_json_handler_when_log_file_name_explicit(self) -> None:
"""get_logger_with_params with explicit log_file_name must skip JsonLogHandler."""
with (
patch("webhook_server.utils.helpers.Config") as MockConfig,
patch("webhook_server.utils.helpers.get_logger") as mock_get_logger,
):
mock_config_instance = MockConfig.return_value
mock_config_instance.data_dir = "/tmp/data"
mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: return_on_none

mock_logger = MagicMock()
mock_logger.handlers = []
mock_get_logger.return_value = mock_logger

get_logger_with_params(log_file_name="mcp_server.log")

# JsonLogHandler must NOT be added
mock_logger.addHandler.assert_not_called()

def test_json_handler_attached_when_log_file_name_default(self) -> None:
"""get_logger_with_params without log_file_name must attach JsonLogHandler."""
with (
patch("webhook_server.utils.helpers.Config") as MockConfig,
patch("webhook_server.utils.helpers.get_logger") as mock_get_logger,
patch("os.path.isdir", return_value=False),
patch("os.makedirs"),
):
mock_config_instance = MockConfig.return_value
mock_config_instance.data_dir = "/tmp/data"
# Return a log file config so a file handler exists
mock_config_instance.get_value.side_effect = lambda value, return_on_none=None: (
"webhook_server.log" if value == "log-file" else return_on_none
)

mock_logger = MagicMock()
mock_logger.handlers = [] # No existing handlers
mock_get_logger.return_value = mock_logger

get_logger_with_params() # No explicit log_file_name

# JsonLogHandler MUST be added
mock_logger.addHandler.assert_called_once()

added_handler = mock_logger.addHandler.call_args[0][0]
assert isinstance(added_handler, JsonLogHandler)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


class TestIsInfrastructureNoise:
"""Verify _is_infrastructure_noise correctly identifies noise entries."""

@staticmethod
def _make_entry(logger_name: str, hook_id: str | None = None) -> LogEntry:
return LogEntry(
timestamp=datetime.datetime.now(tz=datetime.UTC),
level="INFO",
logger_name=logger_name,
message="test message",
hook_id=hook_id,
)

def test_mcp_logger_without_hook_id_is_noise(self) -> None:
entry = self._make_entry("mcp.server.streamable_http", hook_id=None)
assert LogViewerController._is_infrastructure_noise(entry) is True

def test_logs_server_logger_without_hook_id_is_noise(self) -> None:
entry = self._make_entry("logs_server.log", hook_id=None)
assert LogViewerController._is_infrastructure_noise(entry) is True

def test_mcp_server_log_without_hook_id_is_noise(self) -> None:
entry = self._make_entry("mcp_server.log", hook_id=None)
assert LogViewerController._is_infrastructure_noise(entry) is True

def test_log_parser_without_hook_id_is_noise(self) -> None:
entry = self._make_entry("log_parser", hook_id=None)
assert LogViewerController._is_infrastructure_noise(entry) is True

def test_infra_logger_with_hook_id_is_not_noise(self) -> None:
"""Infrastructure logger entry WITH hook_id should be preserved."""
entry = self._make_entry("mcp.server.streamable_http", hook_id="abc-123")
assert LogViewerController._is_infrastructure_noise(entry) is False

def test_webhook_logger_without_hook_id_is_not_noise(self) -> None:
"""Non-infrastructure logger without hook_id is kept (could be startup log)."""
entry = self._make_entry("GithubWebhook", hook_id=None)
assert LogViewerController._is_infrastructure_noise(entry) is False

def test_webhook_logger_with_hook_id_is_not_noise(self) -> None:
entry = self._make_entry("GithubWebhook", hook_id="abc-123")
assert LogViewerController._is_infrastructure_noise(entry) is False
11 changes: 8 additions & 3 deletions webhook_server/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,16 @@ def get_logger_with_params(
)

# Attach JsonLogHandler for writing log records to the webhook JSONL file.
# Only attach when a log file path is configured (skip console-only loggers)
# and only once per logger instance to avoid duplicate handlers.
# Only attach when:
# - A log file path is configured (skip console-only loggers)
# - The logger is for the main webhook log (log_file_name not explicitly set)
# Infrastructure loggers (mcp_server.log, logs_server.log) must NOT write
# to webhooks_*.json because their entries lack webhook context (hook_id,
# event_type, etc.) and pollute the webhook log with noise entries.
# - Only once per logger instance to avoid duplicate handlers.
# Uses _config.data_dir/logs (same directory as StructuredLogWriter) instead
# of deriving from the text log file path, which may differ for absolute paths.
if log_file_path_resolved:
if log_file_path_resolved and not log_file_name:
log_dir = os.path.join(_config.data_dir, "logs")
with _JSON_HANDLER_LOCK:
if not any(isinstance(h, JsonLogHandler) and h.log_dir == Path(log_dir) for h in logger.handlers):
Expand Down
40 changes: 37 additions & 3 deletions webhook_server/web/log_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ class LogViewerController:
# 60 seconds provides a reasonable maximum window for log correlation
_DEFAULT_STEP_DURATION_MS = 60000

# Logger names from infrastructure components (MCP server, log viewer itself) that
# should be excluded from webhook log queries. These loggers produce high-frequency
# noise entries without webhook context (hook_id, event_type are None) and would
# drown out actual webhook processing entries in unfiltered queries.
_INFRASTRUCTURE_LOGGERS: frozenset[str] = frozenset({
"mcp.server.streamable_http",
"logs_server.log",
"log_parser",
"mcp_server.log",
})

# Workflow stage patterns for PR flow analysis
# These patterns match log messages to identify workflow stages and can be updated
# when log message formats change without modifying the analysis logic
Expand Down Expand Up @@ -288,6 +299,26 @@ def _entry_matches_filters(

return True

@staticmethod
def _is_infrastructure_noise(entry: LogEntry) -> bool:
"""Check if a log entry is infrastructure noise that should be excluded.

Infrastructure loggers (MCP server, log viewer) produce high-frequency
entries without webhook context. These are filtered out to prevent them
from drowning actual webhook processing entries in unfiltered queries.

Only excludes entries that have NO webhook context (hook_id is None),
preserving any infrastructure log that happens to correlate with a webhook.

Args:
entry: LogEntry to check

Returns:
True if the entry is infrastructure noise and should be excluded

"""
return entry.logger_name in LogViewerController._INFRASTRUCTURE_LOGGERS and entry.hook_id is None

async def export_logs(
self,
format_type: str,
Expand Down Expand Up @@ -1092,15 +1123,17 @@ def sort_key(f: Path) -> tuple[int, float]:
if log_file.suffix == ".json":
# JSONL files: one compact JSON object per line
# Process both "log_entry" and "webhook_summary" entries
# Skip infrastructure logger entries that lack webhook context
async for line in f:
entry = self.log_parser.parse_json_log_entry(line)
if entry:
if entry and not LogViewerController._is_infrastructure_noise(entry):
buffer.append(entry)
else:
# Text log files: parse line by line
# Skip infrastructure logger entries that lack webhook context
async for line in f:
entry = self.log_parser.parse_log_entry(line)
if entry:
if entry and not LogViewerController._is_infrastructure_noise(entry):
buffer.append(entry)

for entry in reversed(buffer):
Expand Down Expand Up @@ -1162,9 +1195,10 @@ async def _stream_text_log_entries(self, max_files: int = 25, max_entries: int =
buffer: deque[LogEntry] = deque(maxlen=remaining_capacity)

async with aiofiles.open(log_file, encoding="utf-8") as f:
# Skip infrastructure logger entries that lack webhook context
async for line in f:
entry = self.log_parser.parse_log_entry(line)
if entry:
if entry and not LogViewerController._is_infrastructure_noise(entry):
buffer.append(entry)

for entry in reversed(buffer):
Expand Down