From 268a8dc58ee986bf45a3119895e13c0613581ab1 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 12:02:45 +0200 Subject: [PATCH 01/14] feat: add structured JSON logging system Migrate from text-based logging with regex parsing to structured JSON logging. New components: - webhook_server/utils/context.py: ContextVar-based WebhookContext for request tracking - webhook_server/utils/structured_logger.py: JSON log writer with structured output - webhook_server/tests/test_context.py: Tests for context system - webhook_server/tests/test_structured_logger.py: Tests for structured logger - webhook_server/tests/test_log_viewer.py: Tests for log viewer Modified components: - app.py: Integrate WebhookContext and structured logging - github_api.py: Use structured logger throughout - All handlers: Migrated to structured logging - log_parser.py: Updated to parse JSON logs - log_viewer.py: Enhanced to handle structured logs - app_utils.py, helpers.py: Logging improvements - All test files: Updated for structured logging Benefits: - Eliminates regex-based log parsing - Improved log searchability and filtering - Better structured data extraction - Simplified log analysis --- webhook_server/app.py | 60 +- webhook_server/libs/github_api.py | 241 +++---- .../libs/handlers/check_run_handler.py | 85 +-- .../libs/handlers/issue_comment_handler.py | 64 +- .../libs/handlers/labels_handler.py | 58 -- .../libs/handlers/owners_files_handler.py | 42 +- .../libs/handlers/pull_request_handler.py | 250 ++----- .../handlers/pull_request_review_handler.py | 8 + webhook_server/libs/handlers/push_handler.py | 63 +- .../libs/handlers/runner_handler.py | 175 +---- webhook_server/libs/log_parser.py | 129 ++++ .../tests/test_check_run_handler.py | 31 +- webhook_server/tests/test_context.py | 589 +++++++++++++++ webhook_server/tests/test_github_api.py | 6 +- .../tests/test_helpers_sanitization.py | 77 -- .../tests/test_issue_comment_handler.py | 21 +- webhook_server/tests/test_labels_handler.py | 13 +- webhook_server/tests/test_log_parser.py | 607 ++++++++++++++++ webhook_server/tests/test_log_viewer.py | 433 +++++++++++ .../tests/test_owners_files_handler.py | 5 +- .../tests/test_pull_request_handler.py | 32 +- .../tests/test_pull_request_review_handler.py | 1 + webhook_server/tests/test_push_handler.py | 1 + webhook_server/tests/test_runner_handler.py | 1 + .../tests/test_structured_logger.py | 672 ++++++++++++++++++ webhook_server/utils/app_utils.py | 93 +++ webhook_server/utils/context.py | 293 ++++++++ webhook_server/utils/helpers.py | 22 - webhook_server/utils/structured_logger.py | 288 ++++++++ webhook_server/web/log_viewer.py | 100 ++- 30 files changed, 3444 insertions(+), 1016 deletions(-) create mode 100644 webhook_server/tests/test_context.py create mode 100644 webhook_server/tests/test_log_viewer.py create mode 100644 webhook_server/tests/test_structured_logger.py create mode 100644 webhook_server/utils/context.py create mode 100644 webhook_server/utils/structured_logger.py diff --git a/webhook_server/app.py b/webhook_server/app.py index 2e7711eeb..8f8d096d3 100644 --- a/webhook_server/app.py +++ b/webhook_server/app.py @@ -3,8 +3,10 @@ import json import logging import os +import traceback from collections.abc import AsyncGenerator from contextlib import asynccontextmanager +from datetime import UTC, datetime from typing import Any import httpx @@ -37,13 +39,17 @@ gate_by_allowlist_ips, get_cloudflare_allowlist, get_github_allowlist, + get_workflow_steps_core, + log_webhook_summary, parse_datetime_string, verify_signature, ) +from webhook_server.utils.context import clear_context, create_context from webhook_server.utils.helpers import ( get_logger_with_params, prepare_log_prefix, ) +from webhook_server.utils.structured_logger import write_webhook_log from webhook_server.web.log_viewer import LogViewerController # Constants @@ -384,8 +390,19 @@ async def process_with_error_handling( _delivery_id: GitHub delivery ID for logging _event_type: GitHub event type for logging """ - # Create repository-specific logger in background + # Create structured logging context at the VERY START repository_name = _hook_data.get("repository", {}).get("name", "unknown") + repository_full_name = _hook_data.get("repository", {}).get("full_name", "unknown") + ctx = create_context( + hook_id=_delivery_id, + event_type=_event_type, + repository=repository_name, + repository_full_name=repository_full_name, + action=_hook_data.get("action"), + sender=_hook_data.get("sender", {}).get("login"), + ) + + # Create repository-specific logger _logger = get_logger_with_params(repository_name=repository_name) _log_context = prepare_log_prefix( event_type=_event_type, delivery_id=_delivery_id, repository_name=repository_name @@ -402,12 +419,39 @@ async def process_with_error_handling( except RepositoryNotFoundInConfigError: # Repository-specific error - not exceptional, log as error not exception _logger.error(f"{_log_context} Repository not found in configuration") - except (httpx.ConnectError, httpx.RequestError, requests.exceptions.ConnectionError): + ctx.success = False + ctx.error = { + "type": "RepositoryNotFoundInConfigError", + "message": "Repository not found in configuration", + "traceback": "", + } + except (httpx.ConnectError, httpx.RequestError, requests.exceptions.ConnectionError) as ex: # Network/connection errors - can be transient _logger.exception(f"{_log_context} API connection error - check network connectivity") - except Exception: + ctx.success = False + ctx.error = { + "type": type(ex).__name__, + "message": str(ex), + "traceback": traceback.format_exc(), + } + except Exception as ex: # Catch-all for unexpected errors _logger.exception(f"{_log_context} Unexpected error in background webhook processing") + ctx.success = False + ctx.error = { + "type": type(ex).__name__, + "message": str(ex), + "traceback": traceback.format_exc(), + } + finally: + # Set completion time and log summary from structured context + if ctx: + ctx.completed_at = datetime.now(UTC) + log_webhook_summary(ctx, _logger, _log_context) + + # ALWAYS write the structured log, even on error + write_webhook_log(ctx) + clear_context() # Start background task immediately using asyncio.create_task # This ensures the HTTP response is sent immediately without waiting @@ -853,14 +897,6 @@ async def get_pr_flow_data(hook_id: str, controller: LogViewerController = contr return await _get_pr_flow_data_core(controller=controller, hook_id=hook_id) -async def _get_workflow_steps_core( - controller: LogViewerController, - hook_id: str, -) -> dict[str, Any]: - """Core logic for getting workflow step timeline data for a specific hook ID.""" - return controller.get_workflow_steps(hook_id) - - @FASTAPI_APP.get( "/logs/api/workflow-steps/{hook_id}", operation_id="get_workflow_steps", @@ -1096,7 +1132,7 @@ async def get_workflow_steps(hook_id: str, controller: LogViewerController = con - Historical analysis is available for completed workflows - Real-time step data for in-progress workflows """ - return await _get_workflow_steps_core(controller=controller, hook_id=hook_id) + return get_workflow_steps_core(controller=controller, hook_id=hook_id) @FASTAPI_APP.websocket("/logs/ws") diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index 41c6493e2..15c7d30fe 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -8,6 +8,7 @@ import shutil import tempfile import threading +import traceback from typing import Any import github @@ -35,12 +36,12 @@ SUCCESS_STR, TOX_STR, ) +from webhook_server.utils.context import WebhookContext, get_context from webhook_server.utils.github_repository_settings import ( get_repository_github_app_api, ) from webhook_server.utils.helpers import ( _redact_secrets, - format_task_fields, get_api_with_highest_rate_limit, get_apis_and_tokes_from_config, get_github_repo_api, @@ -87,6 +88,9 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self.github_event: str = headers["X-GitHub-Event"] self.config = Config(repository=self.repository_name, logger=self.logger) + # Get structured logging context (created in app.py before this) + self.ctx: WebhookContext | None = get_context() + # Type annotations for conditionally assigned attributes self.repository: Repository self.repository_by_github_app: Repository @@ -178,6 +182,27 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. "Report bugs in [Issues](https://github.com/myakove/github-webhook-server/issues)" ) + async def _update_context_metrics(self) -> None: + """Update context with token metrics at end of processing.""" + if not self.ctx: + return + + if self.requester_wrapper: + self.ctx.token_spend = self.requester_wrapper.count - self.initial_wrapper_count + + if self.initial_rate_limit_remaining is not None: + self.ctx.initial_rate_limit = self.initial_rate_limit_remaining + if self.requester_wrapper: + self.ctx.final_rate_limit = max( + 0, self.initial_rate_limit_remaining - (self.requester_wrapper.count - self.initial_wrapper_count) + ) + + # Update api_user + self.ctx.api_user = self.api_user + + # Mark webhook routing as completed + self.ctx.complete_step("webhook_routing") + async def _get_token_metrics(self) -> str: """Get token metrics (API rate limit consumption) for this webhook. @@ -245,11 +270,9 @@ async def _clone_repository( Raises: RuntimeError: If clone fails (aborts webhook processing) """ - # Log start FIRST - even before early returns - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'started')} " - "Cloning repository for handler worktrees" - ) + # Start context step + if self.ctx: + self.ctx.start_step("repo_clone") if self._repo_cloned: self.logger.debug(f"{self.log_prefix} Repository already cloned") @@ -258,8 +281,7 @@ async def _clone_repository( # Validate that at least one argument is provided if pull_request is None and not checkout_ref: self.logger.error( - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} " - "Invalid arguments: either pull_request or checkout_ref must be provided" + f"{self.log_prefix} Invalid arguments: either pull_request or checkout_ref must be provided" ) raise ValueError( f"{self.log_prefix} _clone_repository() requires either pull_request or checkout_ref to be provided" @@ -281,10 +303,7 @@ def redact_output(value: str) -> str: if not rc: redacted_err = redact_output(err) - self.logger.error( - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} " - f"Failed to clone repository: {redacted_err}" - ) + self.logger.error(f"{self.log_prefix} Failed to clone repository: {redacted_err}") raise RuntimeError(f"Failed to clone repository: {redacted_err}") # Configure git user @@ -343,66 +362,56 @@ def redact_output(value: str) -> str: ) if not rc: redacted_err = redact_output(err) - self.logger.error( - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} " - f"Failed to checkout {checkout_target}: {redacted_err}" - ) + self.logger.error(f"{self.log_prefix} Failed to checkout {checkout_target}: {redacted_err}") raise RuntimeError(f"Failed to checkout {checkout_target}: {redacted_err}") self._repo_cloned = True - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'completed')} " - f"Repository cloned to {self.clone_repo_dir} (ref: {checkout_target})" - ) + self.logger.info(f"{self.log_prefix} Repository cloned to {self.clone_repo_dir} (ref: {checkout_target})") + + # Complete context step on success + if self.ctx: + self.ctx.complete_step("repo_clone", checkout_ref=checkout_target) except RuntimeError: + # Fail context step on RuntimeError + if self.ctx: + self.ctx.fail_step("repo_clone", RuntimeError("Clone failed"), traceback.format_exc()) # Re-raise RuntimeError unchanged to avoid double-wrapping raise except Exception as ex: - self.logger.exception( - f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} " - f"Exception during repository clone: {ex}" - ) + self.logger.exception(f"{self.log_prefix} Exception during repository clone: {ex}") + # Fail context step on exception + if self.ctx: + self.ctx.fail_step("repo_clone", ex, traceback.format_exc()) raise RuntimeError(f"Repository clone failed: {ex}") from ex async def process(self) -> Any: event_log: str = f"Event type: {self.github_event}. event ID: {self.x_github_delivery}" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'started')} " - f"Starting webhook processing: {event_log}", - ) + + # Start webhook routing context step + if self.ctx: + self.ctx.start_step("webhook_routing", event_type=self.github_event) if self.github_event == "ping": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing ping event", - ) self.logger.debug(f"{self.log_prefix} {event_log}") token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: ping - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: ping - {token_metrics}", ) + await self._update_context_metrics() return None if self.github_event == "push": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing push event", - ) self.logger.debug(f"{self.log_prefix} {event_log}") # Skip branch/tag deletions - no processing needed if self.hook_data.get("deleted"): - self.logger.info( - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'skipped')} " - f"Branch/tag deletion detected, skipping processing" - ) + self.logger.info(f"{self.log_prefix} Branch/tag deletion detected, skipping processing") token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed: deletion event (skipped) - {token_metrics}" + self.logger.info( + f"{self.log_prefix} Webhook processing completed: deletion event (skipped) - {token_metrics}" ) + await self._update_context_metrics() return None # Clone repository for push operations (PyPI uploads, container builds) @@ -410,50 +419,36 @@ async def process(self) -> Any: await PushHandler(github_webhook=self).process_push_webhook_data() token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: push - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: push - {token_metrics}", ) + await self._update_context_metrics() return None pull_request = await self.get_pull_request() if pull_request: - # Log how we got the pull request (for workflow tracking) - if self.github_event == "pull_request": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing pull request from webhook payload", - ) - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Fetched pull request data via API (event: {self.github_event})", - ) + # Update context with PR info + if self.ctx: + pr_number = await asyncio.to_thread(lambda: pull_request.number) + pr_title = await asyncio.to_thread(lambda: pull_request.title) + pr_author = await asyncio.to_thread(lambda: pull_request.user.login) + self.ctx.pr_number = pr_number + self.ctx.pr_title = pr_title + self.ctx.pr_author = pr_author self.log_prefix = self.prepare_log_prefix(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing pull request event: {event_log}", - ) self.logger.debug(f"{self.log_prefix} {event_log}") if await asyncio.to_thread(lambda: pull_request.draft): - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Pull request is draft, skipping processing", - ) self.logger.debug(f"{self.log_prefix} Pull request is draft, doing nothing") token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: draft PR (skipped) - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: " + f"draft PR (skipped) - {token_metrics}", ) + await self._update_context_metrics() return None - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing pull request data", - ) self.last_commit = await self._get_last_commit(pull_request=pull_request) self.parent_committer = pull_request.user.login self.last_committer = getattr(self.last_commit.committer, "login", self.parent_committer) @@ -464,88 +459,61 @@ async def process(self) -> Any: await self._clone_repository(pull_request=pull_request) if self.github_event == "issue_comment": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing OWNERS file handler for issue comment", - ) owners_file_handler = OwnersFileHandler(github_webhook=self) owners_file_handler = await owners_file_handler.initialize(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing issue comment with IssueCommentHandler", - ) await IssueCommentHandler( github_webhook=self, owners_file_handler=owners_file_handler ).process_comment_webhook_data(pull_request=pull_request) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: issue_comment - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: issue_comment - {token_metrics}", ) + await self._update_context_metrics() return None elif self.github_event == "pull_request": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing OWNERS file handler for pull request", - ) owners_file_handler = OwnersFileHandler(github_webhook=self) owners_file_handler = await owners_file_handler.initialize(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing pull request with PullRequestHandler", - ) await PullRequestHandler( github_webhook=self, owners_file_handler=owners_file_handler ).process_pull_request_webhook_data(pull_request=pull_request) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: pull_request - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: pull_request - {token_metrics}", ) + await self._update_context_metrics() return None elif self.github_event == "pull_request_review": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing OWNERS file handler for pull request review", - ) owners_file_handler = OwnersFileHandler(github_webhook=self) owners_file_handler = await owners_file_handler.initialize(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing pull request review with PullRequestReviewHandler", - ) await PullRequestReviewHandler( github_webhook=self, owners_file_handler=owners_file_handler ).process_pull_request_review_webhook_data( pull_request=pull_request, ) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: pull_request_review - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: " + f"pull_request_review - {token_metrics}", ) + await self._update_context_metrics() return None elif self.github_event == "check_run": # Check if we need to process this check_run action = self.hook_data.get("action", "") if action != "completed": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'skipped')} " - f"Check run action is '{action}' (not 'completed'), skipping processing", - ) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] + self.logger.info( f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " f"Webhook processing completed successfully: check_run (action={action}, skipped) - " f"{token_metrics}", ) + await self._update_context_metrics() return None # Check if this is can-be-merged with non-success conclusion @@ -553,73 +521,44 @@ async def process(self) -> Any: check_run_conclusion = self.hook_data.get("check_run", {}).get("conclusion", "") if check_run_name == CAN_BE_MERGED_STR and check_run_conclusion != SUCCESS_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'skipped')} " - f"Can-be-merged check has conclusion '{check_run_conclusion}' (not 'success'), " - f"skipping processing", - ) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] + self.logger.info( f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " f"Webhook processing completed successfully: check_run " f"(can-be-merged, conclusion={check_run_conclusion}, skipped) - {token_metrics}", ) + await self._update_context_metrics() return None # Only clone repository when we actually need it (action is completed and processing is needed) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Cloning repository for check run processing", - ) await self._clone_repository(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Initializing OWNERS file handler for check run", - ) owners_file_handler = OwnersFileHandler(github_webhook=self) owners_file_handler = await owners_file_handler.initialize(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Processing check run with CheckRunHandler", - ) handled = await CheckRunHandler( github_webhook=self, owners_file_handler=owners_file_handler ).process_pull_request_check_run_webhook_data(pull_request=pull_request) if handled: if self.hook_data["check_run"]["name"] != CAN_BE_MERGED_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'processing')} " - f"Checking if pull request can be merged after check run", - ) await PullRequestHandler( github_webhook=self, owners_file_handler=owners_file_handler ).check_if_can_be_merged(pull_request=pull_request) # Log completion regardless of whether check run was processed or skipped token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed successfully: check_run - {token_metrics}", + self.logger.info( + f"{self.log_prefix} Webhook processing completed successfully: check_run - {token_metrics}", ) + await self._update_context_metrics() return None else: # Log warning when no PR found self.logger.warning( - f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'skipped')} " - f"No pull request found for {self.github_event} event - skipping processing" + f"{self.log_prefix} No pull request found for {self.github_event} event - skipping processing" ) token_metrics = await self._get_token_metrics() - self.logger.success( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('webhook_processing', 'webhook_routing', 'completed')} " - f"Webhook processing completed: no PR found - {token_metrics}" - ) + self.logger.info(f"{self.log_prefix} Webhook processing completed: no PR found - {token_metrics}") + await self._update_context_metrics() return None def add_api_users_to_auto_verified_and_merged_users(self) -> None: diff --git a/webhook_server/libs/handlers/check_run_handler.py b/webhook_server/libs/handlers/check_run_handler.py index ab189c626..50af9bbe0 100644 --- a/webhook_server/libs/handlers/check_run_handler.py +++ b/webhook_server/libs/handlers/check_run_handler.py @@ -23,15 +23,17 @@ TOX_STR, VERIFIED_LABEL_STR, ) -from webhook_server.utils.helpers import format_task_fields, strip_ansi_codes +from webhook_server.utils.helpers import strip_ansi_codes if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook + from webhook_server.utils.context import WebhookContext class CheckRunHandler: def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersFileHandler | None = None): self.github_webhook = github_webhook + self.ctx: WebhookContext | None = github_webhook.ctx self.owners_file_handler = owners_file_handler self.hook_data = self.github_webhook.hook_data self.logger = self.github_webhook.logger @@ -47,25 +49,19 @@ def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersF async def process_pull_request_check_run_webhook_data(self, pull_request: PullRequest | None = None) -> bool: """Return True if check_if_can_be_merged need to run""" + if self.ctx: + self.ctx.start_step("check_run_handler") _check_run: dict[str, Any] = self.hook_data["check_run"] check_run_name: str = _check_run["name"] - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'processing')} " - f"Processing check run: {check_run_name}", - ) - if self.hook_data.get("action", "") != "completed": self.logger.debug( f"{self.log_prefix} check run {check_run_name} action is " f"{self.hook_data.get('action', 'N/A')} and not completed, skipping" ) - # Log completion - task_status reflects the result of our action (skipping is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Processing check run: {check_run_name} (action not completed - skipped)", - ) + if self.ctx: + self.ctx.complete_step("check_run_handler") return False check_run_status: str = _check_run["status"] @@ -81,55 +77,29 @@ async def process_pull_request_check_run_webhook_data(self, pull_request: PullRe label=AUTOMERGE_LABEL_STR, pull_request=pull_request ): try: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'automerge', 'processing')} " - f"Executing auto-merge for PR #{pull_request.number}", - ) await asyncio.to_thread(pull_request.merge, merge_method="SQUASH") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'automerge', 'completed')} " - f"Auto-merge completed successfully", - ) self.logger.info( f"{self.log_prefix} Successfully auto-merged pull request #{pull_request.number}" ) - # Log completion for main check_run processing - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Processing check run: {check_run_name} (auto-merged)", - ) + if self.ctx: + self.ctx.complete_step("check_run_handler") return False except Exception as ex: self.logger.error( f"{self.log_prefix} Failed to auto-merge pull request #{pull_request.number}: {ex}" ) - # Log failure for automerge - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'automerge', 'failed')} " - f"Failed to auto-merge PR #{pull_request.number}: {ex}", - ) - # Continue processing to allow manual intervention - # Log completion for main check_run processing (continuing after failed automerge) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Processing check run: {check_run_name} (auto-merge failed, continuing)", - ) + if self.ctx: + self.ctx.complete_step("check_run_handler") return True else: self.logger.debug(f"{self.log_prefix} check run is {CAN_BE_MERGED_STR}, skipping") - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Processing check run: {check_run_name} (skipped - conditions not met)", - ) + if self.ctx: + self.ctx.complete_step("check_run_handler") return False - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Processing check run: {check_run_name} (completed)", - ) + if self.ctx: + self.ctx.complete_step("check_run_handler") return True async def set_verify_check_queued(self) -> None: @@ -263,34 +233,11 @@ async def set_check_run_status( msg: str = f"{self.log_prefix} check run {check_run} status: {status or conclusion}" - # Log workflow steps for check run status changes - # task_status reflects the result of our action, not what we're setting the check to - if status == QUEUED_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Setting {check_run} check to queued", - ) - elif status == IN_PROGRESS_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Setting {check_run} check to in-progress", - ) - elif conclusion == SUCCESS_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'completed')} " - f"Setting {check_run} check to success", - ) - elif conclusion == FAILURE_STR: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('check_run', 'ci_check', 'failed')} " - f"Setting {check_run} check to failure", - ) - try: self.logger.debug(f"{self.log_prefix} Set check run status with {kwargs}") await asyncio.to_thread(self.github_webhook.repository_by_github_app.create_check_run, **kwargs) if conclusion in (SUCCESS_STR, IN_PROGRESS_STR): - self.logger.success(msg) # type: ignore + self.logger.info(msg) return except Exception as ex: diff --git a/webhook_server/libs/handlers/issue_comment_handler.py b/webhook_server/libs/handlers/issue_comment_handler.py index e0b77dea4..6dfef06eb 100644 --- a/webhook_server/libs/handlers/issue_comment_handler.py +++ b/webhook_server/libs/handlers/issue_comment_handler.py @@ -35,10 +35,10 @@ VERIFIED_LABEL_STR, WIP_STR, ) -from webhook_server.utils.helpers import format_task_fields if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook + from webhook_server.utils.context import WebhookContext class IssueCommentHandler: @@ -50,6 +50,7 @@ def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFil self.logger = self.github_webhook.logger self.log_prefix: str = self.github_webhook.log_prefix self.repository: Repository = self.github_webhook.repository + self.ctx: WebhookContext | None = github_webhook.ctx self.labels_handler = LabelsHandler( github_webhook=self.github_webhook, owners_file_handler=self.owners_file_handler ) @@ -62,61 +63,35 @@ def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFil ) async def process_comment_webhook_data(self, pull_request: PullRequest) -> None: + if self.ctx: + self.ctx.start_step("issue_comment_handler") + comment_action = self.hook_data["action"] - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'started')} " - f"Starting issue comment processing: action={comment_action}", - ) if comment_action in ("edited", "deleted"): - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'processing')} " - f"Skipping comment processing: action is {comment_action}", - ) self.logger.debug(f"{self.log_prefix} Not processing comment. action is {comment_action}") - # Log completion - task_status reflects the result of our action (skipping is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'completed')} " - f"Skipping comment processing: action is {comment_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("issue_comment_handler") return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'processing')} " - f"Processing issue comment for issue {self.hook_data['issue']['number']}", - ) self.logger.info(f"{self.log_prefix} Processing issue {self.hook_data['issue']['number']}") body: str = self.hook_data["comment"]["body"] if self.github_webhook.issue_url_for_welcome_msg in body: self.logger.debug(f"{self.log_prefix} Welcome message found in issue {pull_request.title}. Not processing") - # Log completion - task_status reflects the result of our action (skipping welcome message is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'completed')} " - f"Processing issue comment for issue {self.hook_data['issue']['number']} (welcome message - skipped)", - ) + if self.ctx: + self.ctx.complete_step("issue_comment_handler") return _user_commands: list[str] = [_cmd.strip("/") for _cmd in body.strip().splitlines() if _cmd.startswith("/")] - if _user_commands: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'processing')} " - f"Found {len(_user_commands)} user commands: {_user_commands}", - ) - user_login: str = self.hook_data["sender"]["login"] # Execute all commands in parallel if _user_commands: tasks: list[Coroutine[Any, Any, Any] | Task[Any]] = [] for user_command in _user_commands: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('issue_comment', 'pr_management', 'processing')} " - f"Executing user command: /{user_command} by {user_login}", - ) task = asyncio.create_task( self.user_commands( pull_request=pull_request, @@ -135,26 +110,9 @@ async def process_comment_webhook_data(self, pull_request: PullRequest) -> None: user_command = _user_commands[idx] if isinstance(result, Exception): self.logger.error(f"{self.log_prefix} Command execution failed: /{user_command} - {result}") - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'completed')} " - f"Executed user command: /{user_command} by {user_login}", - ) - # Log completion for main processing - task_status reflects the result of our action - if not _user_commands: - # No commands found, log completion - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'completed')} " - f"Processing issue comment for issue {self.hook_data['issue']['number']} (no commands found)", - ) - else: - # Commands were processed, log completion - issue_num = self.hook_data["issue"]["number"] - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('issue_comment', 'pr_management', 'completed')} " - f"Processing issue comment for issue {issue_num} (processed {len(_user_commands)} commands)", - ) + if self.ctx: + self.ctx.complete_step("issue_comment_handler") async def user_commands( self, pull_request: PullRequest, command: str, reviewed_user: str, issue_comment_id: int diff --git a/webhook_server/libs/handlers/labels_handler.py b/webhook_server/libs/handlers/labels_handler.py index 27796b790..f1e01693b 100644 --- a/webhook_server/libs/handlers/labels_handler.py +++ b/webhook_server/libs/handlers/labels_handler.py @@ -23,7 +23,6 @@ STATIC_LABELS_DICT, WIP_STR, ) -from webhook_server.utils.helpers import format_task_fields if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook @@ -47,79 +46,35 @@ async def pull_request_labels_names(self, pull_request: PullRequest) -> list[str return [lb.name for lb in labels] async def _remove_label(self, pull_request: PullRequest, label: str) -> bool: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'processing')} " - f"Removing label '{label}' from PR", - ) self.logger.debug(f"{self.log_prefix} Removing label {label}") try: if await self.label_exists_in_pull_request(pull_request=pull_request, label=label): self.logger.info(f"{self.log_prefix} Removing label {label}") await asyncio.to_thread(pull_request.remove_from_labels, label) success = await self.wait_for_label(pull_request=pull_request, label=label, exists=False) - # Log completion - task_status reflects the result of our action - if success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Removed label '{label}' from PR", - ) - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'failed')} " - f"Failed to remove label '{label}' from PR (timeout waiting for removal)", - ) return success except Exception as exp: self.logger.debug(f"{self.log_prefix} Failed to remove {label} label. Exception: {exp}") - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'failed')} " - f"Failed to remove label '{label}' from PR (exception: {exp})", - ) return False # Label doesn't exist - this is an acceptable outcome (we don't check first to save API calls) - # Log completion - task_status reflects the result of our action (attempting to remove completed successfully) self.logger.debug(f"{self.log_prefix} Label {label} not found and cannot be removed") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Removing label '{label}' from PR (label does not exist - acceptable)", - ) return False async def _add_label(self, pull_request: PullRequest, label: str) -> None: label = label.strip() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'processing')} " - f"Adding label '{label}' to PR", - ) self.logger.debug(f"{self.log_prefix} Adding label {label}") if len(label) > 49: self.logger.debug(f"{label} is too long, not adding.") - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Adding label '{label}' to PR (label too long - skipped)", - ) return if await self.label_exists_in_pull_request(pull_request=pull_request, label=label): self.logger.debug(f"{self.log_prefix} Label {label} already assign") - # Log completion - task_status reflects the result of our action (label already exists is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Adding label '{label}' to PR (label already exists - acceptable)", - ) return if label in STATIC_LABELS_DICT: self.logger.info(f"{self.log_prefix} Adding pull request label {label}") await asyncio.to_thread(pull_request.add_to_labels, label) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Added label '{label}' to PR", - ) return color = self._get_label_color(label) @@ -137,11 +92,6 @@ async def _add_label(self, pull_request: PullRequest, label: str) -> None: self.logger.info(f"{self.log_prefix} Adding pull request label {label}") await asyncio.to_thread(pull_request.add_to_labels, label) await self.wait_for_label(pull_request=pull_request, label=label, exists=True) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Added label '{label}' to PR", - ) async def wait_for_label(self, pull_request: PullRequest, label: str, exists: bool) -> bool: self.logger.debug(f"{self.log_prefix} waiting for label {label} to {'exists' if exists else 'not exists'}") @@ -267,10 +217,6 @@ def get_size(self, pull_request: PullRequest) -> str: async def add_size_label(self, pull_request: PullRequest) -> None: """Add a size label to the pull request based on its additions and deletions.""" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'processing')} " - f"Calculating and applying PR size label", - ) size_label = self.get_size(pull_request=pull_request) self.logger.debug(f"{self.log_prefix} size label is {size_label}") if not size_label: @@ -291,10 +237,6 @@ async def add_size_label(self, pull_request: PullRequest) -> None: await self._remove_label(pull_request=pull_request, label=exists_size_label[0]) await self._add_label(pull_request=pull_request, label=size_label) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('labels', 'pr_management', 'completed')} " - f"Applied size label '{size_label}' to PR", - ) async def label_by_user_comment( self, diff --git a/webhook_server/libs/handlers/owners_files_handler.py b/webhook_server/libs/handlers/owners_files_handler.py index c1909aff2..c734fc587 100644 --- a/webhook_server/libs/handlers/owners_files_handler.py +++ b/webhook_server/libs/handlers/owners_files_handler.py @@ -14,7 +14,7 @@ from github.Repository import Repository from webhook_server.utils.constants import COMMAND_ADD_ALLOWED_USER_STR, ROOT_APPROVERS_KEY -from webhook_server.utils.helpers import format_task_fields, run_command +from webhook_server.utils.helpers import run_command if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook @@ -406,30 +406,12 @@ async def owners_data_for_changed_files(self) -> dict[str, dict[str, Any]]: async def assign_reviewers(self, pull_request: PullRequest) -> None: self._ensure_initialized() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'started')} " - f"Starting reviewer assignment based on OWNERS files", - ) self.logger.info(f"{self.log_prefix} Assign reviewers") _to_add: list[str] = list(set(self.all_pull_request_reviewers)) self.logger.debug(f"{self.log_prefix} Reviewers to add: {', '.join(_to_add)}") - if _to_add: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'processing')} " - f"Assigning {len(_to_add)} reviewers to PR", - ) - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'processing')} " - f"No reviewers to assign", - ) - # Log completion - task_status reflects the result of our action (no reviewers to assign is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'completed')} " - f"No reviewers to assign (completed)", - ) + if not _to_add: return assigned_count = 0 @@ -439,35 +421,15 @@ async def assign_reviewers(self, pull_request: PullRequest) -> None: self.logger.debug(f"{self.log_prefix} Adding reviewer {reviewer}") try: await asyncio.to_thread(pull_request.create_review_request, [reviewer]) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'processing')} " - f"Successfully assigned reviewer {reviewer}", - ) assigned_count += 1 except GithubException as ex: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'failed')} " - f"Failed to assign reviewer {reviewer}", - ) self.logger.debug(f"{self.log_prefix} Failed to add reviewer {reviewer}. {ex}") await asyncio.to_thread( pull_request.create_issue_comment, f"{reviewer} can not be added as reviewer. {ex}" ) failed_count += 1 - # Log completion - task_status reflects the result of our action - if failed_count > 0: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'failed')} " - f"Assigned {assigned_count} reviewers to PR ({failed_count} failed)", - ) - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('owners', 'pr_management', 'completed')} " - f"Assigned {assigned_count} reviewers to PR", - ) - async def is_user_valid_to_run_commands(self, pull_request: PullRequest, reviewed_user: str) -> bool: self._ensure_initialized() diff --git a/webhook_server/libs/handlers/pull_request_handler.py b/webhook_server/libs/handlers/pull_request_handler.py index 126b46715..2f888c3e2 100644 --- a/webhook_server/libs/handlers/pull_request_handler.py +++ b/webhook_server/libs/handlers/pull_request_handler.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import traceback from collections.abc import Coroutine from typing import TYPE_CHECKING, Any @@ -36,10 +37,10 @@ VERIFIED_LABEL_STR, WIP_STR, ) -from webhook_server.utils.helpers import format_task_fields if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook + from webhook_server.utils.context import WebhookContext class PullRequestHandler: @@ -51,6 +52,7 @@ def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFil self.logger = self.github_webhook.logger self.log_prefix: str = self.github_webhook.log_prefix self.repository: Repository = self.github_webhook.repository + self.ctx: WebhookContext | None = github_webhook.ctx self.labels_handler = LabelsHandler( github_webhook=self.github_webhook, owners_file_handler=self.owners_file_handler ) @@ -63,10 +65,9 @@ def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFil async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> None: hook_action: str = self.hook_data["action"] - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'started')} " - f"Starting pull request processing: action={hook_action}", - ) + if self.ctx: + self.ctx.start_step("pr_handler", action=hook_action) + self.logger.info(f"{self.log_prefix} hook_action is: {hook_action}") self.logger.debug(f"{self.log_prefix} pull_request: {pull_request.title} ({pull_request.number})") @@ -77,18 +78,11 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> if self.github_webhook.conventional_title and self.hook_data["changes"].get("title"): self.logger.info(f"{self.log_prefix} PR title changed, running conventional title check") await self.runner_handler.run_conventional_title_check(pull_request=pull_request) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return if hook_action in ("opened", "reopened", "ready_for_review"): - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Processing PR {hook_action} event: initializing new pull request", - ) tasks: list[Coroutine[Any, Any, Any]] = [] if hook_action in ("opened", "ready_for_review"): @@ -106,18 +100,11 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> # Set auto merge only after all initialization of a new PR is done. await self.set_pull_request_automerge(pull_request=pull_request) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return if hook_action == "synchronize": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Processing PR synchronize event: handling new commits", - ) sync_tasks: list[Coroutine[Any, Any, Any]] = [] sync_tasks.append(self.process_opened_or_synchronize_pull_request(pull_request=pull_request)) @@ -128,25 +115,14 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> for result in results: if isinstance(result, Exception): self.logger.error(f"{self.log_prefix} Async task failed: {result}") - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return if hook_action == "closed": - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Processing PR closed event: cleaning up resources", - ) await self.close_issue_for_merged_or_closed_pr(pull_request=pull_request, hook_action=hook_action) await self.delete_remote_tag_for_merged_or_closed_pr(pull_request=pull_request) if is_merged := pull_request_data.get("merged", False): - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"PR was merged: processing post-merge tasks", - ) self.logger.info(f"{self.log_prefix} PR is merged") labels = await asyncio.to_thread(lambda: list(pull_request.labels)) @@ -165,11 +141,8 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> ) await self.label_all_opened_pull_requests_merge_state_after_merged() - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return if hook_action in ("labeled", "unlabeled"): @@ -179,17 +152,9 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> labeled = self.hook_data["label"]["name"] labeled_lower = labeled.lower() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Processing label {hook_action} event: {labeled}", - ) - if labeled_lower == CAN_BE_MERGED_STR: - # Log completion - task_status reflects the result of our action (skipping is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (skipped - can-be-merged label)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return self.logger.info(f"{self.log_prefix} PR {pull_request.number} {hook_action} with {labeled}") @@ -234,18 +199,12 @@ async def process_pull_request_webhook_data(self, pull_request: PullRequest) -> if _check_for_merge: await self.check_if_can_be_merged(pull_request=pull_request) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) return - # Log completion for any unhandled actions - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Starting pull request processing: action={hook_action} (no action handler - completed)", - ) + if self.ctx: + self.ctx.complete_step("pr_handler", action=hook_action) async def set_wip_label_based_on_title(self, pull_request: PullRequest) -> None: if pull_request.title.lower().startswith(f"{WIP_STR}:"): @@ -424,27 +383,13 @@ async def label_all_opened_pull_requests_merge_state_after_merged(self) -> None: await self.label_pull_request_by_merge_state(pull_request=pull_request) async def delete_remote_tag_for_merged_or_closed_pr(self, pull_request: PullRequest) -> None: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'processing')} " - f"Deleting remote tag for PR #{pull_request.number}", - ) self.logger.debug(f"{self.log_prefix} Checking if need to delete remote tag for {pull_request.number}") if not self.github_webhook.build_and_push_container: self.logger.info(f"{self.log_prefix} repository do not have container configured") - # Log completion - task_status reflects the result of our action (skipping is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (skipped - container not configured)", - ) return repository_full_tag = self.github_webhook.container_repository_and_tag(pull_request=pull_request) if not repository_full_tag: - # Log completion - task_status reflects the result of our action (no tag to delete) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (no tag configured)", - ) return pr_tag = repository_full_tag.split(":")[-1] @@ -456,11 +401,6 @@ async def delete_remote_tag_for_merged_or_closed_pr(self, pull_request: PullRequ f"{self.log_prefix} No registry host found in " f"{self.github_webhook.container_repository}; skipping tag deletion" ) - # Log completion - task_status reflects the result of our action (skipping is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (skipped - no registry host)", - ) return registry_url = registry_info[0] @@ -485,11 +425,6 @@ async def _delete_ghcr_tag_via_github_api( ) -> None: """Delete GHCR tag using GitHub Packages REST API.""" if not self.github_webhook.github_api or not self.github_webhook.token: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag} (GitHub API not available)", - ) self.logger.error(f"{self.log_prefix} GitHub API or token not available for tag deletion") return @@ -498,11 +433,6 @@ async def _delete_ghcr_tag_via_github_api( # Format: ghcr.io/org/services/api-server -> org, services/api-server registry_info = self.github_webhook.container_repository.split("/") if len(registry_info) < 3: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag} (invalid repository format)", - ) self.logger.error( f"{self.log_prefix} Invalid container repository format: {self.github_webhook.container_repository}" ) @@ -533,11 +463,6 @@ async def _delete_ghcr_tag_via_github_api( raise if not versions or not package_api_base: - # Log completion - task_status reflects the result of our action (package not found is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (package not found)", - ) self.logger.warning( f"{self.log_prefix} Package {package_name} not found for owner {owner_name} on GHCR" ) @@ -555,11 +480,6 @@ async def _delete_ghcr_tag_via_github_api( break if not version_to_delete_id: - # Log completion - task_status reflects the result of our action (tag not found is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (tag not found in package)", - ) self.logger.warning(f"{self.log_prefix} Tag {pr_tag} not found in package {package_name} versions") return @@ -583,25 +503,10 @@ async def _delete_ghcr_tag_via_github_api( await asyncio.to_thread( pull_request.create_issue_comment, f"Successfully removed PR tag: {repository_full_tag}." ) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleted remote tag: {repository_full_tag}", - ) except GithubException: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag}", - ) self.logger.exception(f"{self.log_prefix} Failed to delete GHCR tag: {repository_full_tag}") except Exception: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag}", - ) self.logger.exception(f"{self.log_prefix} Failed to delete GHCR tag: {repository_full_tag}") async def _delete_registry_tag_via_regctl( @@ -643,27 +548,12 @@ async def _delete_registry_tag_via_regctl( await asyncio.to_thread( pull_request.create_issue_comment, f"Successfully removed PR tag: {repository_full_tag}." ) - # Log completion - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleted remote tag: {repository_full_tag}", - ) else: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag}", - ) self.logger.error( f"{self.log_prefix} Failed to delete tag: {repository_full_tag}. " f"OUT:{del_out}. ERR:{del_err}" ) else: - # Log completion - task_status reflects the result of our action (tag not found is acceptable) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'completed')} " - f"Deleting remote tag for PR #{pull_request.number} (tag not found in registry)", - ) self.logger.warning( f"{self.log_prefix} {pr_tag} tag not found in registry " f"{self.github_webhook.container_repository}. " @@ -673,11 +563,6 @@ async def _delete_registry_tag_via_regctl( await self.runner_handler.run_podman_command(command="regctl registry logout") else: - # Log failure - task_status reflects the result of our action - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('tag_deletion', 'pr_management', 'failed')} " - f"Failed to delete tag: {repository_full_tag} (registry login failed)", - ) await asyncio.to_thread( pull_request.create_issue_comment, f"Failed to delete tag: {repository_full_tag}. Please delete it manually.", @@ -708,16 +593,10 @@ def _find_matching_issue() -> Any | None: await asyncio.to_thread(matching_issue.edit, state="closed") async def process_opened_or_synchronize_pull_request(self, pull_request: PullRequest) -> None: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'started')} " - f"Starting PR processing workflow", - ) + if self.ctx: + self.ctx.start_step("pr_workflow_setup") # Stage 1: Initial setup and check queue tasks - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Stage: Initial setup and check queuing", - ) setup_tasks: list[Coroutine[Any, Any, Any]] = [] setup_tasks.append(self.owners_file_handler.assign_reviewers(pull_request=pull_request)) @@ -740,24 +619,19 @@ async def process_opened_or_synchronize_pull_request(self, pull_request: PullReq if self.github_webhook.conventional_title: setup_tasks.append(self.check_run_handler.set_conventional_title_queued()) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} Executing setup tasks" - ) setup_results = await asyncio.gather(*setup_tasks, return_exceptions=True) for result in setup_results: if isinstance(result, Exception): self.logger.error(f"{self.log_prefix} Setup task failed: {result}") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} Setup tasks completed" - ) + if self.ctx: + self.ctx.complete_step("pr_workflow_setup") # Stage 2: CI/CD execution tasks - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Stage: CI/CD execution", - ) + if self.ctx: + self.ctx.start_step("pr_cicd_execution") + ci_tasks: list[Coroutine[Any, Any, Any]] = [] ci_tasks.append(self.runner_handler.run_tox(pull_request=pull_request)) @@ -768,20 +642,14 @@ async def process_opened_or_synchronize_pull_request(self, pull_request: PullReq if self.github_webhook.conventional_title: ci_tasks.append(self.runner_handler.run_conventional_title_check(pull_request=pull_request)) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'processing')} " - f"Executing CI/CD tasks", - ) ci_results = await asyncio.gather(*ci_tasks, return_exceptions=True) for result in ci_results: if isinstance(result, Exception): self.logger.error(f"{self.log_prefix} CI/CD task failed: {result}") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"PR processing workflow completed", - ) + if self.ctx: + self.ctx.complete_step("pr_cicd_execution") async def create_issue_for_new_pull_request(self, pull_request: PullRequest) -> None: if not self.github_webhook.create_issue_for_new_pr: @@ -934,13 +802,13 @@ async def check_if_can_be_merged(self, pull_request: PullRequest) -> None: PR status is not 'dirty'. PR has no changed requests from approvers. """ - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('pr_handler', 'pr_management', 'started')} " - f"Starting merge eligibility check" - ) + if self.ctx: + self.ctx.start_step("check_merge_eligibility") + if await self.skip_if_pull_request_already_merged(pull_request=pull_request): self.logger.debug(f"{self.log_prefix} Pull request already merged") + if self.ctx: + self.ctx.complete_step("check_merge_eligibility", can_merge=False, reason="already_merged") return output = { @@ -1014,27 +882,19 @@ async def check_if_can_be_merged(self, pull_request: PullRequest) -> None: if not failure_output: await self.labels_handler._add_label(pull_request=pull_request, label=CAN_BE_MERGED_STR) await self.check_run_handler.set_merge_check_success() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'completed')} " - f"Merge eligibility check completed successfully", - ) self.logger.info(f"{self.log_prefix} Pull request can be merged") + if self.ctx: + self.ctx.complete_step("check_merge_eligibility", can_merge=True) return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'failed')} " - f"Merge eligibility check failed", - ) self.logger.debug(f"{self.log_prefix} cannot be merged: {failure_output}") output["text"] = failure_output await self.labels_handler._remove_label(pull_request=pull_request, label=CAN_BE_MERGED_STR) await self.check_run_handler.set_merge_check_failure(output=output) + if self.ctx: + self.ctx.complete_step("check_merge_eligibility", can_merge=False, reason=failure_output) except Exception as ex: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_handler', 'pr_management', 'failed')} " - f"Merge eligibility check failed with exception", - ) self.logger.error( f"{self.log_prefix} Failed to check if can be merged, set check run to {FAILURE_STR} {ex}" ) @@ -1042,6 +902,8 @@ async def check_if_can_be_merged(self, pull_request: PullRequest) -> None: output["text"] = _err await self.labels_handler._remove_label(pull_request=pull_request, label=CAN_BE_MERGED_STR) await self.check_run_handler.set_merge_check_failure(output=output) + if self.ctx: + self.ctx.fail_step("check_merge_eligibility", ex, traceback.format_exc()) async def _check_if_pr_approved(self, labels: list[str]) -> str: self.logger.info(f"{self.log_prefix} Check if pull request is approved by pull request labels.") @@ -1181,11 +1043,6 @@ async def process_new_or_reprocess_pull_request(self, pull_request: PullRequest) This method extracts the core logic from the "opened" event handler to make it reusable for both new PRs and the /reprocess command. It includes duplicate prevention checks. """ - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_initialization', 'pr_management', 'started')} " - f"Starting PR initialization workflow", - ) - tasks: list[Coroutine[Any, Any, Any]] = [] # Add welcome message if it doesn't exist yet @@ -1207,49 +1064,22 @@ async def process_new_or_reprocess_pull_request(self, pull_request: PullRequest) tasks.append(self.set_wip_label_based_on_title(pull_request=pull_request)) tasks.append(self.process_opened_or_synchronize_pull_request(pull_request=pull_request)) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_initialization', 'pr_management', 'processing')} " - f"Executing initialization tasks", - ) results = await asyncio.gather(*tasks, return_exceptions=True) for result in results: if isinstance(result, Exception): self.logger.error(f"{self.log_prefix} Async task failed: {result}") # Set auto merge only after all initialization is done - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_initialization', 'pr_management', 'processing')} " - f"Setting auto-merge configuration", - ) await self.set_pull_request_automerge(pull_request=pull_request) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('pr_initialization', 'pr_management', 'completed')} " - f"PR initialization workflow completed", - ) - async def process_command_reprocess(self, pull_request: PullRequest) -> None: """Handle /reprocess command - triggers full PR workflow from scratch.""" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('reprocess_command', 'pr_management', 'started')} " - f"Starting /reprocess command execution for PR #{pull_request.number}", - ) - # Check if PR is already merged - skip if merged if await asyncio.to_thread(lambda: pull_request.is_merged()): self.logger.info(f"{self.log_prefix} PR is already merged, skipping reprocess") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('reprocess_command', 'pr_management', 'completed')} " - f"/reprocess command completed (PR already merged - skipped)", - ) return self.logger.info(f"{self.log_prefix} Executing full PR reprocessing workflow") # Call the extracted reusable method await self.process_new_or_reprocess_pull_request(pull_request=pull_request) - - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('reprocess_command', 'pr_management', 'completed')} " - f"/reprocess command completed successfully", - ) diff --git a/webhook_server/libs/handlers/pull_request_review_handler.py b/webhook_server/libs/handlers/pull_request_review_handler.py index 619118665..c2d633f62 100644 --- a/webhook_server/libs/handlers/pull_request_review_handler.py +++ b/webhook_server/libs/handlers/pull_request_review_handler.py @@ -8,11 +8,13 @@ if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook + from webhook_server.utils.context import WebhookContext class PullRequestReviewHandler: def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersFileHandler): self.github_webhook = github_webhook + self.ctx: WebhookContext | None = github_webhook.ctx self.owners_file_handler = owners_file_handler self.hook_data = self.github_webhook.hook_data @@ -22,6 +24,9 @@ def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersF self.github_webhook.logger.debug(f"{self.github_webhook.log_prefix} Initialized PullRequestReviewHandler") async def process_pull_request_review_webhook_data(self, pull_request: PullRequest) -> None: + if self.ctx: + self.ctx.start_step("pr_review_handler") + if self.hook_data["action"] == "submitted": """ Available actions: @@ -52,3 +57,6 @@ async def process_pull_request_review_webhook_data(self, pull_request: PullReque remove=False, reviewed_user=reviewed_user, ) + + if self.ctx: + self.ctx.complete_step("pr_review_handler") diff --git a/webhook_server/libs/handlers/push_handler.py b/webhook_server/libs/handlers/push_handler.py index 8ff098731..5499408d5 100644 --- a/webhook_server/libs/handlers/push_handler.py +++ b/webhook_server/libs/handlers/push_handler.py @@ -6,11 +6,12 @@ from webhook_server.libs.handlers.check_run_handler import CheckRunHandler from webhook_server.libs.handlers.runner_handler import RunnerHandler -from webhook_server.utils.helpers import format_task_fields, run_command +from webhook_server.utils.helpers import run_command from webhook_server.utils.notification_utils import send_slack_message if TYPE_CHECKING: from webhook_server.libs.github_api import GithubWebhook + from webhook_server.utils.context import WebhookContext class PushHandler: @@ -21,58 +22,36 @@ def __init__(self, github_webhook: "GithubWebhook"): self.logger = self.github_webhook.logger self.log_prefix: str = self.github_webhook.log_prefix self.repository: Repository = self.github_webhook.repository + self.ctx: WebhookContext | None = github_webhook.ctx self.check_run_handler = CheckRunHandler(github_webhook=self.github_webhook) self.runner_handler = RunnerHandler(github_webhook=self.github_webhook) async def process_push_webhook_data(self) -> None: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'started')} " - f"Starting push webhook processing", # pragma: allowlist secret - ) + if self.ctx: + self.ctx.start_step("push_handler") + tag = re.search(r"^refs/tags/(.+)$", self.hook_data["ref"]) if tag: tag_name = tag.group(1) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'processing')} " - f"Processing tag push: {tag_name}", - ) self.logger.info(f"{self.log_prefix} Processing push for tag: {tag.group(1)}") self.logger.debug(f"{self.log_prefix} Tag: {tag_name}") if self.github_webhook.pypi: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'started')} " - f"Starting PyPI upload for tag: {tag_name}", - ) self.logger.info(f"{self.log_prefix} Processing upload to pypi for tag: {tag_name}") try: await self.upload_to_pypi(tag_name=tag_name) except Exception: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"PyPI upload failed with exception", - ) self.logger.exception(f"{self.log_prefix} PyPI upload failed") if self.github_webhook.build_and_push_container and self.github_webhook.container_release: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'started')} " - f"Starting container build and push for tag: {tag_name}", - ) self.logger.info(f"{self.log_prefix} Processing build and push container for tag: {tag_name}") try: await self.runner_handler.run_build_container(push=True, set_check=False, tag=tag_name) # Note: run_build_container logs completion/failure internally except Exception as ex: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"Container build and push failed with exception", - ) self.logger.exception(f"{self.log_prefix} Container build and push failed: {ex}") - else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'processing')} " - f"Non-tag push detected, skipping processing", - ) + + if self.ctx: + self.ctx.complete_step("push_handler") async def upload_to_pypi(self, tag_name: str) -> None: async def _issue_on_error(_error: str) -> None: @@ -89,10 +68,6 @@ async def _issue_on_error(_error: str) -> None: """, ) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'started')} " - f"Starting PyPI upload process for tag: {tag_name}", - ) self.logger.info(f"{self.log_prefix} Start uploading to pypi") async with self.runner_handler._checkout_worktree(checkout=tag_name) as (success, worktree_path, out, err): @@ -101,10 +76,6 @@ async def _issue_on_error(_error: str) -> None: self.logger.debug(f"{self.log_prefix} Worktree path: {worktree_path}") if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"PyPI upload failed: repository preparation failed", - ) _error = self.check_run_handler.get_check_run_text(out=out, err=err) await _issue_on_error(_error=_error) return @@ -113,20 +84,12 @@ async def _issue_on_error(_error: str) -> None: command=f"uv {uv_cmd_dir} build --sdist --out-dir {_dist_dir}", log_prefix=self.log_prefix ) if not rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"PyPI upload failed: build command failed", - ) _error = self.check_run_handler.get_check_run_text(out=out, err=err) await _issue_on_error(_error=_error) return rc, tar_gz_file, err = await run_command(command=f"ls {_dist_dir}", log_prefix=self.log_prefix) if not rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"PyPI upload failed: listing dist directory failed", - ) _error = self.check_run_handler.get_check_run_text(out=tar_gz_file, err=err) await _issue_on_error(_error=_error) return @@ -144,18 +107,10 @@ async def _issue_on_error(_error: str) -> None: for cmd in commands: rc, out, err = await run_command(command=cmd, log_prefix=self.log_prefix, redact_secrets=[pypi_token]) if not rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'failed')} " - f"PyPI upload failed: command execution failed", - ) _error = self.check_run_handler.get_check_run_text(out=out, err=err) await _issue_on_error(_error=_error) return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('push_processing', 'webhook_event', 'completed')} " - f"PyPI upload completed successfully for tag: {tag_name}", - ) self.logger.info(f"{self.log_prefix} Publish to pypi finished") if self.github_webhook.slack_webhook_url: message: str = f""" diff --git a/webhook_server/libs/handlers/runner_handler.py b/webhook_server/libs/handlers/runner_handler.py index d81c2d80b..cf468d245 100644 --- a/webhook_server/libs/handlers/runner_handler.py +++ b/webhook_server/libs/handlers/runner_handler.py @@ -22,7 +22,7 @@ PYTHON_MODULE_INSTALL_STR, TOX_STR, ) -from webhook_server.utils.helpers import _redact_secrets, format_task_fields, run_command +from webhook_server.utils.helpers import _redact_secrets, run_command from webhook_server.utils.notification_utils import send_slack_message if TYPE_CHECKING: @@ -173,10 +173,6 @@ async def run_tox(self, pull_request: PullRequest) -> None: self.logger.debug(f"{self.log_prefix} Tox not configured for this repository") return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'started')} Starting tox tests execution" - ) - if await self.check_run_handler.is_check_run_in_progress(check_run=TOX_STR): self.logger.debug(f"{self.log_prefix} Check run is in progress, re-running {TOX_STR}.") @@ -185,16 +181,8 @@ async def run_tox(self, pull_request: PullRequest) -> None: ) _tox_tests = self.github_webhook.tox.get(pull_request.base.ref, "") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting tox check status to in-progress", - ) await self.check_run_handler.set_run_tox_check_in_progress() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Preparing repository checkout for tox execution", - ) async with self._checkout_worktree(pull_request=pull_request) as (success, worktree_path, out, err): # Build tox command with worktree path cmd = f"uvx {python_ver} {TOX_STR} --workdir {worktree_path} --root {worktree_path} -c {worktree_path}" @@ -209,17 +197,10 @@ async def run_tox(self, pull_request: PullRequest) -> None: "text": None, } if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} " - f"Repository preparation failed for tox", - ) self.logger.error(f"{self.log_prefix} Repository preparation failed for tox") output["text"] = self.check_run_handler.get_check_run_text(out=out, err=err) return await self.check_run_handler.set_run_tox_check_failure(output=output) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} Executing tox command" - ) rc, out, err = await run_command( command=cmd, log_prefix=self.log_prefix, @@ -229,15 +210,8 @@ async def run_tox(self, pull_request: PullRequest) -> None: output["text"] = self.check_run_handler.get_check_run_text(err=err, out=out) if rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Tox tests completed successfully", - ) return await self.check_run_handler.set_run_tox_check_success(output=output) else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} Tox tests failed" - ) return await self.check_run_handler.set_run_tox_check_failure(output=output) async def run_pre_commit(self, pull_request: PullRequest) -> None: @@ -245,24 +219,11 @@ async def run_pre_commit(self, pull_request: PullRequest) -> None: self.logger.debug(f"{self.log_prefix} Pre-commit not configured for this repository") return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'started')} " - f"Starting pre-commit checks execution", - ) - if await self.check_run_handler.is_check_run_in_progress(check_run=PRE_COMMIT_STR): self.logger.debug(f"{self.log_prefix} Check run is in progress, re-running {PRE_COMMIT_STR}.") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting pre-commit check status to in-progress", - ) await self.check_run_handler.set_run_pre_commit_check_in_progress() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Preparing repository checkout for pre-commit execution", - ) async with self._checkout_worktree(pull_request=pull_request) as (success, worktree_path, out, err): cmd = f" uvx --directory {worktree_path} {PREK_STR} run --all-files" @@ -272,18 +233,10 @@ async def run_pre_commit(self, pull_request: PullRequest) -> None: "text": None, } if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} " - f"Repository preparation failed for pre-commit", - ) self.logger.error(f"{self.log_prefix} Repository preparation failed for pre-commit") output["text"] = self.check_run_handler.get_check_run_text(out=out, err=err) return await self.check_run_handler.set_run_pre_commit_check_failure(output=output) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Executing pre-commit command", - ) rc, out, err = await run_command( command=cmd, log_prefix=self.log_prefix, @@ -293,15 +246,8 @@ async def run_pre_commit(self, pull_request: PullRequest) -> None: output["text"] = self.check_run_handler.get_check_run_text(err=err, out=out) if rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Pre-commit checks completed successfully", - ) return await self.check_run_handler.set_run_pre_commit_check_success(output=output) else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} Pre-commit checks failed" - ) return await self.check_run_handler.set_run_pre_commit_check_failure(output=output) async def run_build_container( @@ -317,10 +263,6 @@ async def run_build_container( if not self.github_webhook.build_and_push_container: return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'started')} Starting container build process", - ) - if ( self.owners_file_handler and reviewed_user @@ -335,10 +277,6 @@ async def run_build_container( if await self.check_run_handler.is_check_run_in_progress(check_run=BUILD_CONTAINER_STR) and not is_merged: self.logger.info(f"{self.log_prefix} Check run is in progress, re-running {BUILD_CONTAINER_STR}.") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting container build check status to in-progress", - ) if set_check: await self.check_run_handler.set_container_build_in_progress() @@ -347,10 +285,6 @@ async def run_build_container( ) no_cache: str = " --no-cache" if is_merged else "" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Preparing repository checkout for container build", - ) async with self._checkout_worktree( pull_request=pull_request, is_merged=is_merged, @@ -382,45 +316,26 @@ async def run_build_container( "text": None, } if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} " - f"Repository preparation failed for container build", - ) output["text"] = self.check_run_handler.get_check_run_text(out=out, err=err) if pull_request and set_check: await self.check_run_handler.set_container_build_failure(output=output) return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Executing container build command", - ) build_rc, build_out, build_err = await self.run_podman_command( command=podman_build_cmd, ) output["text"] = self.check_run_handler.get_check_run_text(err=build_err, out=build_out) if build_rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Container build completed successfully", - ) self.logger.info(f"{self.log_prefix} Done building {_container_repository_and_tag}") if pull_request and set_check: return await self.check_run_handler.set_container_build_success(output=output) else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} Container build failed" - ) self.logger.error(f"{self.log_prefix} Failed to build {_container_repository_and_tag}") if pull_request and set_check: return await self.check_run_handler.set_container_build_failure(output=output) if push and build_rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'started')} " - f"Starting container push to registry", - ) cmd = ( f"podman push --creds " f"{self.github_webhook.container_repository_username}:" @@ -435,10 +350,6 @@ async def run_build_container( ], ) if push_rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Container push completed successfully", - ) push_msg: str = f"New container for {_container_repository_and_tag} published" if pull_request: await asyncio.to_thread(pull_request.create_issue_comment, push_msg) @@ -459,9 +370,6 @@ async def run_build_container( self.logger.info(f"{self.log_prefix} Done push {_container_repository_and_tag}") else: err_msg: str = f"Failed to build and push {_container_repository_and_tag}" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} Container push failed", - ) if pull_request: await asyncio.to_thread(pull_request.create_issue_comment, err_msg) @@ -482,25 +390,11 @@ async def run_install_python_module(self, pull_request: PullRequest) -> None: if not self.github_webhook.pypi: return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'started')} " - f"Starting Python module installation" - ) - if await self.check_run_handler.is_check_run_in_progress(check_run=PYTHON_MODULE_INSTALL_STR): self.logger.info(f"{self.log_prefix} Check run is in progress, re-running {PYTHON_MODULE_INSTALL_STR}.") self.logger.info(f"{self.log_prefix} Installing python module") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting Python module install check status to in-progress", - ) await self.check_run_handler.set_python_module_install_in_progress() - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Preparing repository checkout for Python module installation", - ) async with self._checkout_worktree(pull_request=pull_request) as (success, worktree_path, out, err): output: dict[str, Any] = { "title": "Python module installation", @@ -508,17 +402,9 @@ async def run_install_python_module(self, pull_request: PullRequest) -> None: "text": None, } if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} " - f"Repository preparation failed for Python module installation", - ) output["text"] = self.check_run_handler.get_check_run_text(out=out, err=err) return await self.check_run_handler.set_python_module_install_failure(output=output) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Executing Python module installation command", - ) rc, out, err = await run_command( command=f"uvx pip wheel --no-cache-dir -w {worktree_path}/dist {worktree_path}", log_prefix=self.log_prefix, @@ -528,27 +414,14 @@ async def run_install_python_module(self, pull_request: PullRequest) -> None: output["text"] = self.check_run_handler.get_check_run_text(err=err, out=out) if rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Python module installation completed successfully", - ) return await self.check_run_handler.set_python_module_install_success(output=output) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'failed')} " - f"Python module installation failed" - ) return await self.check_run_handler.set_python_module_install_failure(output=output) async def run_conventional_title_check(self, pull_request: PullRequest) -> None: if not self.github_webhook.conventional_title: return - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'started')} Starting conventional title check" - ) - output: dict[str, str] = { "title": "Conventional Title", "summary": "PR title follows Conventional Commits format", @@ -562,27 +435,14 @@ async def run_conventional_title_check(self, pull_request: PullRequest) -> None: if await self.check_run_handler.is_check_run_in_progress(check_run=CONVENTIONAL_TITLE_STR): self.logger.info(f"{self.log_prefix} Check run is in progress, re-running {CONVENTIONAL_TITLE_STR}.") - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting conventional title check status to in-progress", - ) await self.check_run_handler.set_conventional_title_in_progress() allowed_names = [name.strip() for name in self.github_webhook.conventional_title.split(",") if name.strip()] title = pull_request.title self.logger.debug(f"{self.log_prefix} Conventional title check for title: {title}, allowed: {allowed_names}") if any([re.match(rf"^{re.escape(_name)}(\([^)]+\))?!?: .+", title) for _name in allowed_names]): - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Conventional title check completed successfully", - ) await self.check_run_handler.set_conventional_title_success(output=output) else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'failed')} " - f"Conventional title check failed" - ) output["title"] = "❌ Conventional Title" output["summary"] = "Conventional Commit Format Violation" output["text"] = f"""## Conventional Commits Validation Failed @@ -625,30 +485,15 @@ async def is_branch_exists(self, branch: str) -> Branch: async def cherry_pick(self, pull_request: PullRequest, target_branch: str, reviewed_user: str = "") -> None: requested_by = reviewed_user or "by target-branch label" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'started')} " - f"Starting cherry-pick process to {target_branch}" - ) self.logger.info(f"{self.log_prefix} Cherry-pick requested by user: {requested_by}") new_branch_name = f"{CHERRY_PICKED_LABEL_PREFIX}-{pull_request.head.ref}-{shortuuid.uuid()[:5]}" if not await self.is_branch_exists(branch=target_branch): err_msg = f"cherry-pick failed: {target_branch} does not exists" - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'failed')} " - f"Cherry-pick failed: target branch does not exist" - ) self.logger.error(err_msg) await asyncio.to_thread(pull_request.create_issue_comment, err_msg) else: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'processing')} " - f"Setting cherry-pick check status to in-progress" - ) await self.check_run_handler.set_cherry_pick_in_progress() commit_hash = pull_request.merge_commit_sha commit_msg_striped = pull_request.title.replace("'", "") @@ -677,18 +522,9 @@ async def cherry_pick(self, pull_request: PullRequest, target_branch: str, revie "text": None, } if not success: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'failed')} " - f"Repository preparation failed for cherry-pick", - ) output["text"] = self.check_run_handler.get_check_run_text(out=out, err=err) await self.check_run_handler.set_cherry_pick_failure(output=output) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'processing')} " - f"Executing cherry-pick commands" - ) for cmd in commands: rc, out, err = await run_command( command=cmd, @@ -697,11 +533,6 @@ async def cherry_pick(self, pull_request: PullRequest, target_branch: str, revie mask_sensitive=self.github_webhook.mask_sensitive, ) if not rc: - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} " - f"{format_task_fields('runner', 'ci_check', 'failed')} " - f"Cherry-pick command failed" - ) output["text"] = self.check_run_handler.get_check_run_text(err=err, out=out) await self.check_run_handler.set_cherry_pick_failure(output=output) redacted_out = _redact_secrets( @@ -734,10 +565,6 @@ async def cherry_pick(self, pull_request: PullRequest, target_branch: str, revie output["text"] = self.check_run_handler.get_check_run_text(err=err, out=out) - self.logger.step( # type: ignore[attr-defined] - f"{self.log_prefix} {format_task_fields('runner', 'ci_check', 'completed')} " - f"Cherry-pick completed successfully", - ) await self.check_run_handler.set_cherry_pick_success(output=output) await asyncio.to_thread( pull_request.create_issue_comment, f"Cherry-picked PR {pull_request.title} into {target_branch}" diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index fa7c03da1..fb66ff72b 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -2,6 +2,7 @@ import asyncio import datetime +import json import re from collections.abc import AsyncGenerator from dataclasses import dataclass @@ -309,6 +310,134 @@ def parse_log_file(self, file_path: Path) -> list[LogEntry]: return entries + def parse_json_log_entry(self, json_line: str) -> LogEntry | None: + """Parse a single JSON log line into a LogEntry object. + + Args: + json_line: Raw JSON line string from webhooks_*.json files + + Returns: + LogEntry object if parsing successful, None otherwise + """ + if not json_line.strip(): + return None + + try: + data = json.loads(json_line) + except json.JSONDecodeError: + return None + + # Parse timestamp from timing.started_at + try: + timing = data.get("timing", {}) + timestamp_str = timing.get("started_at", "") + if not timestamp_str: + return None + timestamp = datetime.datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=datetime.UTC) + except (ValueError, TypeError): + return None + + # Extract PR number from pr object + pr_data = data.get("pr") or {} + pr_number = pr_data.get("number") if pr_data else None + + # Create summary message + message = self._create_json_summary_message(data) + + return LogEntry( + timestamp=timestamp, + level="INFO", # JSON logs don't have levels, default to INFO + logger_name="GithubWebhook", + message=message, + hook_id=data.get("hook_id"), + event_type=data.get("event_type"), + repository=data.get("repository"), + pr_number=pr_number, + github_user=data.get("api_user"), + task_id=None, # Not used in JSON format + task_type=None, # Not used in JSON format + task_status="completed" if data.get("success") else "failed", + token_spend=data.get("token_spend"), + ) + + def _create_json_summary_message(self, data: dict[str, Any]) -> str: + """Create a summary message from JSON log data. + + Args: + data: Parsed JSON log data + + Returns: + Human-readable summary message + """ + parts = [] + + event_type = data.get("event_type", "unknown") + action = data.get("action", "") + repo = data.get("repository", "unknown") + + if action: + parts.append(f"{event_type}/{action}") + else: + parts.append(event_type) + + parts.append(f"for {repo}") + + pr_data = data.get("pr") + if pr_data and pr_data.get("number"): + parts.append(f"PR #{pr_data['number']}") + + if data.get("success"): + parts.append("- completed successfully") + else: + parts.append("- failed") + if data.get("error", {}).get("type"): + parts.append(f"({data['error']['type']})") + + return " ".join(parts) + + def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: + """Parse a JSON log file and return list of LogEntry objects. + + Args: + file_path: Path to the webhooks_*.json file + + Returns: + List of successfully parsed LogEntry objects + """ + entries: list[LogEntry] = [] + + try: + with open(file_path, encoding="utf-8") as f: + for line in f: + entry = self.parse_json_log_entry(line) + if entry: + entries.append(entry) + except OSError as e: + self.logger.error(f"Failed to read JSON log file {file_path}: {e}") + except UnicodeDecodeError as e: + self.logger.error(f"Failed to decode JSON log file {file_path}: {e}") + + return entries + + def get_raw_json_entry(self, json_line: str) -> dict[str, Any] | None: + """Parse a JSON log line and return the raw dictionary. + + Args: + json_line: Raw JSON line string + + Returns: + Parsed JSON dictionary, or None if parsing fails + """ + if not json_line.strip(): + return None + + try: + return json.loads(json_line) + except json.JSONDecodeError: + return None + async def tail_log_file(self, file_path: Path, follow: bool = True) -> AsyncGenerator[LogEntry]: """ Tail a log file and yield new LogEntry objects as they are added. diff --git a/webhook_server/tests/test_check_run_handler.py b/webhook_server/tests/test_check_run_handler.py index 07d332885..d420f9d24 100644 --- a/webhook_server/tests/test_check_run_handler.py +++ b/webhook_server/tests/test_check_run_handler.py @@ -46,6 +46,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.token = "test-token" mock_webhook.container_repository_username = "test-user" mock_webhook.container_repository_password = "test-pass" # pragma: allowlist secret + mock_webhook.ctx = None return mock_webhook @pytest.fixture @@ -85,8 +86,6 @@ async def test_process_pull_request_check_run_webhook_data_not_completed( result = await check_run_handler.process_pull_request_check_run_webhook_data() assert result is False - # Verify completion log was called (skipping is acceptable) - assert check_run_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_process_pull_request_check_run_webhook_data_can_be_merged( @@ -100,8 +99,6 @@ async def test_process_pull_request_check_run_webhook_data_can_be_merged( result = await check_run_handler.process_pull_request_check_run_webhook_data() assert result is False - # Verify completion log was called - assert check_run_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_process_pull_request_check_run_webhook_data_completed_normal( @@ -115,8 +112,6 @@ async def test_process_pull_request_check_run_webhook_data_completed_normal( result = await check_run_handler.process_pull_request_check_run_webhook_data() assert result is True - # Verify completion log was called - assert check_run_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_set_verify_check_queued(self, check_run_handler: CheckRunHandler) -> None: @@ -414,11 +409,11 @@ async def test_set_check_run_status_success(self, check_run_handler: CheckRunHan with patch.object( check_run_handler.github_webhook.repository_by_github_app, "create_check_run", return_value=None ): - with patch.object(check_run_handler.github_webhook.logger, "success") as mock_success: + with patch.object(check_run_handler.github_webhook.logger, "info") as mock_info: await check_run_handler.set_check_run_status( check_run="test-check", status="queued", conclusion="", output=None ) - mock_success.assert_not_called() # Only called for certain conclusions + mock_info.assert_not_called() # Only called for certain conclusions @pytest.mark.asyncio async def test_set_check_run_status_with_conclusion(self, check_run_handler: CheckRunHandler) -> None: @@ -426,11 +421,11 @@ async def test_set_check_run_status_with_conclusion(self, check_run_handler: Che with patch.object( check_run_handler.github_webhook.repository_by_github_app, "create_check_run", return_value=None ): - with patch.object(check_run_handler.github_webhook.logger, "success") as mock_success: + with patch.object(check_run_handler.github_webhook.logger, "info") as mock_info: await check_run_handler.set_check_run_status( check_run="test-check", status="", conclusion="success", output=None ) - mock_success.assert_called_once() + mock_info.assert_called_once() @pytest.mark.asyncio async def test_set_check_run_status_with_output(self, check_run_handler: CheckRunHandler) -> None: @@ -438,12 +433,12 @@ async def test_set_check_run_status_with_output(self, check_run_handler: CheckRu with patch.object( check_run_handler.github_webhook.repository_by_github_app, "create_check_run", return_value=None ): - with patch.object(check_run_handler.github_webhook.logger, "success") as mock_success: + with patch.object(check_run_handler.github_webhook.logger, "info") as mock_info: output = {"title": "Test", "summary": "Summary"} await check_run_handler.set_check_run_status( check_run="test-check", status="queued", conclusion="", output=output ) - mock_success.assert_not_called() + mock_info.assert_not_called() @pytest.mark.asyncio async def test_set_check_run_status_exception_handling(self, check_run_handler: CheckRunHandler) -> None: @@ -1078,9 +1073,6 @@ async def test_check_run_action_not_completed_skips_clone( # Verify: Result is None (skipped processing) assert result is None - # Verify: Logger step was called with skipped status - assert github_webhook.logger.step.called - @pytest.mark.asyncio @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") @patch("webhook_server.libs.github_api.get_repository_github_app_api") @@ -1150,9 +1142,6 @@ async def test_can_be_merged_non_success_skips_clone( # Verify: Result is None (skipped processing) assert result is None - # Verify: Logger step was called with skipped status - assert github_webhook.logger.step.called - @pytest.mark.asyncio @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") @patch("webhook_server.libs.github_api.get_repository_github_app_api") @@ -1237,9 +1226,6 @@ async def test_check_run_completed_normal_clones_repository( # Verify: Result is None (successful processing) assert result is None - # Verify: Logger step was called - assert github_webhook.logger.step.called - @pytest.mark.asyncio @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") @patch("webhook_server.libs.github_api.get_repository_github_app_api") @@ -1317,6 +1303,3 @@ async def test_can_be_merged_success_clones_repository( # Verify: Result is None (successful processing) assert result is None - - # Verify: Logger step was called - assert github_webhook.logger.step.called diff --git a/webhook_server/tests/test_context.py b/webhook_server/tests/test_context.py new file mode 100644 index 000000000..3627e0e57 --- /dev/null +++ b/webhook_server/tests/test_context.py @@ -0,0 +1,589 @@ +"""Tests for webhook_server/utils/context.py. + +Tests WebhookContext dataclass and module-level context management functions. +""" + +from datetime import UTC, datetime, timedelta +from unittest.mock import patch + +import pytest + +from webhook_server.utils.context import ( + WebhookContext, + clear_context, + create_context, + get_context, +) + + +@pytest.fixture +def mock_datetime(): + """Mock datetime.now(UTC) for deterministic tests.""" + base_time = datetime(2024, 1, 15, 10, 30, 0, tzinfo=UTC) + + def mock_now(tz=None): + if tz == UTC: + return base_time + return datetime.now(tz) + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now = mock_now + mock_dt.side_effect = lambda *args, **kwargs: datetime(*args, **kwargs) + yield base_time + + +@pytest.fixture(autouse=True) +def cleanup_context(): + """Clean up context after each test.""" + yield + clear_context() + + +class TestWebhookContext: + """Tests for WebhookContext dataclass.""" + + def test_initialization_with_all_parameters(self, mock_datetime): + """Test WebhookContext initialization with all parameters.""" + ctx = WebhookContext( + hook_id="test-hook-123", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + action="opened", + sender="testuser", + pr_number=42, + pr_title="Test PR", + pr_author="prauthor", + api_user="api-bot", + token_spend=10, + initial_rate_limit=5000, + final_rate_limit=4990, + ) + + assert ctx.hook_id == "test-hook-123" + assert ctx.event_type == "pull_request" + assert ctx.repository == "owner/repo" + assert ctx.repository_full_name == "owner/repo" + assert ctx.action == "opened" + assert ctx.sender == "testuser" + assert ctx.pr_number == 42 + assert ctx.pr_title == "Test PR" + assert ctx.pr_author == "prauthor" + assert ctx.api_user == "api-bot" + assert ctx.started_at == mock_datetime + assert ctx.completed_at is None + assert ctx.workflow_steps == {} + assert ctx._step_start_times == {} + assert ctx.token_spend == 10 + assert ctx.initial_rate_limit == 5000 + assert ctx.final_rate_limit == 4990 + assert ctx.success is True + assert ctx.error is None + + def test_initialization_with_minimal_parameters(self, mock_datetime): + """Test WebhookContext initialization with minimal required parameters.""" + ctx = WebhookContext( + hook_id="test-hook-456", + event_type="check_run", + repository="org/project", + repository_full_name="org/project", + ) + + assert ctx.hook_id == "test-hook-456" + assert ctx.event_type == "check_run" + assert ctx.repository == "org/project" + assert ctx.repository_full_name == "org/project" + assert ctx.action is None + assert ctx.sender is None + assert ctx.pr_number is None + assert ctx.pr_title is None + assert ctx.pr_author is None + assert ctx.api_user == "" + assert ctx.started_at == mock_datetime + assert ctx.success is True + + def test_start_step_creates_step_with_correct_data(self, mock_datetime): + """Test start_step() creates step with timestamp and status 'started'.""" + ctx = WebhookContext( + hook_id="hook-1", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("clone_repository", branch="main", url="https://github.com/owner/repo") + + assert "clone_repository" in ctx.workflow_steps + step = ctx.workflow_steps["clone_repository"] + assert step["timestamp"] == mock_datetime.isoformat() + assert step["status"] == "started" + assert step["error"] is None + assert step["branch"] == "main" + assert step["url"] == "https://github.com/owner/repo" + assert ctx._step_start_times["clone_repository"] == mock_datetime + + def test_start_step_without_additional_data(self, mock_datetime): + """Test start_step() with no additional metadata.""" + ctx = WebhookContext( + hook_id="hook-2", + event_type="issue_comment", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("assign_reviewers") + + assert "assign_reviewers" in ctx.workflow_steps + step = ctx.workflow_steps["assign_reviewers"] + assert step["timestamp"] == mock_datetime.isoformat() + assert step["status"] == "started" + assert step["error"] is None + assert len(step) == 3 # timestamp, status, error + + def test_complete_step_updates_step_with_completed_status_and_duration(self, mock_datetime): + """Test complete_step() updates step with status 'completed' and duration.""" + ctx = WebhookContext( + hook_id="hook-3", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Start step + with patch("webhook_server.utils.context.datetime") as mock_dt: + start_time = datetime(2024, 1, 15, 10, 30, 0, tzinfo=UTC) + mock_dt.now.return_value = start_time + ctx.start_step("build_container") + + # Complete step 2.5 seconds later + with patch("webhook_server.utils.context.datetime") as mock_dt: + end_time = datetime(2024, 1, 15, 10, 30, 2, 500000, tzinfo=UTC) + mock_dt.now.return_value = end_time + ctx.complete_step("build_container", image_tag="v1.2.3", size_mb=150) + + step = ctx.workflow_steps["build_container"] + assert step["status"] == "completed" + assert step["duration_ms"] == 2500 + assert step["error"] is None + assert step["image_tag"] == "v1.2.3" + assert step["size_mb"] == 150 + + def test_complete_step_on_step_that_was_not_started(self, mock_datetime): + """Test complete_step() on a step that wasn't started (edge case).""" + ctx = WebhookContext( + hook_id="hook-4", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Complete step without starting it first + ctx.complete_step("never_started", result="ok") + + assert "never_started" in ctx.workflow_steps + step = ctx.workflow_steps["never_started"] + assert step["status"] == "completed" + assert step["duration_ms"] is None # No start time, so duration is None + assert step["error"] is None + assert step["result"] == "ok" + assert step["timestamp"] == mock_datetime.isoformat() + + def test_fail_step_sets_error_data_at_step_level(self, mock_datetime): + """Test fail_step() sets error data at step level.""" + ctx = WebhookContext( + hook_id="hook-5", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Start step + with patch("webhook_server.utils.context.datetime") as mock_dt: + start_time = datetime(2024, 1, 15, 10, 30, 0, tzinfo=UTC) + mock_dt.now.return_value = start_time + ctx.start_step("run_tests") + + # Fail step 1.5 seconds later + with patch("webhook_server.utils.context.datetime") as mock_dt: + end_time = datetime(2024, 1, 15, 10, 30, 1, 500000, tzinfo=UTC) + mock_dt.now.return_value = end_time + + exception = ValueError("Test failed: assertion error") + traceback_str = "Traceback (most recent call last):\n File test.py line 42\nValueError: Test failed" + + ctx.fail_step( + "run_tests", + exception=exception, + traceback_str=traceback_str, + failed_test="test_authentication", + ) + + step = ctx.workflow_steps["run_tests"] + assert step["status"] == "failed" + assert step["duration_ms"] == 1500 + assert step["error"] is not None + assert step["error"]["type"] == "ValueError" + assert step["error"]["message"] == "Test failed: assertion error" + assert step["error"]["traceback"] == traceback_str + assert step["failed_test"] == "test_authentication" + + def test_fail_step_sets_error_at_top_level_and_success_false(self): + """Test fail_step() sets error at top level AND sets success=False.""" + ctx = WebhookContext( + hook_id="hook-6", + event_type="check_run", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("deploy") + + exception = RuntimeError("Deployment failed: connection timeout") + traceback_str = "Traceback (most recent call last):\n File deploy.py line 100\nRuntimeError" + + ctx.fail_step("deploy", exception=exception, traceback_str=traceback_str) + + # Top-level error set + assert ctx.error is not None + assert ctx.error["type"] == "RuntimeError" + assert ctx.error["message"] == "Deployment failed: connection timeout" + assert ctx.error["traceback"] == traceback_str + + # Success flag set to False + assert ctx.success is False + + def test_fail_step_on_step_that_was_not_started(self, mock_datetime): + """Test fail_step() on a step that wasn't started (edge case).""" + ctx = WebhookContext( + hook_id="hook-7", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + exception = RuntimeError("Missing config key") + traceback_str = "Traceback (most recent call last):\n File config.py\nRuntimeError" + + ctx.fail_step("never_started", exception=exception, traceback_str=traceback_str) + + assert "never_started" in ctx.workflow_steps + step = ctx.workflow_steps["never_started"] + assert step["status"] == "failed" + assert step["duration_ms"] is None # No start time + assert step["error"]["type"] == "RuntimeError" + assert step["error"]["message"] == "Missing config key" + assert ctx.success is False + + def test_to_dict_returns_correct_structure(self, mock_datetime): + """Test to_dict() returns correct structure with all fields.""" + ctx = WebhookContext( + hook_id="hook-8", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + action="opened", + sender="testuser", + api_user="bot-user", + token_spend=15, + initial_rate_limit=5000, + final_rate_limit=4985, + ) + + # Set completed_at + completed_time = mock_datetime + timedelta(seconds=5) + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = completed_time + ctx.completed_at = completed_time + + ctx.start_step("test_step") + ctx.complete_step("test_step", result="success") + + result = ctx.to_dict() + + assert result["hook_id"] == "hook-8" + assert result["event_type"] == "pull_request" + assert result["action"] == "opened" + assert result["sender"] == "testuser" + assert result["repository"] == "owner/repo" + assert result["repository_full_name"] == "owner/repo" + assert result["pr"] is None # No pr_number set + assert result["api_user"] == "bot-user" + + # Timing + assert result["timing"]["started_at"] == mock_datetime.isoformat() + assert result["timing"]["completed_at"] == completed_time.isoformat() + assert result["timing"]["duration_ms"] == 5000 + + # Workflow steps + assert "test_step" in result["workflow_steps"] + + # Metrics + assert result["token_spend"] == 15 + assert result["initial_rate_limit"] == 5000 + assert result["final_rate_limit"] == 4985 + + # Status + assert result["success"] is True + assert result["error"] is None + + def test_to_dict_with_pr_info(self, mock_datetime): + """Test to_dict() with PR info (pr_number set).""" + ctx = WebhookContext( + hook_id="hook-9", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + pr_number=123, + pr_title="Add new feature", + pr_author="contributor", + ) + + result = ctx.to_dict() + + assert result["pr"] is not None + assert result["pr"]["number"] == 123 + assert result["pr"]["title"] == "Add new feature" + assert result["pr"]["author"] == "contributor" + + def test_to_dict_without_pr_info(self, mock_datetime): + """Test to_dict() without PR info (pr_number is None).""" + ctx = WebhookContext( + hook_id="hook-10", + event_type="check_run", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + result = ctx.to_dict() + + assert result["pr"] is None + + def test_to_dict_without_completed_at(self, mock_datetime): + """Test to_dict() when completed_at is None.""" + ctx = WebhookContext( + hook_id="hook-11", + event_type="issue_comment", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + result = ctx.to_dict() + + assert result["timing"]["started_at"] == mock_datetime.isoformat() + assert result["timing"]["completed_at"] is None + assert result["timing"]["duration_ms"] is None + + def test_to_dict_with_error(self, mock_datetime): + """Test to_dict() with error information.""" + ctx = WebhookContext( + hook_id="hook-12", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + exception = ValueError("Something went wrong") + traceback_str = "Traceback..." + ctx.fail_step("failed_step", exception=exception, traceback_str=traceback_str) + + result = ctx.to_dict() + + assert result["success"] is False + assert result["error"] is not None + assert result["error"]["type"] == "ValueError" + assert result["error"]["message"] == "Something went wrong" + + +class TestContextManagement: + """Tests for module-level context management functions.""" + + def test_create_context_creates_and_stores_context(self, mock_datetime): + """Test create_context() creates and stores context in ContextVar.""" + ctx = create_context( + hook_id="delivery-123", + event_type="pull_request", + repository="myorg/myrepo", + repository_full_name="myorg/myrepo", + action="synchronize", + sender="devuser", + api_user="github-bot", + ) + + # Verify returned context + assert isinstance(ctx, WebhookContext) + assert ctx.hook_id == "delivery-123" + assert ctx.event_type == "pull_request" + assert ctx.repository == "myorg/myrepo" + assert ctx.repository_full_name == "myorg/myrepo" + assert ctx.action == "synchronize" + assert ctx.sender == "devuser" + assert ctx.api_user == "github-bot" + assert ctx.started_at == mock_datetime + + # Verify it's stored in ContextVar + stored_ctx = get_context() + assert stored_ctx is ctx + + def test_create_context_with_minimal_parameters(self): + """Test create_context() with minimal required parameters.""" + ctx = create_context( + hook_id="delivery-456", + event_type="check_run", + repository="org/project", + repository_full_name="org/project", + ) + + assert ctx.hook_id == "delivery-456" + assert ctx.event_type == "check_run" + assert ctx.repository == "org/project" + assert ctx.repository_full_name == "org/project" + assert ctx.action is None + assert ctx.sender is None + assert ctx.api_user == "" + + def test_get_context_retrieves_stored_context(self): + """Test get_context() retrieves stored context.""" + # Create context + created_ctx = create_context( + hook_id="delivery-789", + event_type="issue_comment", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Retrieve context + retrieved_ctx = get_context() + + assert retrieved_ctx is created_ctx + assert retrieved_ctx.hook_id == "delivery-789" + assert retrieved_ctx.event_type == "issue_comment" + + def test_get_context_returns_none_when_no_context_set(self): + """Test get_context() returns None when no context set.""" + # Clear any existing context + clear_context() + + ctx = get_context() + assert ctx is None + + def test_clear_context_removes_context_from_contextvar(self): + """Test clear_context() removes context from ContextVar.""" + # Create context + create_context( + hook_id="delivery-999", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Verify context exists + assert get_context() is not None + + # Clear context + clear_context() + + # Verify context is gone + assert get_context() is None + + def test_context_isolation_between_create_calls(self): + """Test that creating new context replaces old context.""" + # Create first context + ctx1 = create_context( + hook_id="first-delivery", + event_type="pull_request", + repository="owner/repo1", + repository_full_name="owner/repo1", + ) + + assert get_context() is ctx1 + + # Create second context + ctx2 = create_context( + hook_id="second-delivery", + event_type="check_run", + repository="owner/repo2", + repository_full_name="owner/repo2", + ) + + # Second context should replace first + current_ctx = get_context() + assert current_ctx is ctx2 + assert current_ctx is not ctx1 + assert current_ctx.hook_id == "second-delivery" + + +class TestWorkflowStepSequence: + """Integration tests for complete workflow step sequences.""" + + def test_complete_workflow_with_successful_steps(self): + """Test a complete workflow with multiple successful steps.""" + ctx = create_context( + hook_id="workflow-1", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + action="opened", + ) + + # Step 1: Clone + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + ctx.start_step("clone_repository", branch="main") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 5, tzinfo=UTC) + ctx.complete_step("clone_repository", commit_sha="abc123") + + # Step 2: Build + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 5, tzinfo=UTC) + ctx.start_step("build_container") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 15, tzinfo=UTC) + ctx.complete_step("build_container", image="myimage:latest") + + # Verify workflow + assert len(ctx.workflow_steps) == 2 + assert ctx.workflow_steps["clone_repository"]["status"] == "completed" + assert ctx.workflow_steps["clone_repository"]["duration_ms"] == 5000 + assert ctx.workflow_steps["build_container"]["status"] == "completed" + assert ctx.workflow_steps["build_container"]["duration_ms"] == 10000 + assert ctx.success is True + assert ctx.error is None + + def test_workflow_with_failed_step(self): + """Test workflow with a failed step.""" + ctx = create_context( + hook_id="workflow-2", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Step 1: Success + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + ctx.start_step("validate_config") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 1, tzinfo=UTC) + ctx.complete_step("validate_config") + + # Step 2: Failure + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 1, tzinfo=UTC) + ctx.start_step("run_tests") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = datetime(2024, 1, 15, 10, 0, 10, tzinfo=UTC) + exception = AssertionError("Test assertion failed") + ctx.fail_step("run_tests", exception=exception, traceback_str="Traceback...") + + # Verify workflow state + assert len(ctx.workflow_steps) == 2 + assert ctx.workflow_steps["validate_config"]["status"] == "completed" + assert ctx.workflow_steps["run_tests"]["status"] == "failed" + assert ctx.success is False + assert ctx.error is not None + assert ctx.error["type"] == "AssertionError" diff --git a/webhook_server/tests/test_github_api.py b/webhook_server/tests/test_github_api.py index 2ff8f5adf..89ce2dd7e 100644 --- a/webhook_server/tests/test_github_api.py +++ b/webhook_server/tests/test_github_api.py @@ -1495,9 +1495,9 @@ async def test_process_push_event_deletion( ) # Verify completion log with "deletion event (skipped)" message - success_calls = [str(call) for call in mock_logger.success.call_args_list] - assert any("deletion event (skipped)" in call.lower() for call in success_calls), ( - f"Expected 'deletion event (skipped)' in success logs. Got: {success_calls}" + info_completion_calls = [str(call) for call in mock_logger.info.call_args_list] + assert any("deletion event (skipped)" in call.lower() for call in info_completion_calls), ( + f"Expected 'deletion event (skipped)' in info logs. Got: {info_completion_calls}" ) @patch.dict(os.environ, {"WEBHOOK_SERVER_DATA_DIR": "webhook_server/tests/manifests"}) diff --git a/webhook_server/tests/test_helpers_sanitization.py b/webhook_server/tests/test_helpers_sanitization.py index b8dc6fe47..0fe51a9b9 100644 --- a/webhook_server/tests/test_helpers_sanitization.py +++ b/webhook_server/tests/test_helpers_sanitization.py @@ -10,7 +10,6 @@ _redact_secrets, _sanitize_log_value, _truncate_output, - format_task_fields, run_command, strip_ansi_codes, ) @@ -87,82 +86,6 @@ def test_clean_value_unchanged_content(self) -> None: assert result == value # No brackets, newlines, or returns to sanitize -class TestFormatTaskFields: - """Test the format_task_fields function with sanitization.""" - - def test_format_task_fields_normal(self) -> None: - """Test normal task field formatting.""" - result = format_task_fields( - task_id="check_tox", - task_type="ci_check", - task_status="started", - ) - assert result == "[task_id=check_tox] [task_type=ci_check] [task_status=started]" - - def test_format_task_fields_with_injection(self) -> None: - """Test task field formatting with injection attempt.""" - # Try to inject additional fields via bracket manipulation - result = format_task_fields( - task_id="normal] [task_id=injected", - task_type="ci_check", - task_status="started", - ) - # Brackets should be escaped to prevent injection - assert result == "[task_id=normal\\] \\[task_id=injected] [task_type=ci_check] [task_status=started]" - - def test_format_task_fields_with_newlines(self) -> None: - """Test task field formatting with newline injection attempt.""" - result = format_task_fields( - task_id="check_tox\nFAKE_LOG_ENTRY", - task_type="ci_check", - task_status="started", - ) - # Newlines should be replaced with spaces - assert "\n" not in result - assert result == "[task_id=check_tox FAKE_LOG_ENTRY] [task_type=ci_check] [task_status=started]" - - def test_format_task_fields_with_tabs_and_control_chars(self) -> None: - """Test task field formatting with tabs and control characters.""" - result = format_task_fields( - task_id="check\ttox\x00test", - task_type="ci_check", - task_status="started", - ) - # Note: Current implementation preserves tabs and control chars (only sanitizes \n, \r, and brackets) - # This test documents current behavior - assert isinstance(result, str) - assert "task_id=" in result - assert "task_type=ci_check" in result - assert "task_status=started" in result - - def test_format_task_fields_partial(self) -> None: - """Test formatting with only some fields provided.""" - result = format_task_fields(task_id="check_tox") - assert result == "[task_id=check_tox]" - - result = format_task_fields(task_type="ci_check", task_status="started") - assert result == "[task_type=ci_check] [task_status=started]" - - def test_format_task_fields_empty(self) -> None: - """Test formatting with no fields provided.""" - result = format_task_fields() - assert result == "" - - def test_format_task_fields_all_injections(self) -> None: - """Test formatting with injection attempts in all fields.""" - result = format_task_fields( - task_id="id]\n[fake=field", - task_type="type]\r\n[fake=log", - task_status="status[bracket]test", - ) - # All dangerous characters should be sanitized - assert "\n" not in result - assert "\r" not in result - # Brackets should be escaped - assert "\\[" in result - assert "\\]" in result - - class TestRedactSecrets: """Test the _redact_secrets function.""" diff --git a/webhook_server/tests/test_issue_comment_handler.py b/webhook_server/tests/test_issue_comment_handler.py index 183cdd1f0..7e6de97fd 100644 --- a/webhook_server/tests/test_issue_comment_handler.py +++ b/webhook_server/tests/test_issue_comment_handler.py @@ -40,6 +40,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.issue_url_for_welcome_msg = "welcome-message-url" mock_webhook.build_and_push_container = True mock_webhook.current_pull_request_supported_retest = [TOX_STR, "pre-commit"] + mock_webhook.ctx = None return mock_webhook @pytest.fixture @@ -63,10 +64,6 @@ async def test_process_comment_webhook_data_edited_action(self, issue_comment_ha with patch.object(issue_comment_handler, "user_commands") as mock_user_commands: await issue_comment_handler.process_comment_webhook_data(Mock()) mock_user_commands.assert_not_called() - # Verify completion log was emitted with task_status='completed' - assert any( - "task_status=completed" in call.args[0] for call in issue_comment_handler.logger.step.call_args_list - ), "Expected a completion log with task_status='completed'" @pytest.mark.asyncio async def test_process_comment_webhook_data_deleted_action( @@ -78,10 +75,6 @@ async def test_process_comment_webhook_data_deleted_action( with patch.object(issue_comment_handler, "user_commands") as mock_user_commands: await issue_comment_handler.process_comment_webhook_data(Mock()) mock_user_commands.assert_not_called() - # Verify completion log was emitted with task_status='completed' - assert any( - "task_status=completed" in call.args[0] for call in issue_comment_handler.logger.step.call_args_list - ), "Expected a completion log with task_status='completed'" @pytest.mark.asyncio async def test_process_comment_webhook_data_welcome_message( @@ -93,10 +86,6 @@ async def test_process_comment_webhook_data_welcome_message( with patch.object(issue_comment_handler, "user_commands") as mock_user_commands: await issue_comment_handler.process_comment_webhook_data(Mock()) mock_user_commands.assert_not_called() - # Verify completion log was emitted with task_status='completed' - assert any( - "task_status=completed" in call.args[0] for call in issue_comment_handler.logger.step.call_args_list - ), "Expected a completion log with task_status='completed'" @pytest.mark.asyncio async def test_process_comment_webhook_data_normal_comment( @@ -108,10 +97,6 @@ async def test_process_comment_webhook_data_normal_comment( with patch.object(issue_comment_handler, "user_commands") as mock_user_commands: await issue_comment_handler.process_comment_webhook_data(Mock()) mock_user_commands.assert_called_once() - # Verify completion log was emitted with task_status='completed' - assert any( - "task_status=completed" in call.args[0] for call in issue_comment_handler.logger.step.call_args_list - ), "Expected a completion log with task_status='completed'" @pytest.mark.asyncio async def test_process_comment_webhook_data_no_commands(self, issue_comment_handler: IssueCommentHandler) -> None: @@ -121,10 +106,6 @@ async def test_process_comment_webhook_data_no_commands(self, issue_comment_hand with patch.object(issue_comment_handler, "user_commands") as mock_user_commands: await issue_comment_handler.process_comment_webhook_data(Mock()) mock_user_commands.assert_not_called() - # Verify completion log was emitted with task_status='completed' - assert any( - "task_status=completed" in call.args[0] for call in issue_comment_handler.logger.step.call_args_list - ), "Expected a completion log with task_status='completed'" @pytest.mark.asyncio async def test_process_comment_webhook_data_multiple_commands( diff --git a/webhook_server/tests/test_labels_handler.py b/webhook_server/tests/test_labels_handler.py index 9276bd841..a8c2df4b7 100644 --- a/webhook_server/tests/test_labels_handler.py +++ b/webhook_server/tests/test_labels_handler.py @@ -50,6 +50,7 @@ def mock_github_webhook(self) -> Mock: # Configure config.get_value to return None for pr-size-thresholds by default # This ensures existing tests use static defaults webhook.config.get_value.return_value = None + webhook.ctx = None return webhook @pytest.fixture @@ -134,8 +135,6 @@ async def test_add_label_success(self, labels_handler: LabelsHandler, mock_pull_ ): await labels_handler._add_label(mock_pull_request, "test-label") mock_pull_request.add_to_labels.assert_called_once_with("test-label") - # Verify completion log was called - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_add_label_too_long(self, labels_handler: LabelsHandler, mock_pull_request: Mock) -> None: @@ -144,8 +143,6 @@ async def test_add_label_too_long(self, labels_handler: LabelsHandler, mock_pull await labels_handler._add_label(mock_pull_request, long_label) # Verify label was not added mock_pull_request.add_to_labels.assert_not_called() - # Verify completion log was called (label too long is acceptable) - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_add_label_already_exists(self, labels_handler: LabelsHandler, mock_pull_request: Mock) -> None: @@ -154,8 +151,6 @@ async def test_add_label_already_exists(self, labels_handler: LabelsHandler, moc await labels_handler._add_label(mock_pull_request, "existing-label") # Verify label was not added (already exists) mock_pull_request.add_to_labels.assert_not_called() - # Verify completion log was called (label already exists is acceptable) - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_add_label_static_label(self, labels_handler: LabelsHandler, mock_pull_request: Mock) -> None: @@ -165,8 +160,6 @@ async def test_add_label_static_label(self, labels_handler: LabelsHandler, mock_ await labels_handler._add_label(mock_pull_request, static_label) # Verify label was added mock_pull_request.add_to_labels.assert_called_once_with(static_label) - # Verify completion log was called - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_add_label_exception_handling(self, labels_handler: LabelsHandler, mock_pull_request: Mock) -> None: @@ -255,8 +248,6 @@ async def test_remove_label_not_exists(self, labels_handler: LabelsHandler, mock assert result is False # Verify that remove_from_labels was not called (we don't check first to save API calls) mock_pull_request.remove_from_labels.assert_not_called() - # Verify completion log was called (label doesn't exist is acceptable) - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_remove_label_wait_timeout(self, labels_handler: LabelsHandler, mock_pull_request: Mock) -> None: @@ -272,8 +263,6 @@ async def test_remove_label_wait_timeout(self, labels_handler: LabelsHandler, mo result = await labels_handler._remove_label(mock_pull_request, "test-label") assert result is False mock_pull_request.remove_from_labels.assert_called_once_with("test-label") - # Verify failure log was called (timeout waiting for removal) - assert labels_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_add_label_dynamic_label_wait_exception( diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index 764fb07ed..8a9a06942 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -758,3 +758,610 @@ def test_parse_log_entry_with_token_spend(self) -> None: assert entry is not None assert entry.token_spend == 23 assert entry.hook_id == "6143a030-bbd7-11f0-95bd-b07354b8711c" + + +class TestJSONLogParsing: + """Test cases for JSON log parsing functionality.""" + + def test_parse_json_log_entry_valid_json(self) -> None: + """Test parsing valid JSON log entry returns LogEntry.""" + parser = LogParser() + json_line = """{ + "hook_id": "abc123-def456", + "event_type": "pull_request", + "action": "opened", + "repository": "org/test-repo", + "api_user": "test-user", + "success": true, + "token_spend": 35, + "timing": { + "started_at": "2025-07-31T10:30:00.123000Z" + }, + "pr": { + "number": 123, + "title": "Test PR" + } + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.timestamp == datetime.datetime(2025, 7, 31, 10, 30, 0, 123000, tzinfo=datetime.UTC) + assert entry.level == "INFO" + assert entry.logger_name == "GithubWebhook" + assert entry.hook_id == "abc123-def456" + assert entry.event_type == "pull_request" + assert entry.repository == "org/test-repo" + assert entry.pr_number == 123 + assert entry.github_user == "test-user" + assert entry.task_status == "completed" + assert entry.token_spend == 35 + assert "pull_request/opened" in entry.message + assert "org/test-repo" in entry.message + assert "PR #123" in entry.message + assert "completed successfully" in entry.message + + def test_parse_json_log_entry_failed_webhook(self) -> None: + """Test parsing JSON log entry for failed webhook.""" + parser = LogParser() + json_line = """{ + "hook_id": "failed-hook", + "event_type": "issue_comment", + "action": "created", + "repository": "org/repo", + "api_user": "user1", + "success": false, + "token_spend": 10, + "timing": { + "started_at": "2025-07-31T11:00:00Z" + }, + "error": { + "type": "ValidationError" + }, + "pr": { + "number": 456 + } + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.hook_id == "failed-hook" + assert entry.task_status == "failed" + assert entry.pr_number == 456 + assert "failed" in entry.message + assert "ValidationError" in entry.message + + def test_parse_json_log_entry_without_pr(self) -> None: + """Test parsing JSON log entry without PR information.""" + parser = LogParser() + json_line = """{ + "hook_id": "push-hook", + "event_type": "push", + "repository": "org/repo", + "api_user": "user2", + "success": true, + "timing": { + "started_at": "2025-07-31T12:00:00Z" + } + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.hook_id == "push-hook" + assert entry.event_type == "push" + assert entry.pr_number is None + assert "push" in entry.message + assert "org/repo" in entry.message + + def test_parse_json_log_entry_invalid_json(self) -> None: + """Test parsing invalid JSON returns None.""" + parser = LogParser() + invalid_lines = [ + "not json at all", + "{incomplete json", + '{"key": invalid}', + "", + " ", + ] + + for line in invalid_lines: + entry = parser.parse_json_log_entry(line) + assert entry is None + + def test_parse_json_log_entry_missing_timestamp(self) -> None: + """Test parsing JSON without timing.started_at returns None.""" + parser = LogParser() + json_line = """{ + "hook_id": "no-timestamp", + "event_type": "push", + "repository": "org/repo", + "api_user": "user1", + "success": true + }""" + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + def test_parse_json_log_entry_invalid_timestamp(self) -> None: + """Test parsing JSON with invalid timestamp returns None.""" + parser = LogParser() + json_line = """{ + "hook_id": "bad-timestamp", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": { + "started_at": "not-a-timestamp" + } + }""" + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + def test_parse_json_log_entry_timezone_handling(self) -> None: + """Test parsing JSON with different timezone formats.""" + parser = LogParser() + + # Test with Z suffix + json_z = """{ + "hook_id": "z-time", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": {"started_at": "2025-07-31T10:00:00Z"} + }""" + entry = parser.parse_json_log_entry(json_z) + assert entry is not None + assert entry.timestamp.tzinfo is not None + + # Test with +00:00 suffix + json_plus = """{ + "hook_id": "plus-time", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": {"started_at": "2025-07-31T10:00:00+00:00"} + }""" + entry = parser.parse_json_log_entry(json_plus) + assert entry is not None + assert entry.timestamp.tzinfo is not None + + def test_parse_json_log_entry_extracts_all_fields(self) -> None: + """Test that parse_json_log_entry extracts all fields correctly.""" + parser = LogParser() + json_line = """{ + "hook_id": "complete-hook", + "event_type": "pull_request", + "action": "synchronize", + "repository": "owner/repo-name", + "api_user": "github-user", + "success": true, + "token_spend": 42, + "timing": { + "started_at": "2025-08-01T14:30:45.678000Z" + }, + "pr": { + "number": 999 + }, + "error": { + "type": "SomeError" + } + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.timestamp == datetime.datetime(2025, 8, 1, 14, 30, 45, 678000, tzinfo=datetime.UTC) + assert entry.level == "INFO" + assert entry.logger_name == "GithubWebhook" + assert entry.message is not None + assert entry.hook_id == "complete-hook" + assert entry.event_type == "pull_request" + assert entry.repository == "owner/repo-name" + assert entry.pr_number == 999 + assert entry.github_user == "github-user" + assert entry.task_id is None + assert entry.task_type is None + assert entry.task_status == "completed" + assert entry.token_spend == 42 + + def test_parse_json_log_file_multiple_entries(self, tmp_path: Path) -> None: + """Test parsing JSON log file with multiple entries.""" + parser = LogParser() + # Each JSON object must be on a single line (JSON lines format) + json_content = ( + '{"hook_id": "hook1", "event_type": "push", "repository": "org/repo1", ' + '"api_user": "user1", "success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + '{"hook_id": "hook2", "event_type": "pull_request", "action": "opened", ' + '"repository": "org/repo2", "api_user": "user2", "success": true, ' + '"timing": {"started_at": "2025-07-31T10:01:00Z"}, "pr": {"number": 123}}\n' + '{"hook_id": "hook3", "event_type": "issue_comment", "repository": "org/repo3", ' + '"api_user": "user3", "success": false, "timing": {"started_at": "2025-07-31T10:02:00Z"}}\n' + ) + log_file = tmp_path / "webhooks_test.json" + log_file.write_text(json_content) + + entries = parser.parse_json_log_file(log_file) + + assert len(entries) == 3 + assert entries[0].hook_id == "hook1" + assert entries[0].event_type == "push" + assert entries[0].repository == "org/repo1" + assert entries[1].hook_id == "hook2" + assert entries[1].pr_number == 123 + assert entries[1].task_status == "completed" + assert entries[2].hook_id == "hook3" + assert entries[2].task_status == "failed" + + def test_parse_json_log_file_empty_file(self, tmp_path: Path) -> None: + """Test parsing empty JSON log file.""" + parser = LogParser() + log_file = tmp_path / "empty.json" + log_file.write_text("") + + entries = parser.parse_json_log_file(log_file) + + assert len(entries) == 0 + + def test_parse_json_log_file_skips_invalid_lines(self, tmp_path: Path) -> None: + """Test that parse_json_log_file skips invalid JSON lines.""" + parser = LogParser() + # Each JSON object must be on a single line (JSON lines format) + json_content = ( + '{"hook_id": "valid1", "event_type": "push", "repository": "org/repo", ' + '"success": true, "timing": {"started_at": "2025-07-31T10:00:00Z"}}\n' + "this is not valid json\n" + "{incomplete json\n" + '{"hook_id": "valid2", "event_type": "pull_request", "repository": "org/repo", ' + '"success": true, "timing": {"started_at": "2025-07-31T10:01:00Z"}}\n' + '{"missing_timestamp": true}\n' + ) + log_file = tmp_path / "mixed.json" + log_file.write_text(json_content) + + entries = parser.parse_json_log_file(log_file) + + assert len(entries) == 2 + assert entries[0].hook_id == "valid1" + assert entries[1].hook_id == "valid2" + + def test_parse_json_log_file_handles_oserror(self, tmp_path: Path, caplog) -> None: + """Test that parse_json_log_file handles OSError gracefully.""" + parser = LogParser() + caplog.set_level(logging.ERROR) + nonexistent_file = tmp_path / "nonexistent.json" + + entries = parser.parse_json_log_file(nonexistent_file) + + assert entries == [] + + def test_parse_json_log_file_handles_unicode_error(self, tmp_path: Path, caplog) -> None: + """Test that parse_json_log_file handles UnicodeDecodeError gracefully.""" + parser = LogParser() + caplog.set_level(logging.ERROR) + + # Create a file with invalid UTF-8 bytes + log_file = tmp_path / "invalid_utf8.json" + log_file.write_bytes(b"\x80\x81\x82\x83") + + entries = parser.parse_json_log_file(log_file) + + assert entries == [] + + def test_get_raw_json_entry_valid_json(self) -> None: + """Test get_raw_json_entry with valid JSON returns dictionary.""" + parser = LogParser() + json_line = """{ + "hook_id": "test-hook", + "event_type": "push", + "repository": "org/repo", + "nested": { + "data": "value" + } + }""" + + result = parser.get_raw_json_entry(json_line) + + assert result is not None + assert isinstance(result, dict) + assert result["hook_id"] == "test-hook" + assert result["event_type"] == "push" + assert result["repository"] == "org/repo" + assert result["nested"]["data"] == "value" + + def test_get_raw_json_entry_invalid_json(self) -> None: + """Test get_raw_json_entry with invalid JSON returns None.""" + parser = LogParser() + invalid_lines = [ + "not json", + "{incomplete", + '{"bad": value}', + "", + " ", + ] + + for line in invalid_lines: + result = parser.get_raw_json_entry(line) + assert result is None + + def test_get_raw_json_entry_preserves_structure(self) -> None: + """Test that get_raw_json_entry preserves complete JSON structure.""" + parser = LogParser() + json_line = """{ + "hook_id": "complex-hook", + "timing": { + "started_at": "2025-07-31T10:00:00Z", + "ended_at": "2025-07-31T10:00:05Z", + "duration_seconds": 5.123 + }, + "pr": { + "number": 123, + "title": "Test PR", + "labels": ["bug", "enhancement"] + }, + "error": null, + "success": true, + "token_spend": 42 + }""" + + result = parser.get_raw_json_entry(json_line) + + assert result is not None + assert result["hook_id"] == "complex-hook" + assert result["timing"]["duration_seconds"] == 5.123 + assert result["pr"]["labels"] == ["bug", "enhancement"] + assert result["error"] is None + assert result["success"] is True + assert result["token_spend"] == 42 + + def test_json_summary_message_format_with_action(self) -> None: + """Test that JSON summary message includes action when present.""" + parser = LogParser() + json_line = """{ + "hook_id": "hook1", + "event_type": "pull_request", + "action": "synchronize", + "repository": "org/repo", + "success": true, + "timing": {"started_at": "2025-07-31T10:00:00Z"} + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert "pull_request/synchronize" in entry.message + + def test_json_summary_message_format_without_action(self) -> None: + """Test that JSON summary message works without action.""" + parser = LogParser() + json_line = """{ + "hook_id": "hook1", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": {"started_at": "2025-07-31T10:00:00Z"} + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert "push for org/repo" in entry.message + assert "/" not in entry.message.split("for")[0] # No action before "for" + + def test_parse_json_log_entry_null_pr_field(self) -> None: + """Test parsing JSON with null pr field.""" + parser = LogParser() + json_line = """{ + "hook_id": "null-pr", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": {"started_at": "2025-07-31T10:00:00Z"}, + "pr": null + }""" + + entry = parser.parse_json_log_entry(json_line) + + assert entry is not None + assert entry.pr_number is None + + def test_parse_json_log_entry_empty_timing_object(self) -> None: + """Test parsing JSON with empty timing object returns None.""" + parser = LogParser() + json_line = """{ + "hook_id": "no-time", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": {} + }""" + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + +class TestAdditionalCoverageTests: + """Additional tests for edge cases to reach 90%+ coverage.""" + + def test_parse_log_entry_invalid_pr_number(self) -> None: + """Test parsing log entry with invalid PR number string.""" + parser = LogParser() + log_line = ( + "2025-07-31T10:00:00.000000 GithubWebhook INFO " + "test-repo [pull_request][hook1][user][PR abc]: Invalid PR number" + ) + + entry = parser.parse_log_entry(log_line) + + assert entry is not None + assert entry.pr_number is None # Invalid PR number should be None + + def test_extract_task_fields_with_escaped_brackets(self) -> None: + """Test extracting task fields with escaped brackets in values.""" + parser = LogParser() + log_line = ( + "2025-07-31T10:00:00.000000 GithubWebhook INFO " + "test-repo [pull_request][hook1][user]: " + "[task_id=test\\[id\\]] [task_type=ci\\]check] [task_status=started] Message" + ) + + entry = parser.parse_log_entry(log_line) + + assert entry is not None + assert entry.task_id == "test[id]" # Escaped brackets should be unescaped + assert entry.task_type == "ci]check" + assert entry.task_status == "started" + assert entry.message == "Message" + + def test_extract_token_spend_with_invalid_number_in_pattern(self) -> None: + """Test token spend extraction when number conversion fails.""" + parser = LogParser() + # This tests the ValueError exception handler in extract_token_spend + # Even though the pattern matches, if int() fails, it should return None + message = "Some message without valid token spend" + + result = parser.extract_token_spend(message) + + assert result is None + + def test_parse_json_log_entry_type_error_timestamp(self) -> None: + """Test parsing JSON with non-string timestamp value (triggers AttributeError/TypeError).""" + parser = LogParser() + json_line = """{ + "hook_id": "type-error", + "event_type": "push", + "repository": "org/repo", + "success": true, + "timing": { + "started_at": null + } + }""" + + entry = parser.parse_json_log_entry(json_line) + assert entry is None + + def test_parse_log_entry_naive_timestamp(self) -> None: + """Test parsing log entry with naive timestamp (no timezone).""" + parser = LogParser() + log_line = "2025-07-31T10:00:00.000000 GithubWebhook INFO test-repo [push][hook1][user]: Test" + + entry = parser.parse_log_entry(log_line) + + assert entry is not None + assert entry.timestamp.tzinfo is not None # Should be timezone-aware + + def test_filter_get_unique_values(self) -> None: + """Test LogFilter.get_unique_values method.""" + log_filter = LogFilter() + entries = [ + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0), + level="INFO", + logger_name="main", + message="msg1", + repository="org/repo1", + ), + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0), + level="DEBUG", + logger_name="main", + message="msg2", + repository="org/repo2", + ), + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0), + level="INFO", + logger_name="main", + message="msg3", + repository="org/repo1", + ), + ] + + unique_repos = log_filter.get_unique_values(entries, "repository") + unique_levels = log_filter.get_unique_values(entries, "level") + + assert sorted(unique_repos) == ["org/repo1", "org/repo2"] + assert sorted(unique_levels) == ["DEBUG", "INFO"] + + def test_filter_get_entry_count_by_field(self) -> None: + """Test LogFilter.get_entry_count_by_field method.""" + log_filter = LogFilter() + entries = [ + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0), + level="INFO", + logger_name="main", + message="msg1", + event_type="push", + ), + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0), + level="DEBUG", + logger_name="main", + message="msg2", + event_type="pull_request", + ), + LogEntry( + timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0), + level="INFO", + logger_name="main", + message="msg3", + event_type="push", + ), + ] + + counts_by_event = log_filter.get_entry_count_by_field(entries, "event_type") + counts_by_level = log_filter.get_entry_count_by_field(entries, "level") + + assert counts_by_event == {"push": 2, "pull_request": 1} + assert counts_by_level == {"INFO": 2, "DEBUG": 1} + + @pytest.mark.asyncio + async def test_monitor_log_directory_with_rotated_files(self, tmp_path: Path) -> None: + """Test that monitor_log_directory ignores rotated log files.""" + parser = LogParser() + + # Create multiple log files including rotated ones + current_log = tmp_path / "app.log" + current_log.write_text("2025-07-31T10:00:00.000000 main INFO Initial entry\n") + (tmp_path / "app.log.1").write_text("2025-07-31T09:00:00.000000 main INFO Old entry\n") + (tmp_path / "app.log.2").write_text("2025-07-31T08:00:00.000000 main INFO Older entry\n") + + entries = [] + + # Helper to collect entries from async generator + async def collect_entries(async_gen, max_entries=1): + count = 0 + async for entry in async_gen: + entries.append(entry) + count += 1 + if count >= max_entries: + break + + # Start monitoring task + monitor_task = asyncio.create_task( + collect_entries(parser.monitor_log_directory(tmp_path, pattern="*.log"), max_entries=1) + ) + + # Give the monitor a moment to start and seek to end of file + await asyncio.sleep(0.1) + + # Append new content to the current log file (not rotated ones) + with open(current_log, "a") as f: + f.write("2025-07-31T10:01:00.000000 main INFO New entry after monitoring started\n") + f.flush() + + # Wait for the monitor to collect the new entry with timeout + try: + await asyncio.wait_for(monitor_task, timeout=2.0) + except TimeoutError: + # Cancel the task and wait for it to complete + monitor_task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await monitor_task + + # Should have collected exactly 1 entry from the new content (not from rotated files) + assert len(entries) == 1 + assert entries[0].message == "New entry after monitoring started" diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py new file mode 100644 index 000000000..60a0e26ab --- /dev/null +++ b/webhook_server/tests/test_log_viewer.py @@ -0,0 +1,433 @@ +"""Tests for log viewer JSON functionality.""" + +import copy +import json +import time +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +from fastapi import HTTPException + +from webhook_server.web.log_viewer import LogViewerController + + +class TestLogViewerJSONMethods: + """Test cases for LogViewerController JSON log methods.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + @pytest.fixture + def sample_json_webhook_data(self) -> dict: + """Create sample JSON webhook log data.""" + return { + "hook_id": "test-hook-123", + "event_type": "pull_request", + "action": "opened", + "repository": "org/test-repo", + "sender": "test-user", + "pr": { + "number": 456, + "title": "Test PR", + "url": "https://github.com/org/test-repo/pull/456", + }, + "timing": { + "started_at": "2025-01-05T10:00:00.000000Z", + "ended_at": "2025-01-05T10:00:05.000000Z", + "duration_seconds": 5.0, + }, + "workflow_steps": { + "step1": {"status": "completed", "duration_ms": 1000}, + "step2": {"status": "completed", "duration_ms": 2000}, + }, + "token_spend": 35, + "success": True, + } + + def create_json_log_file(self, log_dir: Path, filename: str, entries: list[dict]) -> Path: + """Create a test JSON log file with entries. + + Args: + log_dir: Directory to create the log file in + filename: Name of the log file + entries: List of JSON webhook data dictionaries + + Returns: + Path to created log file + """ + log_file = log_dir / filename + with open(log_file, "w", encoding="utf-8") as f: + for entry in entries: + f.write(json.dumps(entry) + "\n") + return log_file + + def test_stream_json_log_entries_yields_entries(self, controller, tmp_path, sample_json_webhook_data): + """Test that _stream_json_log_entries yields JSON entries from log files.""" + # Create logs directory + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a JSON log file with multiple entries + entry1 = sample_json_webhook_data.copy() + entry2 = sample_json_webhook_data.copy() + entry2["hook_id"] = "test-hook-456" + entry2["pr"]["number"] = 789 + + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [entry1, entry2]) + + # Stream JSON entries + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should yield 2 entries (reversed order - newest first) + assert len(entries) == 2 + assert entries[0]["hook_id"] == "test-hook-456" + assert entries[1]["hook_id"] == "test-hook-123" + + def test_stream_json_log_entries_respects_max_files_limit(self, controller, tmp_path, sample_json_webhook_data): + """Test that _stream_json_log_entries respects max_files limit.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create 5 JSON log files + for i in range(5): + entry = sample_json_webhook_data.copy() + entry["hook_id"] = f"hook-file-{i}" + self.create_json_log_file(log_dir, f"webhooks_2025-01-0{i}.json", [entry]) + + # Stream with max_files=2 + entries = list(controller._stream_json_log_entries(max_files=2, max_entries=100)) + + # Should only process 2 files (2 entries total) + assert len(entries) == 2 + + def test_stream_json_log_entries_respects_max_entries_limit(self, controller, tmp_path, sample_json_webhook_data): + """Test that _stream_json_log_entries respects max_entries limit.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a JSON log file with 10 entries + entries_data = [] + for i in range(10): + entry = sample_json_webhook_data.copy() + entry["hook_id"] = f"hook-{i}" + entries_data.append(entry) + + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", entries_data) + + # Stream with max_entries=5 + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=5)) + + # Should only yield 5 entries + assert len(entries) == 5 + + def test_stream_json_log_entries_skips_invalid_json_lines(self, controller, tmp_path, sample_json_webhook_data): + """Test that _stream_json_log_entries skips invalid JSON lines.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a log file with mixed valid and invalid JSON + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + # Valid JSON + f.write(json.dumps(sample_json_webhook_data) + "\n") + # Invalid JSON lines + f.write("not valid json\n") + f.write('{"incomplete": \n') + # Another valid JSON + entry2 = sample_json_webhook_data.copy() + entry2["hook_id"] = "hook-valid-2" + f.write(json.dumps(entry2) + "\n") + + # Stream JSON entries + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should only yield 2 valid entries (reversed order) + assert len(entries) == 2 + assert entries[0]["hook_id"] == "hook-valid-2" + assert entries[1]["hook_id"] == "test-hook-123" + + def test_stream_json_log_entries_no_log_directory(self, controller, tmp_path): + """Test _stream_json_log_entries when log directory doesn't exist.""" + # Don't create logs directory + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should yield nothing + assert len(entries) == 0 + + def test_stream_json_log_entries_empty_directory(self, controller, tmp_path): + """Test _stream_json_log_entries with empty log directory.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # No log files created + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should yield nothing + assert len(entries) == 0 + + def test_stream_json_log_entries_newest_first_ordering(self, controller, tmp_path, sample_json_webhook_data): + """Test that _stream_json_log_entries returns newest files first.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create multiple JSON log files with different modification times + # Older file + entry1 = sample_json_webhook_data.copy() + entry1["hook_id"] = "old-hook" + self.create_json_log_file(log_dir, "webhooks_2025-01-01.json", [entry1]) + + time.sleep(0.01) # Ensure different mtime + + # Newer file + entry2 = sample_json_webhook_data.copy() + entry2["hook_id"] = "new-hook" + file2 = self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [entry2]) + + # Ensure file2 has a newer mtime + file2.touch() + + # Stream entries + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should process newer file first (entries within file are reversed) + # So first entry should be from newer file + assert len(entries) == 2 + assert entries[0]["hook_id"] == "new-hook" + assert entries[1]["hook_id"] == "old-hook" + + def test_get_workflow_steps_json_returns_workflow_data(self, controller, tmp_path, sample_json_webhook_data): + """Test get_workflow_steps_json returns workflow steps for valid hook_id.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON log file + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [sample_json_webhook_data]) + + # Get workflow steps + result = controller.get_workflow_steps_json("test-hook-123") + + # Should return structured workflow data + assert result["hook_id"] == "test-hook-123" + assert result["event_type"] == "pull_request" + assert result["action"] == "opened" + assert result["repository"] == "org/test-repo" + assert result["sender"] == "test-user" + assert result["pr"]["number"] == 456 + assert result["timing"]["duration_seconds"] == 5.0 + assert result["steps"] == sample_json_webhook_data["workflow_steps"] + assert result["token_spend"] == 35 + assert result["success"] is True + + def test_get_workflow_steps_json_returns_none_for_unknown_hook_id( + self, controller, tmp_path, sample_json_webhook_data + ): + """Test get_workflow_steps_json raises HTTPException for unknown hook_id.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON log file with different hook_id + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [sample_json_webhook_data]) + + # Try to get workflow steps for non-existent hook_id + with pytest.raises(HTTPException) as exc: + controller.get_workflow_steps_json("non-existent-hook") + + # Should raise 404 + assert exc.value.status_code == 404 + assert "No JSON log entry found" in str(exc.value.detail) + + def test_get_workflow_steps_json_no_log_files(self, controller, tmp_path): + """Test get_workflow_steps_json when no log files exist.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Try to get workflow steps when no logs exist + with pytest.raises(HTTPException) as exc: + controller.get_workflow_steps_json("test-hook-123") + + # Should raise 404 + assert exc.value.status_code == 404 + + def test_get_workflow_steps_json_with_error_in_log(self, controller, tmp_path, sample_json_webhook_data): + """Test get_workflow_steps_json with webhook that has error.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON log entry with error + error_data = sample_json_webhook_data.copy() + error_data["success"] = False + error_data["error"] = "Test error occurred" + + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [error_data]) + + # Get workflow steps + result = controller.get_workflow_steps_json("test-hook-123") + + # Should include error information + assert result["success"] is False + assert result["error"] == "Test error occurred" + + def test_get_workflow_steps_uses_json_when_available(self, controller, tmp_path, sample_json_webhook_data): + """Test get_workflow_steps uses JSON logs when available.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON log file + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [sample_json_webhook_data]) + + # Get workflow steps (should use JSON, not fall back to text) + result = controller.get_workflow_steps("test-hook-123") + + # Should return JSON-based data + assert result["hook_id"] == "test-hook-123" + assert result["event_type"] == "pull_request" + assert "steps" in result + assert result["token_spend"] == 35 + + def test_get_workflow_steps_falls_back_to_text_logs(self, controller, tmp_path): + """Test get_workflow_steps falls back to text logs when JSON not found.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a text log file (not JSON) + log_file = log_dir / "webhook-server.log" + log_entries = [ + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/test-repo " + "[pull_request][fallback-hook][test-user][PR 123]: Processing webhook " + "[task_id=task1][task_type=webhook][task_status=started]", + "2025-01-05T10:00:01.000000 GithubWebhook INFO org/test-repo " + "[pull_request][fallback-hook][test-user][PR 123]: Validation complete " + "[task_id=task2][task_type=validation][task_status=completed]", + "2025-01-05T10:00:02.000000 GithubWebhook INFO org/test-repo " + "[pull_request][fallback-hook][test-user][PR 123]: Token spend: 15 API calls", + ] + with open(log_file, "w", encoding="utf-8") as f: + for line in log_entries: + f.write(line + "\n") + + # Get workflow steps for hook not in JSON logs + result = controller.get_workflow_steps("fallback-hook") + + # Should fall back to text log parsing + assert result["hook_id"] == "fallback-hook" + assert "steps" in result + assert len(result["steps"]) == 2 # Two workflow steps with task_status + assert result["token_spend"] == 15 + + def test_get_workflow_steps_json_searches_multiple_files(self, controller, tmp_path, sample_json_webhook_data): + """Test get_workflow_steps_json searches through multiple JSON log files.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create multiple JSON log files + entry1 = sample_json_webhook_data.copy() + entry1["hook_id"] = "hook-file1" + self.create_json_log_file(log_dir, "webhooks_2025-01-01.json", [entry1]) + + entry2 = sample_json_webhook_data.copy() + entry2["hook_id"] = "hook-file2" + self.create_json_log_file(log_dir, "webhooks_2025-01-02.json", [entry2]) + + entry3 = sample_json_webhook_data.copy() + entry3["hook_id"] = "target-hook" + self.create_json_log_file(log_dir, "webhooks_2025-01-03.json", [entry3]) + + # Search for hook in third file + result = controller.get_workflow_steps_json("target-hook") + + # Should find it + assert result["hook_id"] == "target-hook" + + def test_get_workflow_steps_json_handles_missing_optional_fields(self, controller, tmp_path): + """Test get_workflow_steps_json handles missing optional fields gracefully.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create minimal JSON log entry + minimal_data = { + "hook_id": "minimal-hook", + # Missing: event_type, action, sender, pr, workflow_steps, token_spend, success, error + } + + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [minimal_data]) + + # Get workflow steps + result = controller.get_workflow_steps_json("minimal-hook") + + # Should handle missing fields with None + assert result["hook_id"] == "minimal-hook" + assert result["event_type"] is None + assert result["action"] is None + assert result["repository"] is None + assert result["sender"] is None + assert result["pr"] is None + assert result["timing"] is None + assert result["steps"] == {} # Default to empty dict + assert result["token_spend"] is None + assert result["success"] is None + assert result["error"] is None + + def test_stream_json_log_entries_handles_file_read_errors(self, controller, tmp_path, sample_json_webhook_data): + """Test _stream_json_log_entries handles file read errors gracefully.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a valid JSON log file + self.create_json_log_file(log_dir, "webhooks_valid.json", [sample_json_webhook_data]) + + # Create a file that will cause read error (simulate by making it unreadable) + bad_file = log_dir / "webhooks_bad.json" + bad_file.write_text(json.dumps(sample_json_webhook_data)) + bad_file.chmod(0o000) # Remove all permissions + + try: + # Stream entries - should skip bad file and continue + entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + + # Should still get the valid entry (or none if permission error blocks all) + # Depending on OS, this may yield 0 or 1 entry + assert len(entries) >= 0 + finally: + # Restore permissions for cleanup + bad_file.chmod(0o644) + + def test_get_workflow_steps_json_with_multiple_entries_same_file( + self, controller, tmp_path, sample_json_webhook_data + ): + """Test get_workflow_steps_json finds correct entry in file with multiple hooks.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON log file with multiple hook entries (deep copy to avoid reference issues) + entry1 = copy.deepcopy(sample_json_webhook_data) + entry1["hook_id"] = "hook-1" + entry1["pr"]["number"] = 100 + + entry2 = copy.deepcopy(sample_json_webhook_data) + entry2["hook_id"] = "target-hook" + entry2["pr"]["number"] = 200 + + entry3 = copy.deepcopy(sample_json_webhook_data) + entry3["hook_id"] = "hook-3" + entry3["pr"]["number"] = 300 + + self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [entry1, entry2, entry3]) + + # Search for middle entry + result = controller.get_workflow_steps_json("target-hook") + + # Should find correct entry + assert result["hook_id"] == "target-hook" + assert result["pr"]["number"] == 200 diff --git a/webhook_server/tests/test_owners_files_handler.py b/webhook_server/tests/test_owners_files_handler.py index 87a6be153..561b6d2f7 100644 --- a/webhook_server/tests/test_owners_files_handler.py +++ b/webhook_server/tests/test_owners_files_handler.py @@ -18,6 +18,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.logger = Mock() mock_webhook.log_prefix = "[TEST]" mock_webhook.repository = Mock() + mock_webhook.ctx = None return mock_webhook @pytest.fixture @@ -478,8 +479,6 @@ async def test_assign_reviewers(self, owners_file_handler: OwnersFileHandler, mo expected_calls = [call(["reviewer1"]), call(["reviewer2"])] actual_calls = mock_create_request.call_args_list assert sorted(actual_calls, key=str) == sorted(expected_calls, key=str) - # Verify completion log was called - assert owners_file_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_assign_reviewers_no_reviewers( @@ -490,8 +489,6 @@ async def test_assign_reviewers_no_reviewers( owners_file_handler.all_pull_request_reviewers = [] await owners_file_handler.assign_reviewers(mock_pull_request) - # Verify completion log was called (no reviewers to assign is acceptable) - assert owners_file_handler.logger.step.called # type: ignore[attr-defined] @pytest.mark.asyncio async def test_assign_reviewers_github_exception( diff --git a/webhook_server/tests/test_pull_request_handler.py b/webhook_server/tests/test_pull_request_handler.py index 93e1a3bde..7222735cc 100644 --- a/webhook_server/tests/test_pull_request_handler.py +++ b/webhook_server/tests/test_pull_request_handler.py @@ -79,6 +79,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.token = "test-token" # pragma: allowlist secret mock_webhook.auto_verify_cherry_picked_prs = True mock_webhook.last_commit = Mock() + mock_webhook.ctx = None return mock_webhook @pytest.fixture @@ -440,8 +441,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_with_tag( patch.object(mock_pull_request, "create_issue_comment", new=Mock()), ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - # Verify step logging was called - assert pull_request_handler.logger.step.called # The method uses runner_handler.run_podman_command, not repository.delete_tag @pytest.mark.asyncio @@ -850,8 +849,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_without_tag( with patch.object(pull_request_handler.github_webhook, "build_and_push_container", False): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - # Verify step logging was called (processing + completed) - assert pull_request_handler.logger.step.call_count >= 2 # Should return early when build_and_push_container is False @pytest.mark.asyncio @@ -880,8 +877,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_failed_deletion( ), ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - # Verify step logging was called (processing + failed) - assert pull_request_handler.logger.step.called # Verify error was logged assert pull_request_handler.logger.error.called @@ -910,8 +905,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_login_failed( patch.object(mock_pull_request, "create_issue_comment", new=Mock()), ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - # Verify step logging was called (processing + failed) - assert pull_request_handler.logger.step.called # Verify error was logged assert pull_request_handler.logger.error.called @@ -942,7 +935,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_success( patch.object(mock_pull_request, "create_issue_comment", new=Mock()), ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert mock_pull_request.create_issue_comment.called @pytest.mark.asyncio @@ -978,7 +970,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_users_scope_fallba ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) # Verify the deletion was successful - assert pull_request_handler.logger.step.called assert mock_pull_request.create_issue_comment.called # Verify requestJsonAndCheck was called 3 times (orgs GET, users GET, DELETE) assert mock_requester.requestJsonAndCheck.call_count == 3 @@ -1005,7 +996,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_package_not_found( patch.object(pull_request_handler.github_webhook, "token", "test-token"), # pragma: allowlist secret ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.warning.called @pytest.mark.asyncio @@ -1031,7 +1021,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_tag_not_found( patch.object(pull_request_handler.github_webhook, "token", "test-token"), # pragma: allowlist secret ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.warning.called @pytest.mark.asyncio @@ -1056,7 +1045,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_api_failure( patch.object(pull_request_handler.github_webhook, "token", "test-token"), # pragma: allowlist secret ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.exception.called @pytest.mark.asyncio @@ -1078,7 +1066,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_no_api( patch.object(pull_request_handler.github_webhook, "token", "test-token"), # pragma: allowlist secret ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.error.called @pytest.mark.asyncio @@ -1106,7 +1093,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_invalid_format( repository_full_tag="ghcr.io/invalid:pr-123", pr_tag="pr-123", ) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.error.called @pytest.mark.asyncio @@ -1137,7 +1123,6 @@ async def test_delete_remote_tag_for_merged_or_closed_pr_ghcr_delete_404( patch.object(mock_pull_request, "create_issue_comment", new=Mock()), ): await pull_request_handler.delete_remote_tag_for_merged_or_closed_pr(pull_request=mock_pull_request) - assert pull_request_handler.logger.step.called assert pull_request_handler.logger.warning.called @pytest.mark.asyncio @@ -1524,14 +1509,6 @@ async def test_process_labeled_can_be_merged( await pull_request_handler.process_pull_request_webhook_data(mock_pull_request) - # Verify step call with substring - found = False - for call in pull_request_handler.logger.step.call_args_list: - if "skipped - can-be-merged label" in str(call): - found = True - break - assert found, "Log step for can-be-merged label skip not found" - @pytest.mark.asyncio async def test_process_labeled_wip( self, pull_request_handler: PullRequestHandler, mock_github_webhook: Mock, mock_pull_request: Mock @@ -1560,13 +1537,6 @@ async def test_process_unhandled_action( await pull_request_handler.process_pull_request_webhook_data(mock_pull_request) - found = False - for call in pull_request_handler.logger.step.call_args_list: - if "no action handler - completed" in str(call): - found = True - break - assert found - @pytest.mark.asyncio async def test_delete_ghcr_tag_exceptions( self, pull_request_handler: PullRequestHandler, mock_github_webhook: Mock, mock_pull_request: Mock diff --git a/webhook_server/tests/test_pull_request_review_handler.py b/webhook_server/tests/test_pull_request_review_handler.py index a977de361..74675b5aa 100644 --- a/webhook_server/tests/test_pull_request_review_handler.py +++ b/webhook_server/tests/test_pull_request_review_handler.py @@ -22,6 +22,7 @@ def mock_github_webhook(self) -> Mock: } mock_webhook.logger = Mock() mock_webhook.log_prefix = "[TEST]" + mock_webhook.ctx = None return mock_webhook @pytest.fixture diff --git a/webhook_server/tests/test_push_handler.py b/webhook_server/tests/test_push_handler.py index 0fac3555d..c8c63b9fe 100644 --- a/webhook_server/tests/test_push_handler.py +++ b/webhook_server/tests/test_push_handler.py @@ -43,6 +43,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.container_repository_username = "test-user" # Always a string mock_webhook.container_repository_password = "test-password" # Always a string # pragma: allowlist secret mock_webhook.token = "test-token" # Always a string + mock_webhook.ctx = None return mock_webhook @pytest.fixture diff --git a/webhook_server/tests/test_runner_handler.py b/webhook_server/tests/test_runner_handler.py index 7aa4b66e3..0e61da80b 100644 --- a/webhook_server/tests/test_runner_handler.py +++ b/webhook_server/tests/test_runner_handler.py @@ -35,6 +35,7 @@ def mock_github_webhook(self) -> Mock: mock_webhook.dockerfile = "Dockerfile" mock_webhook.container_build_args = [] mock_webhook.container_command_args = [] + mock_webhook.ctx = None return mock_webhook @pytest.fixture diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py new file mode 100644 index 000000000..e2f623033 --- /dev/null +++ b/webhook_server/tests/test_structured_logger.py @@ -0,0 +1,672 @@ +"""Comprehensive tests for structured logger functionality.""" + +import json +from datetime import UTC, datetime, timedelta +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from webhook_server.libs.config import Config +from webhook_server.utils.context import WebhookContext +from webhook_server.utils.structured_logger import StructuredLogWriter, write_webhook_log + + +class TestStructuredLogWriter: + """Test suite for StructuredLogWriter class.""" + + @pytest.fixture + def mock_config(self, tmp_path: Path) -> Mock: + """Create a mock Config with temporary data directory.""" + config = Mock(spec=Config) + config.data_dir = str(tmp_path) + return config + + @pytest.fixture + def mock_logger(self) -> Mock: + """Create a mock logger.""" + return Mock() + + @pytest.fixture + def log_writer(self, mock_config: Mock, mock_logger: Mock) -> StructuredLogWriter: + """Create StructuredLogWriter instance with mocks.""" + return StructuredLogWriter(config=mock_config, logger=mock_logger) + + @pytest.fixture + def sample_context(self) -> WebhookContext: + """Create a sample WebhookContext for testing.""" + return WebhookContext( + hook_id="test-hook-123", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + action="opened", + sender="test-user", + pr_number=42, + pr_title="Test PR", + pr_author="pr-author", + api_user="api-bot", + ) + + def test_init_creates_log_directory(self, tmp_path: Path, mock_config: Mock, mock_logger: Mock) -> None: + """Test that __init__ creates the log directory.""" + # Arrange + log_dir = tmp_path / "logs" + assert not log_dir.exists() + + # Act + writer = StructuredLogWriter(config=mock_config, logger=mock_logger) + + # Assert + assert log_dir.exists() + assert log_dir.is_dir() + assert writer.config == mock_config + assert writer.logger == mock_logger + assert writer.log_dir == log_dir + + def test_init_with_existing_log_directory(self, tmp_path: Path, mock_config: Mock, mock_logger: Mock) -> None: + """Test that __init__ works when log directory already exists.""" + # Arrange + log_dir = tmp_path / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + assert log_dir.exists() + + # Act + writer = StructuredLogWriter(config=mock_config, logger=mock_logger) + + # Assert + assert writer.log_dir == log_dir + assert log_dir.exists() + + def test_init_without_logger_creates_default(self, mock_config: Mock) -> None: + """Test that __init__ creates a default logger if not provided.""" + # Act + writer = StructuredLogWriter(config=mock_config) + + # Assert + assert writer.logger is not None + + def test_get_log_file_path_default_date(self, log_writer: StructuredLogWriter, tmp_path: Path) -> None: + """Test _get_log_file_path returns correct path with default (current) date.""" + # Arrange + expected_date_str = datetime.now(UTC).strftime("%Y-%m-%d") + expected_path = tmp_path / "logs" / f"webhooks_{expected_date_str}.json" + + # Act + result = log_writer._get_log_file_path() + + # Assert + assert result == expected_path + + def test_get_log_file_path_with_specific_date(self, log_writer: StructuredLogWriter, tmp_path: Path) -> None: + """Test _get_log_file_path returns correct path with specific date.""" + # Arrange + test_date = datetime(2026, 1, 5, 12, 30, 45, tzinfo=UTC) + expected_path = tmp_path / "logs" / "webhooks_2026-01-05.json" + + # Act + result = log_writer._get_log_file_path(date=test_date) + + # Assert + assert result == expected_path + + def test_write_log_writes_valid_json( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log writes valid JSON to correct file.""" + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + # Read and validate JSON + with open(log_file) as f: + content = f.read() + lines = content.strip().split("\n") + assert len(lines) == 1 # Single log entry + + log_entry = json.loads(lines[0]) + assert log_entry["hook_id"] == "test-hook-123" + assert log_entry["event_type"] == "pull_request" + assert log_entry["repository"] == "org/repo" + assert log_entry["action"] == "opened" + assert log_entry["sender"] == "test-user" + + def test_write_log_sets_completed_at( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log sets completed_at timestamp in timing.""" + # Arrange - context without completed_at + assert sample_context.completed_at is None + + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert "timing" in log_entry + assert log_entry["timing"]["completed_at"] is not None + # Verify it's a valid ISO format timestamp + completed_at = datetime.fromisoformat(log_entry["timing"]["completed_at"]) + assert completed_at.tzinfo is not None + + def test_write_log_calculates_duration( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log calculates duration_ms when started_at is available.""" + # Arrange - set started_at to 5 seconds ago + sample_context.started_at = datetime.now(UTC) - timedelta(seconds=5) + + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert "timing" in log_entry + assert log_entry["timing"]["duration_ms"] is not None + # Duration should be approximately 5000ms (allowing some tolerance) + assert 4900 <= log_entry["timing"]["duration_ms"] <= 6000 + + def test_write_log_does_not_mutate_context( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext + ) -> None: + """Test write_log does not mutate the original context.""" + # Arrange - capture original state + original_completed_at = sample_context.completed_at + + # Act + log_writer.write_log(sample_context) + + # Assert - context unchanged + assert sample_context.completed_at == original_completed_at + + def test_write_log_multiple_entries_append( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test multiple writes append to same file (JSONL format).""" + # Arrange + context2 = WebhookContext( + hook_id="test-hook-456", + event_type="issue_comment", + repository="org/repo2", + repository_full_name="org/repo2", + ) + + # Act + log_writer.write_log(sample_context) + log_writer.write_log(context2) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + lines = f.read().strip().split("\n") + + assert len(lines) == 2 + + entry1 = json.loads(lines[0]) + entry2 = json.loads(lines[1]) + + assert entry1["hook_id"] == "test-hook-123" + assert entry2["hook_id"] == "test-hook-456" + + def test_write_log_with_workflow_steps( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log includes workflow steps from context.""" + # Arrange + sample_context.start_step("clone_repository", branch="main") + sample_context.complete_step("clone_repository", commit_sha="abc123") + + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert "workflow_steps" in log_entry + assert "clone_repository" in log_entry["workflow_steps"] + assert log_entry["workflow_steps"]["clone_repository"]["status"] == "completed" + assert log_entry["workflow_steps"]["clone_repository"]["commit_sha"] == "abc123" + + def test_write_log_with_error( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log includes error details from context.""" + # Arrange + sample_context.success = False + sample_context.error = { + "type": "ValueError", + "message": "Something went wrong", + "traceback": "Traceback...", + } + + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["success"] is False + assert log_entry["error"] is not None + assert log_entry["error"]["type"] == "ValueError" + assert log_entry["error"]["message"] == "Something went wrong" + + def test_write_log_with_pr_details( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log includes PR details when available.""" + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["pr"] is not None + assert log_entry["pr"]["number"] == 42 + assert log_entry["pr"]["title"] == "Test PR" + assert log_entry["pr"]["author"] == "pr-author" + + def test_write_log_without_pr_details(self, log_writer: StructuredLogWriter, tmp_path: Path) -> None: + """Test write_log handles context without PR details.""" + # Arrange + context = WebhookContext( + hook_id="test-hook-789", + event_type="push", + repository="org/repo", + repository_full_name="org/repo", + ) + + # Act + log_writer.write_log(context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["pr"] is None + + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", False) + def test_write_log_without_fcntl( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_log works on platforms without fcntl (Windows).""" + # Act + log_writer.write_log(sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["hook_id"] == "test-hook-123" + + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", True) + @patch("fcntl.flock") + def test_write_log_uses_file_locking( + self, mock_flock: Mock, log_writer: StructuredLogWriter, sample_context: WebhookContext + ) -> None: + """Test write_log uses file locking when fcntl is available.""" + # Act + log_writer.write_log(sample_context) + + # Assert - flock called for both temp file and log file + assert mock_flock.call_count >= 2 # At least lock and unlock + + def test_write_log_handles_exception_gracefully( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, mock_logger: Mock + ) -> None: + """Test write_log handles exceptions and logs them.""" + # Arrange - make tempfile.mkstemp fail + with patch("tempfile.mkstemp", side_effect=OSError("Disk full")): + # Act + log_writer.write_log(sample_context) + + # Assert + mock_logger.exception.assert_called_once() + assert "Failed to write webhook log entry" in str(mock_logger.exception.call_args) + + def test_write_log_logs_success( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, mock_logger: Mock + ) -> None: + """Test write_log logs debug message on success.""" + # Act + log_writer.write_log(sample_context) + + # Assert + mock_logger.debug.assert_called_once() + debug_msg = str(mock_logger.debug.call_args) + assert "test-hook-123" in debug_msg + assert "pull_request" in debug_msg + + def test_write_error_log_with_partial_context( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_error_log with partial context.""" + # Arrange + sample_context.success = True # Initially success + sample_context.error = None + + # Act + log_writer.write_error_log( + hook_id="test-hook-123", + event_type="pull_request", + repository="org/repo", + error_message="Early failure", + context=sample_context, + ) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["success"] is False + assert log_entry["error"] is not None + assert log_entry["error"]["message"] == "Early failure" + + def test_write_error_log_without_context(self, log_writer: StructuredLogWriter, tmp_path: Path) -> None: + """Test write_error_log creates minimal entry when no context available.""" + # Act + log_writer.write_error_log( + hook_id="test-hook-error", + event_type="push", + repository="org/repo", + error_message="Critical error", + context=None, + ) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["hook_id"] == "test-hook-error" + assert log_entry["event_type"] == "push" + assert log_entry["repository"] == "org/repo" + assert log_entry["success"] is False + assert log_entry["error"]["message"] == "Critical error" + assert log_entry["pr"] is None + assert log_entry["action"] is None + + def test_write_error_log_preserves_existing_error( + self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path + ) -> None: + """Test write_error_log preserves existing error in context.""" + # Arrange + sample_context.error = { + "type": "ExistingError", + "message": "Original error", + "traceback": "Original traceback", + } + + # Act + log_writer.write_error_log( + hook_id="test-hook-123", + event_type="pull_request", + repository="org/repo", + error_message="New error", + context=sample_context, + ) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + # Original error preserved + assert log_entry["error"]["type"] == "ExistingError" + assert log_entry["error"]["message"] == "Original error" + + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", True) + @patch("fcntl.flock") + def test_write_error_log_uses_file_locking(self, mock_flock: Mock, log_writer: StructuredLogWriter) -> None: + """Test write_error_log uses file locking when fcntl is available.""" + # Act + log_writer.write_error_log( + hook_id="test-hook", + event_type="push", + repository="org/repo", + error_message="Error message", + context=None, + ) + + # Assert + assert mock_flock.call_count >= 1 + + def test_write_error_log_handles_exception(self, log_writer: StructuredLogWriter, mock_logger: Mock) -> None: + """Test write_error_log handles exceptions gracefully.""" + # Arrange - make open fail + with patch("builtins.open", side_effect=OSError("Disk full")): + # Act + log_writer.write_error_log( + hook_id="test-hook", + event_type="push", + repository="org/repo", + error_message="Error message", + context=None, + ) + + # Assert + mock_logger.exception.assert_called_once() + + +class TestWriteWebhookLogFunction: + """Test suite for write_webhook_log module-level function.""" + + @pytest.fixture + def sample_context(self) -> WebhookContext: + """Create a sample WebhookContext for testing.""" + return WebhookContext( + hook_id="test-hook-func", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + ) + + def test_write_webhook_log_with_provided_context(self, sample_context: WebhookContext, tmp_path: Path) -> None: + """Test write_webhook_log uses provided context.""" + # Arrange + with patch("webhook_server.utils.structured_logger.Config") as mock_config_class: + mock_config = Mock() + mock_config.data_dir = str(tmp_path) + mock_config_class.return_value = mock_config + + # Act + write_webhook_log(context=sample_context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["hook_id"] == "test-hook-func" + + def test_write_webhook_log_uses_context_var(self, sample_context: WebhookContext, tmp_path: Path) -> None: + """Test write_webhook_log gets context from ContextVar when not provided.""" + # Arrange + with patch("webhook_server.utils.structured_logger.Config") as mock_config_class: + mock_config = Mock() + mock_config.data_dir = str(tmp_path) + mock_config_class.return_value = mock_config + + with patch("webhook_server.utils.structured_logger.get_context", return_value=sample_context): + # Act + write_webhook_log() + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["hook_id"] == "test-hook-func" + + def test_write_webhook_log_raises_when_no_context(self) -> None: + """Test write_webhook_log raises ValueError when no context available.""" + # Arrange + with patch("webhook_server.utils.structured_logger.get_context", return_value=None): + # Act & Assert + with pytest.raises(ValueError, match="No webhook context available"): + write_webhook_log() + + def test_write_webhook_log_creates_config_and_logger(self, sample_context: WebhookContext) -> None: + """Test write_webhook_log creates Config and logger instances.""" + # Arrange + with ( + patch("webhook_server.utils.structured_logger.Config") as mock_config_class, + patch("webhook_server.utils.structured_logger.get_logger") as mock_get_logger, + patch("webhook_server.utils.structured_logger.StructuredLogWriter") as mock_writer_class, + ): + mock_config = Mock() + mock_logger = Mock() + mock_writer = Mock() + + mock_config_class.return_value = mock_config + mock_get_logger.return_value = mock_logger + mock_writer_class.return_value = mock_writer + + # Act + write_webhook_log(context=sample_context) + + # Assert + mock_config_class.assert_called_once_with() + mock_get_logger.assert_called_once_with(name="structured_logger") + mock_writer_class.assert_called_once_with(config=mock_config, logger=mock_logger) + mock_writer.write_log.assert_called_once_with(sample_context) + + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + @pytest.fixture + def mock_config(self, tmp_path: Path) -> Mock: + """Create a mock Config with temporary data directory.""" + config = Mock(spec=Config) + config.data_dir = str(tmp_path) + return config + + def test_write_log_with_unicode_content(self, mock_config: Mock, tmp_path: Path) -> None: + """Test write_log handles Unicode content correctly.""" + # Arrange + writer = StructuredLogWriter(config=mock_config) + context = WebhookContext( + hook_id="test-unicode", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + pr_number=1, + pr_title="测试 🚀 émojis", + sender="用户", + ) + + # Act + writer.write_log(context) + + # Assert + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + with open(log_file, encoding="utf-8") as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["pr"]["title"] == "测试 🚀 émojis" + assert log_entry["sender"] == "用户" + + def test_write_log_temp_file_cleanup_on_error(self, mock_config: Mock, tmp_path: Path) -> None: + """Test temporary file is cleaned up even when an error occurs.""" + # Arrange + writer = StructuredLogWriter(config=mock_config) + context = WebhookContext( + hook_id="test-cleanup", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + ) + + # Count temp files before + temp_files_before = list((tmp_path / "logs").glob(".webhooks_*.tmp")) + + # Act - cause an error during write + with patch("os.fsync", side_effect=OSError("Sync failed")): + writer.write_log(context) + + # Assert - no temp files left behind + temp_files_after = list((tmp_path / "logs").glob(".webhooks_*.tmp")) + assert len(temp_files_after) == len(temp_files_before) + + def test_write_log_handles_missing_timing_in_context_dict(self, mock_config: Mock, tmp_path: Path) -> None: + """Test write_log handles context without timing key.""" + # Arrange + writer = StructuredLogWriter(config=mock_config) + context = Mock(spec=WebhookContext) + context.hook_id = "test" + context.event_type = "push" + context.repository = "org/repo" + context.started_at = None + context.to_dict = Mock(return_value={"hook_id": "test", "event_type": "push"}) + + # Act + writer.write_log(context) + + # Assert - should not raise exception + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + def test_different_dates_create_different_files(self, mock_config: Mock, tmp_path: Path) -> None: + """Test that logs for different dates go to different files.""" + # Arrange + writer = StructuredLogWriter(config=mock_config) + date1 = datetime(2026, 1, 5, tzinfo=UTC) + date2 = datetime(2026, 1, 6, tzinfo=UTC) + + # Act + path1 = writer._get_log_file_path(date=date1) + path2 = writer._get_log_file_path(date=date2) + + # Assert + assert path1 != path2 + assert path1.name == "webhooks_2026-01-05.json" + assert path2.name == "webhooks_2026-01-06.json" + + def test_write_log_handles_temp_file_deletion_error(self, mock_config: Mock, tmp_path: Path) -> None: + """Test that write_log handles temp file deletion errors gracefully.""" + # Arrange + writer = StructuredLogWriter(config=mock_config) + context = WebhookContext( + hook_id="test-cleanup-error", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + ) + + # Mock os.unlink to raise OSError + with patch("os.unlink", side_effect=OSError("Permission denied")): + # Act - should not raise exception + writer.write_log(context) + + # Assert - log file still created successfully + log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" + assert log_file.exists() + + with open(log_file) as f: + log_entry = json.loads(f.read().strip()) + + assert log_entry["hook_id"] == "test-cleanup-error" diff --git a/webhook_server/utils/app_utils.py b/webhook_server/utils/app_utils.py index 121e4dc58..260943ff3 100644 --- a/webhook_server/utils/app_utils.py +++ b/webhook_server/utils/app_utils.py @@ -4,11 +4,15 @@ import hashlib import hmac import ipaddress +import logging +from typing import Any import httpx from fastapi import HTTPException, Request, status +from webhook_server.utils.context import WebhookContext from webhook_server.utils.helpers import get_logger_with_params +from webhook_server.web.log_viewer import LogViewerController # Constants HTTP_TIMEOUT_SECONDS: float = 10.0 @@ -117,3 +121,92 @@ def parse_datetime_string(datetime_str: str | None, field_name: str) -> datetime status_code=400, detail=f"Invalid {field_name} format: {datetime_str}. Expected ISO 8601 format. Error: {e!s}", ) from e + + +def format_duration(ms: int) -> str: + """Format milliseconds to human-readable duration string. + + Args: + ms: Duration in milliseconds + + Returns: + Human-readable duration (e.g., "3m12s", "1h5m", "500ms") + """ + if ms < 1000: + return f"{ms}ms" + + seconds = ms // 1000 + if seconds < 60: + remaining_ms = ms % 1000 + if remaining_ms > 0: + return f"{seconds}s{remaining_ms}ms" + return f"{seconds}s" + + minutes = seconds // 60 + remaining_seconds = seconds % 60 + if minutes < 60: + if remaining_seconds > 0: + return f"{minutes}m{remaining_seconds}s" + return f"{minutes}m" + + hours = minutes // 60 + remaining_minutes = minutes % 60 + if remaining_minutes > 0: + return f"{hours}h{remaining_minutes}m" + return f"{hours}h" + + +def log_webhook_summary(ctx: WebhookContext, logger: logging.Logger, log_prefix: str) -> None: + """Log a summary of webhook processing from the structured context. + + Generates a single-line summary showing: + - Success/failure status + - PR number (if applicable) + - Total duration + - Token spend + - All workflow steps with their status and duration + + Args: + ctx: WebhookContext containing execution metadata and workflow steps + logger: Logger instance to write the summary + log_prefix: Log prefix for consistent formatting + """ + # Calculate duration - completed_at is always set before this is called + if ctx.completed_at is None: + raise ValueError("Context completed_at is None - context not completed") + duration_ms = int((ctx.completed_at - ctx.started_at).total_seconds() * 1000) + + # Build summary of workflow steps - all steps have duration_ms + steps_summary = [] + for step_name, step_data in ctx.workflow_steps.items(): + status = step_data["status"] + step_duration_ms = step_data["duration_ms"] + steps_summary.append(f"{step_name}:{status}({format_duration(step_duration_ms)})") + + steps_str = ", ".join(steps_summary) if steps_summary else "no steps recorded" + + # Build final summary message + status_text = "SUCCESS" if ctx.success else "FAILED" + pr_info = f" PR#{ctx.pr_number}" if ctx.pr_number else "" + token_info = f", tokens:{ctx.token_spend}" if ctx.token_spend else "" + + logger.info( + f"{log_prefix} [{status_text}] Webhook completed{pr_info} " + f"[{format_duration(duration_ms)}{token_info}] steps=[{steps_str}]" + ) + + +def get_workflow_steps_core( + controller: LogViewerController, + hook_id: str, +) -> dict[str, Any]: + """Core logic for getting workflow step timeline data for a specific hook ID. + + Args: + controller: LogViewerController instance + hook_id: GitHub webhook delivery ID + + Returns: + dict containing workflow step timeline data + """ + return controller.get_workflow_steps(hook_id) diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py new file mode 100644 index 000000000..46ce3a1e0 --- /dev/null +++ b/webhook_server/utils/context.py @@ -0,0 +1,293 @@ +"""Webhook execution context tracking using ContextVars. + +This module provides a thread-safe, async-safe context tracking system for webhook processing. +Each webhook execution creates a WebhookContext that captures workflow steps, timing, errors, +and API metrics. + +Architecture: +- Uses ContextVar for thread-safe and async-safe context isolation +- Each webhook request gets its own isolated context +- Context persists through async operations and handler chains +- Automatically tracks workflow steps with timing and errors + +Usage: + from webhook_server.utils.context import create_context, get_context + + # Create context at webhook entry point + ctx = create_context( + hook_id="github-delivery-id", + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + action="opened", + sender="username", + ) + + # Track workflow steps + ctx.start_step("clone_repository", branch="main") + try: + await clone_repo() + ctx.complete_step("clone_repository", commit_sha="abc123") + except Exception as ex: + ctx.fail_step("clone_repository", exception=ex, traceback_str=traceback.format_exc()) + + # Get context anywhere in the call stack + ctx = get_context() + if ctx: + ctx.start_step("assign_reviewers") +""" + +from contextvars import ContextVar +from dataclasses import dataclass, field +from datetime import UTC, datetime +from typing import Any + +_webhook_context: ContextVar["WebhookContext | None"] = ContextVar("webhook_context", default=None) + + +@dataclass +class WebhookContext: + """Webhook execution context with workflow tracking and metrics. + + Captures all relevant information about a webhook execution including: + - Core webhook metadata (hook_id, event_type, repository, action, sender) + - PR information when available (number, title, author) + - API user making requests + - Timing information (start, completion, step durations) + - Workflow steps with individual status and errors + - GitHub API token metrics (spend, rate limits) + - Overall execution status and errors + + Attributes: + hook_id: GitHub webhook delivery ID (X-GitHub-Delivery header) + event_type: GitHub event type (pull_request, issue_comment, check_run, etc.) + repository: Repository name (org/repo) + repository_full_name: Full repository name (org/repo) + action: Webhook action (opened, synchronize, completed, etc.) + sender: GitHub username who triggered the webhook + pr_number: Pull request number if applicable + pr_title: Pull request title if applicable + pr_author: Pull request author username if applicable + api_user: GitHub API user making requests + started_at: Webhook processing start time (UTC) + completed_at: Webhook processing completion time (UTC) + workflow_steps: Dict of workflow steps keyed by step name + token_spend: GitHub API tokens consumed (rate_limit_before - rate_limit_after) + initial_rate_limit: GitHub API rate limit at start + final_rate_limit: GitHub API rate limit at end + success: Overall execution success status + error: Top-level error details with traceback if execution failed + """ + + # Core webhook info + hook_id: str + event_type: str + repository: str + repository_full_name: str + action: str | None = None + sender: str | None = None + + # PR info (populated when available) + pr_number: int | None = None + pr_title: str | None = None + pr_author: str | None = None + + # API user + api_user: str = "" + + # Timing + started_at: datetime = field(default_factory=lambda: datetime.now(UTC)) + completed_at: datetime | None = None + + # Workflow steps - dict keyed by action name + # e.g., {"clone_repository": {...}, "assign_reviewers": {...}} + workflow_steps: dict[str, dict[str, Any]] = field(default_factory=dict) + _step_start_times: dict[str, datetime] = field(default_factory=dict) # Internal tracking + + # Token metrics + token_spend: int | None = None + initial_rate_limit: int | None = None + final_rate_limit: int | None = None + + # Final status + success: bool = True + error: dict[str, Any] | None = None # Top-level error with traceback + + def start_step(self, step_name: str, **data: Any) -> None: + """Start a workflow step. + + Records the step start time and initializes step tracking with "started" status. + Additional step metadata can be passed as keyword arguments. + + Args: + step_name: Unique identifier for this workflow step + **data: Additional step metadata (e.g., branch="main", commit_sha="abc123") + """ + now = datetime.now(UTC) + self._step_start_times[step_name] = now + self.workflow_steps[step_name] = { + "timestamp": now.isoformat(), + "status": "started", + "error": None, + **data, + } + + def complete_step(self, step_name: str, **data: Any) -> None: + """Complete a workflow step successfully. + + Marks the step as completed, calculates duration, and updates step metadata. + Additional result data can be passed as keyword arguments. + + Args: + step_name: Unique identifier for this workflow step + **data: Additional step result data (e.g., reviewers_assigned=3, labels_added=["verified"]) + """ + now = datetime.now(UTC) + start_time = self._step_start_times.get(step_name) + duration_ms = int((now - start_time).total_seconds() * 1000) if start_time else None + + if step_name not in self.workflow_steps: + self.workflow_steps[step_name] = {"timestamp": now.isoformat()} + + self.workflow_steps[step_name].update({ + "status": "completed", + "duration_ms": duration_ms, + "error": None, + **data, + }) + + def fail_step(self, step_name: str, exception: Exception, traceback_str: str, **data: Any) -> None: + """Mark a workflow step as failed with error details. + + Captures exception type, message, and full traceback. Sets the step status to "failed" + and also updates the top-level context error and success flag. + + Args: + step_name: Unique identifier for this workflow step + exception: Exception that caused the failure + traceback_str: Full traceback string (use traceback.format_exc()) + **data: Additional error context data + """ + now = datetime.now(UTC) + start_time = self._step_start_times.get(step_name) + duration_ms = int((now - start_time).total_seconds() * 1000) if start_time else None + + error_data = { + "type": type(exception).__name__, + "message": str(exception), + "traceback": traceback_str, + } + + if step_name not in self.workflow_steps: + self.workflow_steps[step_name] = {"timestamp": now.isoformat()} + + self.workflow_steps[step_name].update({ + "status": "failed", + "duration_ms": duration_ms, + "error": error_data, + **data, + }) + + # Also set top-level error + self.success = False + self.error = error_data + + def to_dict(self) -> dict[str, Any]: + """Convert context to dictionary for JSON serialization. + + Returns a complete representation of the webhook execution context including + all workflow steps, timing information, and error details. + + Returns: + Dict containing all context data in JSON-serializable format + """ + return { + "hook_id": self.hook_id, + "event_type": self.event_type, + "action": self.action, + "sender": self.sender, + "repository": self.repository, + "repository_full_name": self.repository_full_name, + "pr": { + "number": self.pr_number, + "title": self.pr_title, + "author": self.pr_author, + } + if self.pr_number + else None, + "api_user": self.api_user, + "timing": { + "started_at": self.started_at.isoformat(), + "completed_at": (self.completed_at.isoformat() if self.completed_at else None), + "duration_ms": int((self.completed_at - self.started_at).total_seconds() * 1000) + if self.completed_at + else None, + }, + "workflow_steps": self.workflow_steps, + "token_spend": self.token_spend, + "initial_rate_limit": self.initial_rate_limit, + "final_rate_limit": self.final_rate_limit, + "success": self.success, + "error": self.error, + } + + +def create_context( + hook_id: str, + event_type: str, + repository: str, + repository_full_name: str, + action: str | None = None, + sender: str | None = None, + api_user: str = "", +) -> WebhookContext: + """Create and set a new WebhookContext in the current async context. + + Creates a new context and stores it in the ContextVar for the current execution context. + This context will be accessible to all code running in the same async task. + + Args: + hook_id: GitHub webhook delivery ID (X-GitHub-Delivery header) + event_type: GitHub event type (pull_request, issue_comment, check_run, etc.) + repository: Repository name (org/repo) + repository_full_name: Full repository name (org/repo) + action: Webhook action (opened, synchronize, completed, etc.) + sender: GitHub username who triggered the webhook + api_user: GitHub API user making requests + + Returns: + The created WebhookContext instance + """ + ctx = WebhookContext( + hook_id=hook_id, + event_type=event_type, + repository=repository, + repository_full_name=repository_full_name, + action=action, + sender=sender, + api_user=api_user, + ) + _webhook_context.set(ctx) + return ctx + + +def get_context() -> WebhookContext | None: + """Get the current WebhookContext for this execution context. + + Returns the context associated with the current async task, or None if no context + has been set. This allows any code in the call stack to access the current webhook + context without explicit parameter passing. + + Returns: + The current WebhookContext, or None if no context is set + """ + return _webhook_context.get() + + +def clear_context() -> None: + """Clear the current WebhookContext. + + Removes the context from the current execution context. Should be called at the end + of webhook processing to prevent context leakage between requests. + """ + _webhook_context.set(None) diff --git a/webhook_server/utils/helpers.py b/webhook_server/utils/helpers.py index 4c9805e1b..16d9301e8 100644 --- a/webhook_server/utils/helpers.py +++ b/webhook_server/utils/helpers.py @@ -132,28 +132,6 @@ def _sanitize_log_value(value: str) -> str: return sanitized -def format_task_fields(task_id: str | None = None, task_type: str | None = None, task_status: str | None = None) -> str: - """Format task correlation fields for log messages. - - Args: - task_id: Task identifier (e.g., "check_tox", "webhook_processing") - task_type: Task type category (e.g., "ci_check", "webhook_routing") - task_status: Task status (e.g., "started", "completed", "failed") - - Returns: - Formatted string with task fields in brackets, or empty string if no fields provided. - Example: "[task_id=check_tox] [task_type=ci_check] [task_status=started]" - """ - parts = [] - if task_id: - parts.append(f"[task_id={_sanitize_log_value(task_id)}]") - if task_type: - parts.append(f"[task_type={_sanitize_log_value(task_type)}]") - if task_status: - parts.append(f"[task_status={_sanitize_log_value(task_status)}]") - return " ".join(parts) - - # Global cache for compiled regex patterns # Cache key: (tuple of secrets, case_insensitive flag) _REDACT_REGEX_CACHE: dict[tuple[tuple[str, ...], bool], re.Pattern[str]] = {} diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py new file mode 100644 index 000000000..bcbb737db --- /dev/null +++ b/webhook_server/utils/structured_logger.py @@ -0,0 +1,288 @@ +"""Structured JSON logging for webhook execution tracking. + +This module provides JSON-based logging for webhook executions in JSONL (JSON Lines) format. +Each webhook execution generates a single-line JSON entry containing all workflow steps, +timing, errors, and API metrics. + +Architecture: +- JSONL format: One JSON object per line for efficient streaming and parsing +- Date-based files: webhooks_YYYY-MM-DD.json for easy log rotation +- Atomic writes: Temporary file + rename for crash safety +- Concurrent writes: File locking to handle multiple webhook processes + +Log File Format: +- Location: {config.data_dir}/logs/webhooks_YYYY-MM-DD.json +- Format: JSONL (newline-delimited JSON) +- Rotation: Daily based on date +- Size: Unbounded (external rotation recommended) + +Usage: + from webhook_server.utils.structured_logger import write_webhook_log + from webhook_server.utils.context import get_context + + # At end of webhook processing + ctx = get_context() + write_webhook_log(ctx) + + # Or use current context automatically + write_webhook_log() +""" + +import json +import os +import tempfile +from datetime import UTC, datetime +from logging import Logger +from pathlib import Path + +from simple_logger.logger import get_logger + +from webhook_server.libs.config import Config +from webhook_server.utils.context import WebhookContext, get_context + +# Platform-specific imports for file locking +try: + import fcntl + + HAS_FCNTL = True +except ImportError: + HAS_FCNTL = False + + +class StructuredLogWriter: + """JSON log writer for webhook execution tracking. + + Writes webhook execution contexts as JSONL (JSON Lines) format to date-based log files. + Provides atomic writes with file locking for safe concurrent access. + + Attributes: + config: Configuration instance for accessing data_dir + logger: Logger instance for error reporting + log_dir: Directory path for log files ({config.data_dir}/logs/) + """ + + def __init__(self, config: Config, logger: Logger | None = None) -> None: + """Initialize the structured log writer. + + Args: + config: Configuration instance for accessing data_dir + logger: Logger instance for error reporting (creates one if not provided) + """ + self.config = config + self.logger = logger or get_logger(name="structured_logger") + self.log_dir = Path(self.config.data_dir) / "logs" + + # Create log directory if it doesn't exist + self.log_dir.mkdir(parents=True, exist_ok=True) + + def _get_log_file_path(self, date: datetime | None = None) -> Path: + """Get log file path for the specified date. + + Args: + date: Date for the log file (defaults to current UTC date) + + Returns: + Path to the log file (e.g., {log_dir}/webhooks_2026-01-05.json) + """ + if date is None: + date = datetime.now(UTC) + date_str = date.strftime("%Y-%m-%d") + return self.log_dir / f"webhooks_{date_str}.json" + + def write_log(self, context: WebhookContext) -> None: + """Write webhook context as JSONL entry to date-based log file. + + Writes a single-line JSON entry containing complete webhook execution context. + Uses atomic write pattern (temp file + rename) with file locking for safety. + + Args: + context: WebhookContext to serialize and write + + Note: + Calculates completion time locally without mutating the context + """ + # Calculate completion time locally (don't mutate context) + completed_at = datetime.now(UTC) + + # Get context dict and update timing locally + context_dict = context.to_dict() + if "timing" in context_dict: + context_dict["timing"]["completed_at"] = completed_at.isoformat() + if context.started_at: + duration_ms = int((completed_at - context.started_at).total_seconds() * 1000) + context_dict["timing"]["duration_ms"] = duration_ms + + # Get log file path + log_file = self._get_log_file_path(completed_at) + + # Serialize context to JSON (single line, no pretty printing) + log_entry = json.dumps(context_dict, ensure_ascii=False) + + # Atomic write with file locking + try: + # Write to temporary file in same directory (ensures atomic rename on same filesystem) + temp_fd, temp_path = tempfile.mkstemp( + dir=self.log_dir, + prefix=f".{log_file.name}_", + suffix=".tmp", + ) + + try: + # Acquire exclusive lock (blocks if another process is writing) + if HAS_FCNTL: + fcntl.flock(temp_fd, fcntl.LOCK_EX) + + try: + # Write JSON line with newline + os.write(temp_fd, f"{log_entry}\n".encode()) + os.fsync(temp_fd) # Ensure data is written to disk + + # Append to target log file (atomic on POSIX) + with open(log_file, "a") as log_fd: + # Acquire lock on target file + if HAS_FCNTL: + fcntl.flock(log_fd.fileno(), fcntl.LOCK_EX) + try: + # Read temp file and append to log file + os.lseek(temp_fd, 0, os.SEEK_SET) + data = os.read(temp_fd, os.path.getsize(temp_path)) + log_fd.write(data.decode("utf-8")) + log_fd.flush() + os.fsync(log_fd.fileno()) + finally: + if HAS_FCNTL: + fcntl.flock(log_fd.fileno(), fcntl.LOCK_UN) + + finally: + if HAS_FCNTL: + fcntl.flock(temp_fd, fcntl.LOCK_UN) + + finally: + os.close(temp_fd) + # Clean up temp file + try: + os.unlink(temp_path) + except OSError: + pass # Ignore errors during cleanup + + self.logger.debug( + f"Wrote webhook log entry: hook_id={context.hook_id} " + f"event={context.event_type} repository={context.repository}" + ) + + except Exception: + self.logger.exception( + f"Failed to write webhook log entry: hook_id={context.hook_id} " + f"event={context.event_type} repository={context.repository}" + ) + + def write_error_log( + self, + hook_id: str, + event_type: str, + repository: str, + error_message: str, + context: WebhookContext | None = None, + ) -> None: + """Write error log entry for early webhook failures. + + Used when webhook processing fails before context is fully populated. + Creates a minimal log entry with error details. + + Args: + hook_id: GitHub webhook delivery ID + event_type: GitHub event type + repository: Repository name + error_message: Error message describing the failure + context: Partial WebhookContext if available + """ + try: + # Use existing context if provided, otherwise create minimal entry + if context: + # Context exists but failed - mark as failed and set error + context.success = False + if not context.error: + context.error = { + "type": "WebhookProcessingError", + "message": error_message, + "traceback": "", + } + self.write_log(context) + else: + # No context - create minimal error entry + error_entry = { + "hook_id": hook_id, + "event_type": event_type, + "action": None, + "sender": None, + "repository": repository, + "repository_full_name": repository, + "pr": None, + "api_user": "", + "timing": { + "started_at": datetime.now(UTC).isoformat(), + "completed_at": datetime.now(UTC).isoformat(), + "duration_ms": 0, + }, + "workflow_steps": {}, + "token_spend": None, + "initial_rate_limit": None, + "final_rate_limit": None, + "success": False, + "error": { + "type": "WebhookProcessingError", + "message": error_message, + "traceback": "", + }, + } + + # Write to log file + log_file = self._get_log_file_path() + log_entry = json.dumps(error_entry, ensure_ascii=False) + + with open(log_file, "a") as log_fd: + if HAS_FCNTL: + fcntl.flock(log_fd.fileno(), fcntl.LOCK_EX) + try: + log_fd.write(f"{log_entry}\n") + log_fd.flush() + os.fsync(log_fd.fileno()) + finally: + if HAS_FCNTL: + fcntl.flock(log_fd.fileno(), fcntl.LOCK_UN) + + self.logger.debug( + f"Wrote error log entry: hook_id={hook_id} event={event_type} repository={repository}" + ) + + except Exception: + self.logger.exception( + f"Failed to write error log entry: hook_id={hook_id} event={event_type} repository={repository}" + ) + + +def write_webhook_log(context: WebhookContext | None = None) -> None: + """Write webhook log entry using current or provided context. + + Convenience function that handles Config and StructuredLogWriter instantiation. + Uses the current context from ContextVar if not explicitly provided. + + Args: + context: WebhookContext to log (uses get_context() if not provided) + + Raises: + ValueError: If no context is provided and get_context() returns None + """ + # Get context from ContextVar if not provided + if context is None: + context = get_context() + if context is None: + raise ValueError("No webhook context available - call create_context() first") + + # Create Config and StructuredLogWriter + config = Config() + logger = get_logger(name="structured_logger") + writer = StructuredLogWriter(config=config, logger=logger) + + # Write log entry + writer.write_log(context) diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index f0ed05d64..23e531cbe 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -533,6 +533,45 @@ def _build_log_prefix_from_context( log_prefix_parts.append(f"[PR {pr_number}]") return " ".join(log_prefix_parts) + ": " if log_prefix_parts else "" + def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: + """Get workflow steps directly from JSON logs for a specific hook ID. + + This is more efficient than parsing text logs since JSON logs contain + the full structured workflow data. + + Args: + hook_id: The hook ID to get workflow steps for + + Returns: + Dictionary with workflow steps from JSON log + + Raises: + HTTPException: 404 if hook ID not found + """ + try: + # Search JSON logs for this hook_id + for entry in self._stream_json_log_entries(max_files=25, max_entries=50000): + if entry.get("hook_id") == hook_id: + # Found the entry - return structured workflow data + return { + "hook_id": hook_id, + "event_type": entry.get("event_type"), + "action": entry.get("action"), + "repository": entry.get("repository"), + "sender": entry.get("sender"), + "pr": entry.get("pr"), + "timing": entry.get("timing"), + "steps": entry.get("workflow_steps", {}), + "token_spend": entry.get("token_spend"), + "success": entry.get("success"), + "error": entry.get("error"), + } + + raise ValueError(f"No JSON log entry found for hook ID: {hook_id}") + + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) from e + def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: """Get workflow step timeline data for a specific hook ID. @@ -546,6 +585,12 @@ def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: HTTPException: 404 if no steps found for hook ID """ try: + # First try JSON logs (more efficient and complete) + try: + return self.get_workflow_steps_json(hook_id) + except HTTPException: + # Fall back to text log parsing for backward compatibility + pass # Use streaming approach for memory efficiency filtered_entries: list[LogEntry] = [] @@ -694,6 +739,8 @@ def _stream_log_entries( This replaces _load_log_entries() to prevent memory exhaustion from loading all log files simultaneously. Uses lazy evaluation and chunked processing. + Supports both text log files (*.log) and JSON log files (webhooks_*.json). + Args: max_files: Maximum number of log files to process (newest first) _chunk_size: Number of entries to yield per chunk from each file (unused, reserved for future) @@ -708,10 +755,11 @@ def _stream_log_entries( self.logger.warning(f"Log directory not found: {log_dir}") return - # Find all log files including rotated ones (*.log, *.log.1, *.log.2, etc.) + # Find all log files including rotated ones and JSON files log_files: list[Path] = [] log_files.extend(log_dir.glob("*.log")) log_files.extend(log_dir.glob("*.log.*")) + log_files.extend(log_dir.glob("webhooks_*.json")) # Sort log files to process in correct order (current log first, then rotated by number) def sort_key(f: Path) -> tuple: @@ -744,7 +792,11 @@ def sort_key(f: Path) -> tuple: with open(log_file, encoding="utf-8") as f: for line in f: - entry = self.log_parser.parse_log_entry(line) + # Use appropriate parser based on file type + if log_file.suffix == ".json": + entry = self.log_parser.parse_json_log_entry(line) + else: + entry = self.log_parser.parse_log_entry(line) if entry: buffer.append(entry) @@ -759,6 +811,50 @@ def sort_key(f: Path) -> tuple: except Exception as e: self.logger.warning(f"Error streaming log file {log_file}: {e}") + def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000) -> Iterator[dict[str, Any]]: + """Stream raw JSON log entries from webhooks_*.json files. + + Returns raw JSON dicts instead of LogEntry objects for access to full structured data. + + Args: + max_files: Maximum number of log files to process (newest first) + max_entries: Maximum total entries to yield (safety limit) + + Yields: + Raw JSON dictionaries from log files (newest first) + """ + log_dir = self._get_log_directory() + + if not log_dir.exists(): + return + + # Find JSON log files + json_files = list(log_dir.glob("webhooks_*.json")) + # Sort by modification time (newest first) + json_files.sort(key=lambda f: f.stat().st_mtime, reverse=True) + json_files = json_files[:max_files] + + total_yielded = 0 + + for log_file in json_files: + if total_yielded >= max_entries: + break + + try: + with open(log_file, encoding="utf-8") as f: + # Read lines in reverse for newest-first ordering + lines = f.readlines() + for line in reversed(lines): + if total_yielded >= max_entries: + break + + data = self.log_parser.get_raw_json_entry(line) + if data: + yield data + total_yielded += 1 + except Exception as e: + self.logger.warning(f"Error streaming JSON log file {log_file}: {e}") + def _load_log_entries(self) -> list[LogEntry]: """Load log entries using streaming approach for memory efficiency. From 510b79e1bfca0a6308910bb12d970750a5c71afd Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 12:23:59 +0200 Subject: [PATCH 02/14] feat(logging): improve JSON log readability and prioritization Enhance structured logging with pretty-printed JSON format and improve log viewer file ordering for better user experience. Changes: - structured_logger.py: Pretty-print JSON logs with 2-space indentation and blank line separators between entries for improved readability - log_viewer.py: Prioritize JSON webhook files over internal logs in file listing to show primary data source first --- webhook_server/utils/structured_logger.py | 28 ++++++++++++----------- webhook_server/web/log_viewer.py | 15 ++++++------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py index bcbb737db..0af1f6643 100644 --- a/webhook_server/utils/structured_logger.py +++ b/webhook_server/utils/structured_logger.py @@ -1,18 +1,19 @@ """Structured JSON logging for webhook execution tracking. -This module provides JSON-based logging for webhook executions in JSONL (JSON Lines) format. -Each webhook execution generates a single-line JSON entry containing all workflow steps, +This module provides JSON-based logging for webhook executions with pretty-printed formatting. +Each webhook execution generates a formatted JSON entry containing all workflow steps, timing, errors, and API metrics. Architecture: -- JSONL format: One JSON object per line for efficient streaming and parsing +- Pretty-printed JSON: Multi-line JSON objects with 2-space indentation for human readability +- Entry separation: Blank lines between entries for visual clarity - Date-based files: webhooks_YYYY-MM-DD.json for easy log rotation - Atomic writes: Temporary file + rename for crash safety - Concurrent writes: File locking to handle multiple webhook processes Log File Format: - Location: {config.data_dir}/logs/webhooks_YYYY-MM-DD.json -- Format: JSONL (newline-delimited JSON) +- Format: Pretty-printed JSON with blank line separators - Rotation: Daily based on date - Size: Unbounded (external rotation recommended) @@ -52,7 +53,7 @@ class StructuredLogWriter: """JSON log writer for webhook execution tracking. - Writes webhook execution contexts as JSONL (JSON Lines) format to date-based log files. + Writes webhook execution contexts as pretty-printed JSON to date-based log files. Provides atomic writes with file locking for safe concurrent access. Attributes: @@ -90,9 +91,10 @@ def _get_log_file_path(self, date: datetime | None = None) -> Path: return self.log_dir / f"webhooks_{date_str}.json" def write_log(self, context: WebhookContext) -> None: - """Write webhook context as JSONL entry to date-based log file. + """Write webhook context as pretty-printed JSON to date-based log file. - Writes a single-line JSON entry containing complete webhook execution context. + Writes a formatted JSON entry (2-space indentation) containing complete webhook execution context. + Each entry is followed by a blank line for visual separation. Uses atomic write pattern (temp file + rename) with file locking for safety. Args: @@ -115,8 +117,8 @@ def write_log(self, context: WebhookContext) -> None: # Get log file path log_file = self._get_log_file_path(completed_at) - # Serialize context to JSON (single line, no pretty printing) - log_entry = json.dumps(context_dict, ensure_ascii=False) + # Serialize context to JSON (pretty-printed with indentation) + log_entry = json.dumps(context_dict, ensure_ascii=False, indent=2) # Atomic write with file locking try: @@ -133,8 +135,8 @@ def write_log(self, context: WebhookContext) -> None: fcntl.flock(temp_fd, fcntl.LOCK_EX) try: - # Write JSON line with newline - os.write(temp_fd, f"{log_entry}\n".encode()) + # Write JSON entry with newline and blank line separator + os.write(temp_fd, f"{log_entry}\n\n".encode()) os.fsync(temp_fd) # Ensure data is written to disk # Append to target log file (atomic on POSIX) @@ -238,13 +240,13 @@ def write_error_log( # Write to log file log_file = self._get_log_file_path() - log_entry = json.dumps(error_entry, ensure_ascii=False) + log_entry = json.dumps(error_entry, ensure_ascii=False, indent=2) with open(log_file, "a") as log_fd: if HAS_FCNTL: fcntl.flock(log_fd.fileno(), fcntl.LOCK_EX) try: - log_fd.write(f"{log_entry}\n") + log_fd.write(f"{log_entry}\n\n") log_fd.flush() os.fsync(log_fd.fileno()) finally: diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 23e531cbe..25963dd33 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -761,15 +761,14 @@ def _stream_log_entries( log_files.extend(log_dir.glob("*.log.*")) log_files.extend(log_dir.glob("webhooks_*.json")) - # Sort log files to process in correct order (current log first, then rotated by number) + # Sort log files to prioritize JSON webhook files first (primary data source), + # then other files by modification time (newest first) + # This ensures webhook data is displayed before internal log files def sort_key(f: Path) -> tuple: - name_parts = f.name.split(".") - if len(name_parts) > 2 and name_parts[-1].isdigit(): - # Rotated file: extract rotation number - return (1, int(name_parts[-1])) - else: - # Current log file - return (0, 0) + is_json_webhook = f.suffix == ".json" and f.name.startswith("webhooks_") + # JSON webhook files: (0, -mtime) - highest priority, newest first + # Other files: (1, -mtime) - lower priority, newest first + return (0 if is_json_webhook else 1, -f.stat().st_mtime) log_files.sort(key=sort_key) log_files = log_files[:max_files] From 5b5a6314174a8daffe09f3129b70e49c56fe2e37 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 12:27:09 +0200 Subject: [PATCH 03/14] test: fix structured logger and log API test mocks Updated test_structured_logger.py to handle pretty-printed JSON format with blank line separators instead of JSONL. Fixed test_log_api.py to properly mock file operations and stat() method for error handling test. --- webhook_server/tests/test_log_api.py | 13 ++++++++++- .../tests/test_structured_logger.py | 22 +++++++++---------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/webhook_server/tests/test_log_api.py b/webhook_server/tests/test_log_api.py index f8b71bf2b..a06a092bd 100644 --- a/webhook_server/tests/test_log_api.py +++ b/webhook_server/tests/test_log_api.py @@ -449,14 +449,25 @@ def test_stream_log_entries_parse_error(self, controller): with patch("webhook_server.web.log_viewer.Path") as mock_path: mock_path_instance = Mock() mock_path_instance.exists.return_value = True + + # Create proper mock log file with stat() method mock_log_file = Mock() mock_log_file.name = "test.log" + mock_log_file.suffix = ".log" + mock_stat = Mock() + mock_stat.st_mtime = 123456789 + mock_log_file.stat.return_value = mock_stat + mock_path_instance.glob.return_value = [mock_log_file] mock_path.return_value = mock_path_instance - with patch.object(controller.log_parser, "parse_log_file", side_effect=Exception("Parse error")): + # Mock open() to raise parse error when reading file + with patch("builtins.open", side_effect=Exception("Parse error")): result = list(controller._stream_log_entries()) + # Should return empty list due to exception handling assert isinstance(result, list) + # Verify logger.warning was called for the error + assert controller.logger.warning.called def test_get_log_directory(self, controller): """Test log directory path generation.""" diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py index e2f623033..1a7ff5743 100644 --- a/webhook_server/tests/test_structured_logger.py +++ b/webhook_server/tests/test_structured_logger.py @@ -121,13 +121,11 @@ def test_write_log_writes_valid_json( log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" assert log_file.exists() - # Read and validate JSON + # Read and validate JSON (pretty-printed format with blank line separator) with open(log_file) as f: - content = f.read() - lines = content.strip().split("\n") - assert len(lines) == 1 # Single log entry - - log_entry = json.loads(lines[0]) + content = f.read().strip() + # Pretty-printed JSON is multi-line, parse the entire content as one JSON object + log_entry = json.loads(content) assert log_entry["hook_id"] == "test-hook-123" assert log_entry["event_type"] == "pull_request" assert log_entry["repository"] == "org/repo" @@ -191,7 +189,7 @@ def test_write_log_does_not_mutate_context( def test_write_log_multiple_entries_append( self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path ) -> None: - """Test multiple writes append to same file (JSONL format).""" + """Test multiple writes append to same file (pretty-printed format).""" # Arrange context2 = WebhookContext( hook_id="test-hook-456", @@ -207,12 +205,14 @@ def test_write_log_multiple_entries_append( # Assert log_file = tmp_path / "logs" / f"webhooks_{datetime.now(UTC).strftime('%Y-%m-%d')}.json" with open(log_file) as f: - lines = f.read().strip().split("\n") + content = f.read().strip() - assert len(lines) == 2 + # Split by double newline to separate pretty-printed JSON entries + json_blocks = content.split("\n\n") + assert len(json_blocks) == 2 - entry1 = json.loads(lines[0]) - entry2 = json.loads(lines[1]) + entry1 = json.loads(json_blocks[0]) + entry2 = json.loads(json_blocks[1]) assert entry1["hook_id"] == "test-hook-123" assert entry2["hook_id"] == "test-hook-456" From be1a11ff15dfb01a5f75509f0ee12d5e42fbae06 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Jan 2026 08:02:49 +0000 Subject: [PATCH 04/14] ci(deps): lock file maintenance (#967) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Update | Change | |---|---| | lockFileMaintenance | All locks refreshed | 🔧 This Pull Request updates lock files to use the latest dependency versions. --- ### Configuration 📅 **Schedule**: Branch creation - Between 12:00 AM and 03:59 AM, only on Monday ( * 0-3 * * 1 ) (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR is behind base branch, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://redirect.github.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/myk-org/github-webhook-server). Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- uv.lock | 74 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/uv.lock b/uv.lock index fc1f18108..1af44a479 100644 --- a/uv.lock +++ b/uv.lock @@ -128,11 +128,11 @@ wheels = [ [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -772,24 +772,24 @@ wheels = [ [[package]] name = "psutil" -version = "7.2.0" +version = "7.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/7c/31d1c3ceb1260301f87565f50689dc6da3db427ece1e1e012af22abca54e/psutil-7.2.0.tar.gz", hash = "sha256:2e4f8e1552f77d14dc96fb0f6240c5b34a37081c0889f0853b3b29a496e5ef64", size = 489863, upload-time = "2025-12-23T20:26:24.616Z" } +sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/8e/b35aae6ed19bc4e2286cac4832e4d522fcf00571867b0a85a3f77ef96a80/psutil-7.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c31e927555539132a00380c971816ea43d089bf4bd5f3e918ed8c16776d68474", size = 129593, upload-time = "2025-12-23T20:26:28.019Z" }, - { url = "https://files.pythonhosted.org/packages/61/a2/773d17d74e122bbffe08b97f73f2d4a01ef53fb03b98e61b8e4f64a9c6b9/psutil-7.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:db8e44e766cef86dea47d9a1fa535d38dc76449e5878a92f33683b7dba5bfcb2", size = 130104, upload-time = "2025-12-23T20:26:30.27Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e3/d3a9b3f4bd231abbd70a988beb2e3edd15306051bccbfc4472bd34a56e01/psutil-7.2.0-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85ef849ac92169dedc59a7ac2fb565f47b3468fbe1524bf748746bc21afb94c7", size = 180579, upload-time = "2025-12-23T20:26:32.628Z" }, - { url = "https://files.pythonhosted.org/packages/66/f8/6c73044424aabe1b7824d4d4504029d406648286d8fe7ba8c4682e0d3042/psutil-7.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26782bdbae2f5c14ce9ebe8ad2411dc2ca870495e0cd90f8910ede7fa5e27117", size = 183171, upload-time = "2025-12-23T20:26:34.972Z" }, - { url = "https://files.pythonhosted.org/packages/48/7d/76d7a863340885d41826562225a566683e653ee6c9ba03c9f3856afa7d80/psutil-7.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b7665f612d3b38a583391b95969667a53aaf6c5706dc27a602c9a4874fbf09e4", size = 139055, upload-time = "2025-12-23T20:26:36.848Z" }, - { url = "https://files.pythonhosted.org/packages/a0/48/200054ada0ae4872c8a71db54f3eb6a9af4101680ee6830d373b7fda526b/psutil-7.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4413373c174520ae28a24a8974ad8ce6b21f060d27dde94e25f8c73a7effe57a", size = 134737, upload-time = "2025-12-23T20:26:38.784Z" }, - { url = "https://files.pythonhosted.org/packages/40/c5/a49160bf3e165b7b93a60579a353cf5d939d7f878fe5fd369110f1d18043/psutil-7.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:977a2fcd132d15cb05b32b2d85b98d087cad039b0ce435731670ba74da9e6133", size = 128116, upload-time = "2025-12-23T20:26:53.516Z" }, - { url = "https://files.pythonhosted.org/packages/10/a1/c75feb480f60cd768fb6ed00ac362a16a33e5076ec8475a22d8162fb2659/psutil-7.2.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:24151011c21fadd94214d7139d7c6c54569290d7e553989bdf0eab73b13beb8c", size = 128925, upload-time = "2025-12-23T20:26:55.573Z" }, - { url = "https://files.pythonhosted.org/packages/12/ff/e93136587c00a543f4bc768b157fac2c47cd77b180d4f4e5c6efb6ea53a2/psutil-7.2.0-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91f211ba9279e7c61d9d8f84b713cfc38fa161cb0597d5cb3f1ca742f6848254", size = 154666, upload-time = "2025-12-23T20:26:57.312Z" }, - { url = "https://files.pythonhosted.org/packages/b8/dd/4c2de9c3827c892599d277a69d2224136800870a8a88a80981de905de28d/psutil-7.2.0-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f37415188b7ea98faf90fed51131181646c59098b077550246e2e092e127418b", size = 156109, upload-time = "2025-12-23T20:26:58.851Z" }, - { url = "https://files.pythonhosted.org/packages/81/3f/090943c682d3629968dd0b04826ddcbc760ee1379021dbe316e2ddfcd01b/psutil-7.2.0-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0d12c7ce6ed1128cd81fd54606afa054ac7dbb9773469ebb58cf2f171c49f2ac", size = 148081, upload-time = "2025-12-23T20:27:01.318Z" }, - { url = "https://files.pythonhosted.org/packages/c4/88/c39648ebb8ec182d0364af53cdefe6eddb5f3872ba718b5855a8ff65d6d4/psutil-7.2.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ca0faef7976530940dcd39bc5382d0d0d5eb023b186a4901ca341bd8d8684151", size = 147376, upload-time = "2025-12-23T20:27:03.347Z" }, - { url = "https://files.pythonhosted.org/packages/01/a2/5b39e08bd9b27476bc7cce7e21c71a481ad60b81ffac49baf02687a50d7f/psutil-7.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:abdb74137ca232d20250e9ad471f58d500e7743bc8253ba0bfbf26e570c0e437", size = 136910, upload-time = "2025-12-23T20:27:05.289Z" }, - { url = "https://files.pythonhosted.org/packages/59/54/53839db1258c1eaeb4ded57ff202144ebc75b23facc05a74fd98d338b0c6/psutil-7.2.0-cp37-abi3-win_arm64.whl", hash = "sha256:284e71038b3139e7ab3834b63b3eb5aa5565fcd61a681ec746ef9a0a8c457fd2", size = 133807, upload-time = "2025-12-23T20:27:06.825Z" }, + { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" }, + { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload-time = "2025-12-29T08:26:08.276Z" }, + { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload-time = "2025-12-29T08:26:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload-time = "2025-12-29T08:26:14.528Z" }, + { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" }, + { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" }, + { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload-time = "2025-12-29T08:26:41.036Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" }, ] [[package]] @@ -927,27 +927,25 @@ crypto = [ [[package]] name = "pynacl" -version = "1.6.1" +version = "1.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b2/46/aeca065d227e2265125aea590c9c47fbf5786128c9400ee0eb7c88931f06/pynacl-1.6.1.tar.gz", hash = "sha256:8d361dac0309f2b6ad33b349a56cd163c98430d409fa503b10b70b3ad66eaa1d", size = 3506616, upload-time = "2025-11-10T16:02:13.195Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/9a/4019b524b03a13438637b11538c82781a5eda427394380381af8f04f467a/pynacl-1.6.2.tar.gz", hash = "sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c", size = 3511692, upload-time = "2026-01-01T17:48:10.851Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/41/3cfb3b4f3519f6ff62bf71bf1722547644bcfb1b05b8fdbdc300249ba113/pynacl-1.6.1-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:a6f9fd6d6639b1e81115c7f8ff16b8dedba1e8098d2756275d63d208b0e32021", size = 387591, upload-time = "2025-11-10T16:01:49.1Z" }, - { url = "https://files.pythonhosted.org/packages/18/21/b8a6563637799f617a3960f659513eccb3fcc655d5fc2be6e9dc6416826f/pynacl-1.6.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e49a3f3d0da9f79c1bec2aa013261ab9fa651c7da045d376bd306cf7c1792993", size = 798866, upload-time = "2025-11-10T16:01:55.688Z" }, - { url = "https://files.pythonhosted.org/packages/e8/6c/dc38033bc3ea461e05ae8f15a81e0e67ab9a01861d352ae971c99de23e7c/pynacl-1.6.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7713f8977b5d25f54a811ec9efa2738ac592e846dd6e8a4d3f7578346a841078", size = 1398001, upload-time = "2025-11-10T16:01:57.101Z" }, - { url = "https://files.pythonhosted.org/packages/9f/05/3ec0796a9917100a62c5073b20c4bce7bf0fea49e99b7906d1699cc7b61b/pynacl-1.6.1-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a3becafc1ee2e5ea7f9abc642f56b82dcf5be69b961e782a96ea52b55d8a9fc", size = 834024, upload-time = "2025-11-10T16:01:50.228Z" }, - { url = "https://files.pythonhosted.org/packages/f0/b7/ae9982be0f344f58d9c64a1c25d1f0125c79201634efe3c87305ac7cb3e3/pynacl-1.6.1-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ce50d19f1566c391fedc8dc2f2f5be265ae214112ebe55315e41d1f36a7f0a9", size = 1436766, upload-time = "2025-11-10T16:01:51.886Z" }, - { url = "https://files.pythonhosted.org/packages/b4/51/b2ccbf89cf3025a02e044dd68a365cad593ebf70f532299f2c047d2b7714/pynacl-1.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:543f869140f67d42b9b8d47f922552d7a967e6c116aad028c9bfc5f3f3b3a7b7", size = 817275, upload-time = "2025-11-10T16:01:53.351Z" }, - { url = "https://files.pythonhosted.org/packages/a8/6c/dd9ee8214edf63ac563b08a9b30f98d116942b621d39a751ac3256694536/pynacl-1.6.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a2bb472458c7ca959aeeff8401b8efef329b0fc44a89d3775cffe8fad3398ad8", size = 1401891, upload-time = "2025-11-10T16:01:54.587Z" }, - { url = "https://files.pythonhosted.org/packages/0f/c1/97d3e1c83772d78ee1db3053fd674bc6c524afbace2bfe8d419fd55d7ed1/pynacl-1.6.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3206fa98737fdc66d59b8782cecc3d37d30aeec4593d1c8c145825a345bba0f0", size = 772291, upload-time = "2025-11-10T16:01:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/4d/ca/691ff2fe12f3bb3e43e8e8df4b806f6384593d427f635104d337b8e00291/pynacl-1.6.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:53543b4f3d8acb344f75fd4d49f75e6572fce139f4bfb4815a9282296ff9f4c0", size = 1370839, upload-time = "2025-11-10T16:01:59.252Z" }, - { url = "https://files.pythonhosted.org/packages/30/27/06fe5389d30391fce006442246062cc35773c84fbcad0209fbbf5e173734/pynacl-1.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:319de653ef84c4f04e045eb250e6101d23132372b0a61a7acf91bac0fda8e58c", size = 791371, upload-time = "2025-11-10T16:02:01.075Z" }, - { url = "https://files.pythonhosted.org/packages/2c/7a/e2bde8c9d39074a5aa046c7d7953401608d1f16f71e237f4bef3fb9d7e49/pynacl-1.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:262a8de6bba4aee8a66f5edf62c214b06647461c9b6b641f8cd0cb1e3b3196fe", size = 1363031, upload-time = "2025-11-10T16:02:02.656Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b6/63fd77264dae1087770a1bb414bc604470f58fbc21d83822fc9c76248076/pynacl-1.6.1-cp38-abi3-win32.whl", hash = "sha256:9fd1a4eb03caf8a2fe27b515a998d26923adb9ddb68db78e35ca2875a3830dde", size = 226585, upload-time = "2025-11-10T16:02:07.116Z" }, - { url = "https://files.pythonhosted.org/packages/12/c8/b419180f3fdb72ab4d45e1d88580761c267c7ca6eda9a20dcbcba254efe6/pynacl-1.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:a569a4069a7855f963940040f35e87d8bc084cb2d6347428d5ad20550a0a1a21", size = 238923, upload-time = "2025-11-10T16:02:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/35/76/c34426d532e4dce7ff36e4d92cb20f4cbbd94b619964b93d24e8f5b5510f/pynacl-1.6.1-cp38-abi3-win_arm64.whl", hash = "sha256:5953e8b8cfadb10889a6e7bd0f53041a745d1b3d30111386a1bb37af171e6daf", size = 183970, upload-time = "2025-11-10T16:02:05.786Z" }, + { url = "https://files.pythonhosted.org/packages/be/7b/4845bbf88e94586ec47a432da4e9107e3fc3ce37eb412b1398630a37f7dd/pynacl-1.6.2-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:c949ea47e4206af7c8f604b8278093b674f7c79ed0d4719cc836902bf4517465", size = 388458, upload-time = "2026-01-01T17:32:16.829Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b4/e927e0653ba63b02a4ca5b4d852a8d1d678afbf69b3dbf9c4d0785ac905c/pynacl-1.6.2-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8845c0631c0be43abdd865511c41eab235e0be69c81dc66a50911594198679b0", size = 800020, upload-time = "2026-01-01T17:32:18.34Z" }, + { url = "https://files.pythonhosted.org/packages/7f/81/d60984052df5c97b1d24365bc1e30024379b42c4edcd79d2436b1b9806f2/pynacl-1.6.2-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:22de65bb9010a725b0dac248f353bb072969c94fa8d6b1f34b87d7953cf7bbe4", size = 1399174, upload-time = "2026-01-01T17:32:20.239Z" }, + { url = "https://files.pythonhosted.org/packages/68/f7/322f2f9915c4ef27d140101dd0ed26b479f7e6f5f183590fd32dfc48c4d3/pynacl-1.6.2-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46065496ab748469cdd999246d17e301b2c24ae2fdf739132e580a0e94c94a87", size = 835085, upload-time = "2026-01-01T17:32:22.24Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d0/f301f83ac8dbe53442c5a43f6a39016f94f754d7a9815a875b65e218a307/pynacl-1.6.2-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a66d6fb6ae7661c58995f9c6435bda2b1e68b54b598a6a10247bfcdadac996c", size = 1437614, upload-time = "2026-01-01T17:32:23.766Z" }, + { url = "https://files.pythonhosted.org/packages/c4/58/fc6e649762b029315325ace1a8c6be66125e42f67416d3dbd47b69563d61/pynacl-1.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:26bfcd00dcf2cf160f122186af731ae30ab120c18e8375684ec2670dccd28130", size = 818251, upload-time = "2026-01-01T17:32:25.69Z" }, + { url = "https://files.pythonhosted.org/packages/c9/a8/b917096b1accc9acd878819a49d3d84875731a41eb665f6ebc826b1af99e/pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6", size = 1402859, upload-time = "2026-01-01T17:32:27.215Z" }, + { url = "https://files.pythonhosted.org/packages/85/42/fe60b5f4473e12c72f977548e4028156f4d340b884c635ec6b063fe7e9a5/pynacl-1.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68be3a09455743ff9505491220b64440ced8973fe930f270c8e07ccfa25b1f9e", size = 791926, upload-time = "2026-01-01T17:32:29.314Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f9/e40e318c604259301cc091a2a63f237d9e7b424c4851cafaea4ea7c4834e/pynacl-1.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b097553b380236d51ed11356c953bf8ce36a29a3e596e934ecabe76c985a577", size = 1363101, upload-time = "2026-01-01T17:32:31.263Z" }, + { url = "https://files.pythonhosted.org/packages/48/47/e761c254f410c023a469284a9bc210933e18588ca87706ae93002c05114c/pynacl-1.6.2-cp38-abi3-win32.whl", hash = "sha256:5811c72b473b2f38f7e2a3dc4f8642e3a3e9b5e7317266e4ced1fba85cae41aa", size = 227421, upload-time = "2026-01-01T17:32:33.076Z" }, + { url = "https://files.pythonhosted.org/packages/41/ad/334600e8cacc7d86587fe5f565480fde569dfb487389c8e1be56ac21d8ac/pynacl-1.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:62985f233210dee6548c223301b6c25440852e13d59a8b81490203c3227c5ba0", size = 239754, upload-time = "2026-01-01T17:32:34.557Z" }, + { url = "https://files.pythonhosted.org/packages/29/7d/5945b5af29534641820d3bd7b00962abbbdfee84ec7e19f0d5b3175f9a31/pynacl-1.6.2-cp38-abi3-win_arm64.whl", hash = "sha256:834a43af110f743a754448463e8fd61259cd4ab5bbedcf70f9dabad1d28a394c", size = 184801, upload-time = "2026-01-01T17:32:36.309Z" }, ] [[package]] @@ -1237,15 +1235,15 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.1.1" +version = "3.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/08/8f554b0e5bad3e4e880521a1686d96c05198471eed860b0eb89b57ea3636/sse_starlette-3.1.1.tar.gz", hash = "sha256:bffa531420c1793ab224f63648c059bcadc412bf9fdb1301ac8de1cf9a67b7fb", size = 24306, upload-time = "2025-12-26T15:22:53.836Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/34/f5df66cb383efdbf4f2db23cabb27f51b1dcb737efaf8a558f6f1d195134/sse_starlette-3.1.2.tar.gz", hash = "sha256:55eff034207a83a0eb86de9a68099bd0157838f0b8b999a1b742005c71e33618", size = 26303, upload-time = "2025-12-31T08:02:20.023Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/31/4c281581a0f8de137b710a07f65518b34bcf333b201cfa06cfda9af05f8a/sse_starlette-3.1.1-py3-none-any.whl", hash = "sha256:bb38f71ae74cfd86b529907a9fda5632195dfa6ae120f214ea4c890c7ee9d436", size = 12442, upload-time = "2025-12-26T15:22:52.911Z" }, + { url = "https://files.pythonhosted.org/packages/b7/95/8c4b76eec9ae574474e5d2997557cebf764bcd3586458956c30631ae08f4/sse_starlette-3.1.2-py3-none-any.whl", hash = "sha256:cd800dd349f4521b317b9391d3796fa97b71748a4da9b9e00aafab32dda375c8", size = 12484, upload-time = "2025-12-31T08:02:18.894Z" }, ] [[package]] From 38dc3709166e03c0aa15ab5e609e63d0dcc015d8 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 13:19:08 +0200 Subject: [PATCH 05/14] fix: address CodeRabbit review comments for structured logging - Fix CancelledError handling in handlers (re-raise instead of catching) - Add fail_step() with traceback in exception handlers - Use logger.exception() instead of logger.error() for automatic traceback - Add from __future__ import annotations for type annotation fixes - Fix test mocks for OwnersFileHandler.initialize - Rename test_stream_log_entries_parse_error to test_stream_log_entries_file_read_error - Memory-efficient streaming with collections.deque in log_viewer - Prefer context.completed_at as timing source of truth --- .gitignore | 1 - webhook_server/app.py | 4 + .../libs/handlers/issue_comment_handler.py | 99 ++-- .../libs/handlers/owners_files_handler.py | 56 +- .../libs/handlers/pull_request_handler.py | 7 +- .../handlers/pull_request_review_handler.py | 69 +-- webhook_server/libs/handlers/push_handler.py | 9 +- webhook_server/libs/log_parser.py | 4 +- webhook_server/tests/test_context.py | 481 +++++++++++++++++- webhook_server/tests/test_github_api.py | 25 +- .../tests/test_issue_comment_handler.py | 5 +- webhook_server/tests/test_log_api.py | 8 +- webhook_server/tests/test_log_parser.py | 26 +- webhook_server/tests/test_log_viewer.py | 1 + .../tests/test_structured_logger.py | 6 +- webhook_server/utils/context.py | 144 +++++- webhook_server/utils/structured_logger.py | 8 +- webhook_server/web/log_viewer.py | 10 +- 18 files changed, 832 insertions(+), 131 deletions(-) diff --git a/.gitignore b/.gitignore index f79fe781d..fefaeebb6 100644 --- a/.gitignore +++ b/.gitignore @@ -158,7 +158,6 @@ find_unused_code.py # AI .cursor/ -CLAUDE.md .agent-os/ .cursorrules .claude/ diff --git a/webhook_server/app.py b/webhook_server/app.py index 8f8d096d3..a11a22a03 100644 --- a/webhook_server/app.py +++ b/webhook_server/app.py @@ -434,6 +434,10 @@ async def process_with_error_handling( "message": str(ex), "traceback": traceback.format_exc(), } + except asyncio.CancelledError: + # Task cancellation - propagate without logging as error + _logger.debug(f"{_log_context} Webhook processing cancelled") + raise except Exception as ex: # Catch-all for unexpected errors _logger.exception(f"{_log_context} Unexpected error in background webhook processing") diff --git a/webhook_server/libs/handlers/issue_comment_handler.py b/webhook_server/libs/handlers/issue_comment_handler.py index 6dfef06eb..9c93761a8 100644 --- a/webhook_server/libs/handlers/issue_comment_handler.py +++ b/webhook_server/libs/handlers/issue_comment_handler.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import traceback from asyncio import Task from collections.abc import Callable, Coroutine from typing import TYPE_CHECKING, Any @@ -66,53 +67,73 @@ async def process_comment_webhook_data(self, pull_request: PullRequest) -> None: if self.ctx: self.ctx.start_step("issue_comment_handler") - comment_action = self.hook_data["action"] + try: + comment_action = self.hook_data["action"] - if comment_action in ("edited", "deleted"): - self.logger.debug(f"{self.log_prefix} Not processing comment. action is {comment_action}") - if self.ctx: - self.ctx.complete_step("issue_comment_handler") - return - - self.logger.info(f"{self.log_prefix} Processing issue {self.hook_data['issue']['number']}") - - body: str = self.hook_data["comment"]["body"] - - if self.github_webhook.issue_url_for_welcome_msg in body: - self.logger.debug(f"{self.log_prefix} Welcome message found in issue {pull_request.title}. Not processing") - if self.ctx: - self.ctx.complete_step("issue_comment_handler") - return + if comment_action in ("edited", "deleted"): + self.logger.debug(f"{self.log_prefix} Not processing comment. action is {comment_action}") + if self.ctx: + self.ctx.complete_step("issue_comment_handler") + return - _user_commands: list[str] = [_cmd.strip("/") for _cmd in body.strip().splitlines() if _cmd.startswith("/")] + self.logger.info(f"{self.log_prefix} Processing issue {self.hook_data['issue']['number']}") - user_login: str = self.hook_data["sender"]["login"] + body: str = self.hook_data["comment"]["body"] - # Execute all commands in parallel - if _user_commands: - tasks: list[Coroutine[Any, Any, Any] | Task[Any]] = [] - for user_command in _user_commands: - task = asyncio.create_task( - self.user_commands( - pull_request=pull_request, - command=user_command, - reviewed_user=user_login, - issue_comment_id=self.hook_data["comment"]["id"], - ) + if self.github_webhook.issue_url_for_welcome_msg in body: + self.logger.debug( + f"{self.log_prefix} Welcome message found in issue {pull_request.title}. Not processing" ) - tasks.append(task) + if self.ctx: + self.ctx.complete_step("issue_comment_handler") + return - # Execute all commands concurrently - results = await asyncio.gather(*tasks, return_exceptions=True) + _user_commands: list[str] = [_cmd.strip("/") for _cmd in body.strip().splitlines() if _cmd.startswith("/")] + + user_login: str = self.hook_data["sender"]["login"] + + # Execute all commands in parallel + if _user_commands: + tasks: list[Coroutine[Any, Any, Any] | Task[Any]] = [] + for user_command in _user_commands: + task = asyncio.create_task( + self.user_commands( + pull_request=pull_request, + command=user_command, + reviewed_user=user_login, + issue_comment_id=self.hook_data["comment"]["id"], + ) + ) + tasks.append(task) + + # Execute all commands concurrently + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Check if any command failed + failed_commands: list[tuple[str, Exception]] = [] + for idx, result in enumerate(results): + user_command = _user_commands[idx] + if isinstance(result, Exception): + self.logger.error(f"{self.log_prefix} Command execution failed: /{user_command} - {result}") + failed_commands.append((user_command, result)) + + # If any command failed, mark step as failed + if failed_commands: + # Use first exception for context failure + first_failed_command, first_exception = failed_commands[0] + error_msg = f"Command /{first_failed_command} failed: {first_exception}" + if self.ctx: + self.ctx.fail_step("issue_comment_handler", first_exception, traceback.format_exc()) + raise RuntimeError(error_msg) from first_exception - # Log results and handle exceptions - for idx, result in enumerate(results): - user_command = _user_commands[idx] - if isinstance(result, Exception): - self.logger.error(f"{self.log_prefix} Command execution failed: /{user_command} - {result}") + if self.ctx: + self.ctx.complete_step("issue_comment_handler") - if self.ctx: - self.ctx.complete_step("issue_comment_handler") + except Exception as ex: + # If step not already failed, mark it as failed + if self.ctx and not self.ctx.workflow_steps.get("issue_comment_handler", {}).get("status") == "failed": + self.ctx.fail_step("issue_comment_handler", ex, traceback.format_exc()) + raise async def user_commands( self, pull_request: PullRequest, command: str, reviewed_user: str, issue_comment_id: int diff --git a/webhook_server/libs/handlers/owners_files_handler.py b/webhook_server/libs/handlers/owners_files_handler.py index c734fc587..77f1e49d6 100644 --- a/webhook_server/libs/handlers/owners_files_handler.py +++ b/webhook_server/libs/handlers/owners_files_handler.py @@ -94,31 +94,40 @@ async def list_changed_files(self, pull_request: PullRequest) -> list[str]: Returns: List of changed file paths relative to repository root + + Raises: + RuntimeError: If git diff command fails + asyncio.CancelledError: Propagates cancellation (never caught) """ - try: - # Get base and head SHAs (wrap property accesses in asyncio.to_thread) - base_sha, head_sha = await asyncio.gather( - asyncio.to_thread(lambda: pull_request.base.sha), - asyncio.to_thread(lambda: pull_request.head.sha), - ) + # Get base and head SHAs (wrap property accesses in asyncio.to_thread) + base_sha, head_sha = await asyncio.gather( + asyncio.to_thread(lambda: pull_request.base.sha), + asyncio.to_thread(lambda: pull_request.head.sha), + ) - # Run git diff command on cloned repository - # Quote clone_repo_dir to handle paths with spaces or special characters - git_diff_command = ( - f"git -C {shlex.quote(self.github_webhook.clone_repo_dir)} diff --name-only {base_sha}...{head_sha}" - ) + # Run git diff command on cloned repository + # Quote clone_repo_dir to handle paths with spaces or special characters + git_diff_command = ( + f"git -C {shlex.quote(self.github_webhook.clone_repo_dir)} diff --name-only {base_sha}...{head_sha}" + ) - success, out, _ = await run_command( + try: + success, out, err = await run_command( command=git_diff_command, log_prefix=self.log_prefix, verify_stderr=False, mask_sensitive=self.github_webhook.mask_sensitive, ) - # Check success flag - return empty list if git diff failed + # Check success flag - raise if git diff failed if not success: - self.logger.error(f"{self.log_prefix} git diff command failed") - return [] + error_msg = ( + f"git diff command failed for {base_sha}...{head_sha}. " + f"stdout: {out.strip() if out else '(empty)'}, " + f"stderr: {err.strip() if err else '(empty)'}" + ) + self.logger.error(f"{self.log_prefix} {error_msg}") + raise RuntimeError(error_msg) # Parse output: split by newlines and filter empty lines changed_files = [line.strip() for line in out.splitlines() if line.strip()] @@ -126,10 +135,19 @@ async def list_changed_files(self, pull_request: PullRequest) -> list[str]: self.logger.debug(f"{self.log_prefix} Changed files: {changed_files}") return changed_files - except Exception: - # Log error and return empty list if git diff fails - self.logger.exception(f"{self.log_prefix} Failed to get changed files via git diff") - return [] + except asyncio.CancelledError: + # Never catch CancelledError - let it propagate + raise + + except RuntimeError: + # Re-raise RuntimeError from git diff failure check + raise + + except Exception as ex: + # Wrap unexpected exceptions with context + error_msg = f"Unexpected error getting changed files via git diff for {base_sha}...{head_sha}: {ex}" + self.logger.exception(f"{self.log_prefix} {error_msg}") + raise RuntimeError(error_msg) from ex def _validate_owners_content(self, content: Any, path: str) -> bool: """Validate OWNERS file content structure.""" diff --git a/webhook_server/libs/handlers/pull_request_handler.py b/webhook_server/libs/handlers/pull_request_handler.py index 2f888c3e2..1776e9065 100644 --- a/webhook_server/libs/handlers/pull_request_handler.py +++ b/webhook_server/libs/handlers/pull_request_handler.py @@ -894,10 +894,11 @@ async def check_if_can_be_merged(self, pull_request: PullRequest) -> None: if self.ctx: self.ctx.complete_step("check_merge_eligibility", can_merge=False, reason=failure_output) + except asyncio.CancelledError: + self.logger.debug(f"{self.log_prefix} Merge check cancelled") + raise except Exception as ex: - self.logger.error( - f"{self.log_prefix} Failed to check if can be merged, set check run to {FAILURE_STR} {ex}" - ) + self.logger.exception(f"{self.log_prefix} Failed to check if can be merged, set check run to {FAILURE_STR}") _err = "Failed to check if can be merged, check logs" output["text"] = _err await self.labels_handler._remove_label(pull_request=pull_request, label=CAN_BE_MERGED_STR) diff --git a/webhook_server/libs/handlers/pull_request_review_handler.py b/webhook_server/libs/handlers/pull_request_review_handler.py index c2d633f62..eac1720fd 100644 --- a/webhook_server/libs/handlers/pull_request_review_handler.py +++ b/webhook_server/libs/handlers/pull_request_review_handler.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import TYPE_CHECKING from github.PullRequest import PullRequest @@ -12,7 +14,7 @@ class PullRequestReviewHandler: - def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersFileHandler): + def __init__(self, github_webhook: GithubWebhook, owners_file_handler: OwnersFileHandler) -> None: self.github_webhook = github_webhook self.ctx: WebhookContext | None = github_webhook.ctx self.owners_file_handler = owners_file_handler @@ -27,36 +29,37 @@ async def process_pull_request_review_webhook_data(self, pull_request: PullReque if self.ctx: self.ctx.start_step("pr_review_handler") - if self.hook_data["action"] == "submitted": - """ - Available actions: - commented - approved - changes_requested - """ - reviewed_user = self.hook_data["review"]["user"]["login"] - review_state = self.hook_data["review"]["state"] - self.github_webhook.logger.debug( - f"{self.github_webhook.log_prefix} " - f"Processing pull request review for user {reviewed_user} with state {review_state}" - ) - - await self.labels_handler.manage_reviewed_by_label( - pull_request=pull_request, - review_state=review_state, - action=ADD_STR, - reviewed_user=reviewed_user, - ) - - if body := self.hook_data["review"]["body"]: - self.github_webhook.logger.debug(f"{self.github_webhook.log_prefix} Found review body: {body}") - if f"/{APPROVE_STR}" in body: - await self.labels_handler.label_by_user_comment( - pull_request=pull_request, - user_requested_label=APPROVE_STR, - remove=False, - reviewed_user=reviewed_user, - ) + try: + if self.hook_data["action"] == "submitted": + """ + Available actions: + commented + approved + changes_requested + """ + reviewed_user = self.hook_data["review"]["user"]["login"] + review_state = self.hook_data["review"]["state"] + self.github_webhook.logger.debug( + f"{self.github_webhook.log_prefix} " + f"Processing pull request review for user {reviewed_user} with state {review_state}" + ) - if self.ctx: - self.ctx.complete_step("pr_review_handler") + await self.labels_handler.manage_reviewed_by_label( + pull_request=pull_request, + review_state=review_state, + action=ADD_STR, + reviewed_user=reviewed_user, + ) + + if body := self.hook_data["review"]["body"]: + self.github_webhook.logger.debug(f"{self.github_webhook.log_prefix} Found review body: {body}") + if f"/{APPROVE_STR}" in body: + await self.labels_handler.label_by_user_comment( + pull_request=pull_request, + user_requested_label=APPROVE_STR, + remove=False, + reviewed_user=reviewed_user, + ) + finally: + if self.ctx: + self.ctx.complete_step("pr_review_handler") diff --git a/webhook_server/libs/handlers/push_handler.py b/webhook_server/libs/handlers/push_handler.py index 5499408d5..16d565971 100644 --- a/webhook_server/libs/handlers/push_handler.py +++ b/webhook_server/libs/handlers/push_handler.py @@ -1,5 +1,6 @@ import asyncio import re +import traceback from typing import TYPE_CHECKING from github.Repository import Repository @@ -39,8 +40,11 @@ async def process_push_webhook_data(self) -> None: self.logger.info(f"{self.log_prefix} Processing upload to pypi for tag: {tag_name}") try: await self.upload_to_pypi(tag_name=tag_name) - except Exception: + except Exception as ex: self.logger.exception(f"{self.log_prefix} PyPI upload failed") + if self.ctx: + self.ctx.fail_step("push_handler", ex, traceback.format_exc()) + return if self.github_webhook.build_and_push_container and self.github_webhook.container_release: self.logger.info(f"{self.log_prefix} Processing build and push container for tag: {tag_name}") @@ -49,6 +53,9 @@ async def process_push_webhook_data(self) -> None: # Note: run_build_container logs completion/failure internally except Exception as ex: self.logger.exception(f"{self.log_prefix} Container build and push failed: {ex}") + if self.ctx: + self.ctx.fail_step("push_handler", ex, traceback.format_exc()) + return if self.ctx: self.ctx.complete_step("push_handler") diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index fb66ff72b..9eb4eafa2 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -415,9 +415,9 @@ def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: if entry: entries.append(entry) except OSError as e: - self.logger.error(f"Failed to read JSON log file {file_path}: {e}") + self.logger.exception(f"Failed to read JSON log file {file_path}: {e}") except UnicodeDecodeError as e: - self.logger.error(f"Failed to decode JSON log file {file_path}: {e}") + self.logger.exception(f"Failed to decode JSON log file {file_path}: {e}") return entries diff --git a/webhook_server/tests/test_context.py b/webhook_server/tests/test_context.py index 3627e0e57..0e4b1ea96 100644 --- a/webhook_server/tests/test_context.py +++ b/webhook_server/tests/test_context.py @@ -3,6 +3,8 @@ Tests WebhookContext dataclass and module-level context management functions. """ +from __future__ import annotations + from datetime import UTC, datetime, timedelta from unittest.mock import patch @@ -21,7 +23,7 @@ def mock_datetime(): """Mock datetime.now(UTC) for deterministic tests.""" base_time = datetime(2024, 1, 15, 10, 30, 0, tzinfo=UTC) - def mock_now(tz=None): + def mock_now(tz: datetime.tzinfo | None = None) -> datetime: if tz == UTC: return base_time return datetime.now(tz) @@ -587,3 +589,480 @@ def test_workflow_with_failed_step(self): assert ctx.success is False assert ctx.error is not None assert ctx.error["type"] == "AssertionError" + + +class TestCompleteStepSmartFiltering: + """Tests for complete_step() smart filtering of verbose output.""" + + def test_complete_step_filters_reason_on_success_can_merge_true(self): + """Test complete_step() filters 'reason' field when can_merge=True.""" + ctx = WebhookContext( + hook_id="hook-filter-1", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("check_merge_eligibility") + ctx.complete_step("check_merge_eligibility", can_merge=True, reason="All checks passed") + + step = ctx.workflow_steps["check_merge_eligibility"] + assert step["status"] == "completed" + assert step["can_merge"] is True + assert "reason" not in step # Reason filtered out on success + + def test_complete_step_includes_reason_on_failure_can_merge_false(self): + """Test complete_step() includes 'reason' field when can_merge=False.""" + ctx = WebhookContext( + hook_id="hook-filter-2", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("check_merge_eligibility") + ctx.complete_step("check_merge_eligibility", can_merge=False, reason="Missing approver") + + step = ctx.workflow_steps["check_merge_eligibility"] + assert step["status"] == "completed" + assert step["can_merge"] is False + assert step["reason"] == "Missing approver" # Reason included on failure + + def test_complete_step_filters_reason_on_success_true(self): + """Test complete_step() filters 'reason' field when success=True.""" + ctx = WebhookContext( + hook_id="hook-filter-3", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("validate_config") + ctx.complete_step("validate_config", success=True, reason="Config is valid") + + step = ctx.workflow_steps["validate_config"] + assert step["status"] == "completed" + assert step["success"] is True + assert "reason" not in step # Reason filtered out on success + + def test_complete_step_includes_reason_on_success_false(self): + """Test complete_step() includes 'reason' field when success=False.""" + ctx = WebhookContext( + hook_id="hook-filter-4", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("validate_config") + ctx.complete_step("validate_config", success=False, reason="Missing required field") + + step = ctx.workflow_steps["validate_config"] + assert step["status"] == "completed" + assert step["success"] is False + assert step["reason"] == "Missing required field" # Reason included on failure + + def test_complete_step_custom_verbose_fields(self): + """Test complete_step() with custom verbose_fields parameter.""" + ctx = WebhookContext( + hook_id="hook-filter-5", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("build_image") + ctx.complete_step( + "build_image", + verbose_fields=["build_log", "debug_info"], + success=True, + build_log="Log output...", + debug_info="Debug details...", + image_tag="v1.0.0", + ) + + step = ctx.workflow_steps["build_image"] + assert step["status"] == "completed" + assert step["success"] is True + assert step["image_tag"] == "v1.0.0" # Non-verbose field included + assert "build_log" not in step # Verbose field filtered out + assert "debug_info" not in step # Verbose field filtered out + + def test_complete_step_custom_verbose_fields_on_failure(self): + """Test complete_step() custom verbose_fields included on failure.""" + ctx = WebhookContext( + hook_id="hook-filter-6", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("build_image") + ctx.complete_step( + "build_image", + verbose_fields=["build_log", "debug_info"], + success=False, + build_log="Error: build failed", + debug_info="Stack trace...", + image_tag="v1.0.0", + ) + + step = ctx.workflow_steps["build_image"] + assert step["status"] == "completed" + assert step["success"] is False + assert step["image_tag"] == "v1.0.0" + assert step["build_log"] == "Error: build failed" # Verbose field included on failure + assert step["debug_info"] == "Stack trace..." # Verbose field included on failure + + def test_complete_step_detects_success_with_suffix_patterns(self): + """Test complete_step() detects success using _success and _failed suffix patterns.""" + ctx = WebhookContext( + hook_id="hook-filter-7", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Test _success suffix + ctx.start_step("test_success_suffix") + ctx.complete_step("test_success_suffix", build_success=True, reason="Build succeeded") + + step = ctx.workflow_steps["test_success_suffix"] + assert step["build_success"] is True + assert "reason" not in step # Filtered out because build_success=True + + # Test _failed suffix + ctx.start_step("test_failed_suffix") + ctx.complete_step("test_failed_suffix", build_failed=False, reason="Build succeeded") + + step = ctx.workflow_steps["test_failed_suffix"] + assert step["build_failed"] is False + assert "reason" not in step # Filtered out because build_failed=False (success) + + def test_complete_step_includes_verbose_on_failure_suffix_patterns(self): + """Test complete_step() includes verbose fields on failure using suffix patterns.""" + ctx = WebhookContext( + hook_id="hook-filter-8", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Test _success=False + ctx.start_step("test_success_false") + ctx.complete_step("test_success_false", build_success=False, reason="Build failed") + + step = ctx.workflow_steps["test_success_false"] + assert step["build_success"] is False + assert step["reason"] == "Build failed" # Included because build_success=False + + # Test _failed=True + ctx.start_step("test_failed_true") + ctx.complete_step("test_failed_true", build_failed=True, reason="Build failed") + + step = ctx.workflow_steps["test_failed_true"] + assert step["build_failed"] is True + assert step["reason"] == "Build failed" # Included because build_failed=True + + def test_complete_step_default_success_when_no_indicators(self): + """Test complete_step() defaults to success when no indicators present.""" + ctx = WebhookContext( + hook_id="hook-filter-9", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("neutral_step") + ctx.complete_step("neutral_step", count=5, reason="Processed 5 items") + + step = ctx.workflow_steps["neutral_step"] + assert step["status"] == "completed" + assert step["count"] == 5 + assert "reason" not in step # Filtered out (defaults to success) + + def test_complete_step_no_filtering_when_verbose_fields_empty(self): + """Test complete_step() with verbose_fields=[] (no filtering).""" + ctx = WebhookContext( + hook_id="hook-filter-10", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("no_filtering") + ctx.complete_step("no_filtering", verbose_fields=[], can_merge=True, reason="All good") + + step = ctx.workflow_steps["no_filtering"] + assert step["can_merge"] is True + assert step["reason"] == "All good" # Not filtered (verbose_fields=[]) + + def test_complete_step_filters_multiple_verbose_fields(self): + """Test complete_step() filters multiple verbose fields on success.""" + ctx = WebhookContext( + hook_id="hook-filter-11", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("multi_verbose") + ctx.complete_step( + "multi_verbose", + verbose_fields=["reason", "details", "debug"], + can_merge=True, + reason="Success", + details="Details...", + debug="Debug info...", + count=10, + ) + + step = ctx.workflow_steps["multi_verbose"] + assert step["can_merge"] is True + assert step["count"] == 10 + assert "reason" not in step + assert "details" not in step + assert "debug" not in step + + def test_complete_step_error_indicator_overrides_can_merge(self): + """Test complete_step() error indicator takes precedence.""" + ctx = WebhookContext( + hook_id="hook-filter-12", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + ctx.start_step("error_override") + ctx.complete_step("error_override", can_merge=True, error="Something went wrong", reason="Error occurred") + + step = ctx.workflow_steps["error_override"] + assert step["can_merge"] is True + assert step["error"] == "Something went wrong" + assert step["reason"] == "Error occurred" # Included because error is not None + + +class TestBuildSummary: + """Tests for _build_summary() method and summary field in to_dict().""" + + def test_build_summary_with_pr_and_token_spend(self): + """Test _build_summary() with PR number and token spend.""" + # Mock datetime for entire test to control started_at and completed_at + start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = start_time + ctx = WebhookContext( + hook_id="hook-summary-1", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + pr_number=968, + token_spend=4, + ) + + # Set workflow steps with durations + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("webhook_routing") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 2, 547000, tzinfo=UTC) + mock_dt.now.return_value = step_end + ctx.complete_step("webhook_routing") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 2, 547000, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("repo_clone") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 5, 93000, tzinfo=UTC) + mock_dt.now.return_value = step_end + ctx.complete_step("repo_clone") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 5, 93000, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("push_handler") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 5, 93000, tzinfo=UTC) + mock_dt.now.return_value = step_end + ctx.complete_step("push_handler") + + # Set completed_at + completed_time = datetime(2024, 1, 15, 10, 0, 7, 712000, tzinfo=UTC) + ctx.completed_at = completed_time + + summary = ctx._build_summary() + + # Verify format: [SUCCESS] Webhook completed PR#968 [7s712ms, tokens:4] steps=[...] + assert summary is not None + assert summary.startswith("[SUCCESS] Webhook completed PR#968 [7s712ms, tokens:4] steps=[") + assert "webhook_routing:completed(2s547ms)" in summary + assert "repo_clone:completed(2s546ms)" in summary + assert "push_handler:completed(0ms)" in summary + + def test_build_summary_without_pr(self): + """Test _build_summary() without PR number.""" + start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = start_time + ctx = WebhookContext( + hook_id="hook-summary-2", + event_type="check_run", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Add one step + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("validate_config") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 1, 500000, tzinfo=UTC) + mock_dt.now.return_value = step_end + ctx.complete_step("validate_config") + + # Set completed_at + completed_time = datetime(2024, 1, 15, 10, 0, 1, 500000, tzinfo=UTC) + ctx.completed_at = completed_time + + summary = ctx._build_summary() + + # Verify format: [SUCCESS] Webhook completed [1s500ms] steps=[...] + assert summary is not None + assert summary.startswith("[SUCCESS] Webhook completed [1s500ms] steps=[") + assert "PR#" not in summary # No PR number + assert "tokens:" not in summary # No token spend + assert "validate_config:completed(1s500ms)" in summary + + def test_build_summary_with_failed_step(self): + """Test _build_summary() with failed workflow step.""" + start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = start_time + ctx = WebhookContext( + hook_id="hook-summary-3", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + pr_number=123, + ) + + # Add failed step + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("build_container") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 5, tzinfo=UTC) + mock_dt.now.return_value = step_end + exception = RuntimeError("Build failed") + ctx.fail_step("build_container", exception=exception, traceback_str="Traceback...") + + completed_time = datetime(2024, 1, 15, 10, 0, 5, tzinfo=UTC) + ctx.completed_at = completed_time + + summary = ctx._build_summary() + + # Verify format: [FAILED] Webhook completed PR#123 [5s] steps=[build_container:failed(5s)] + assert summary is not None + assert summary.startswith("[FAILED] Webhook completed PR#123 [5s] steps=[") + assert "build_container:failed(5s)" in summary + + def test_build_summary_without_completed_at(self): + """Test _build_summary() returns None when completed_at is not set.""" + ctx = WebhookContext( + hook_id="hook-summary-4", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + summary = ctx._build_summary() + + assert summary is None + + def test_build_summary_without_steps(self): + """Test _build_summary() with no workflow steps.""" + ctx = WebhookContext( + hook_id="hook-summary-5", + event_type="issue_comment", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + # Set completed_at + with patch("webhook_server.utils.context.datetime") as mock_dt: + start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + mock_dt.now.return_value = start_time + ctx.started_at = start_time + + completed_time = datetime(2024, 1, 15, 10, 0, 3, tzinfo=UTC) + ctx.completed_at = completed_time + + summary = ctx._build_summary() + + # Verify format: [SUCCESS] Webhook completed [3s] steps=[no steps recorded] + assert summary is not None + assert summary == "[SUCCESS] Webhook completed [3s] steps=[no steps recorded]" + + def test_to_dict_includes_summary_field(self): + """Test to_dict() includes summary field when completed_at is set.""" + start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + + with patch("webhook_server.utils.context.datetime") as mock_dt: + mock_dt.now.return_value = start_time + ctx = WebhookContext( + hook_id="hook-summary-6", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + pr_number=456, + token_spend=10, + ) + + # Add step + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_start = datetime(2024, 1, 15, 10, 0, 0, tzinfo=UTC) + mock_dt.now.return_value = step_start + ctx.start_step("test_step") + + with patch("webhook_server.utils.context.datetime") as mock_dt: + step_end = datetime(2024, 1, 15, 10, 0, 2, tzinfo=UTC) + mock_dt.now.return_value = step_end + ctx.complete_step("test_step") + + completed_time = datetime(2024, 1, 15, 10, 0, 2, tzinfo=UTC) + ctx.completed_at = completed_time + + result = ctx.to_dict() + + # Verify summary field is present and correct + assert "summary" in result + assert result["summary"] is not None + assert result["summary"].startswith("[SUCCESS] Webhook completed PR#456 [2s, tokens:10] steps=[") + assert "test_step:completed(2s)" in result["summary"] + + def test_to_dict_summary_is_none_without_completed_at(self): + """Test to_dict() summary field is None when completed_at is not set.""" + ctx = WebhookContext( + hook_id="hook-summary-7", + event_type="pull_request", + repository="owner/repo", + repository_full_name="owner/repo", + ) + + result = ctx.to_dict() + + # Verify summary field is None + assert "summary" in result + assert result["summary"] is None diff --git a/webhook_server/tests/test_github_api.py b/webhook_server/tests/test_github_api.py index 89ce2dd7e..f7e9c7795 100644 --- a/webhook_server/tests/test_github_api.py +++ b/webhook_server/tests/test_github_api.py @@ -12,6 +12,7 @@ from webhook_server.libs.exceptions import RepositoryNotFoundInConfigError from webhook_server.libs.github_api import GithubWebhook +from webhook_server.libs.handlers.owners_files_handler import OwnersFileHandler class TestGithubWebhook: @@ -291,6 +292,11 @@ async def test_process_pull_request_event( return_value=Mock(decoded_content=b"approvers:\n - user1\nreviewers:\n - user2"), ), patch.object(webhook, "_clone_repository", new=AsyncMock(return_value=None)), + patch.object( + OwnersFileHandler, + "initialize", + new=AsyncMock(return_value=None), + ), ): await webhook.process() mock_process_pr.assert_called_once() @@ -401,6 +407,11 @@ async def test_process_issue_comment_event( return_value=Mock(decoded_content=b"approvers:\n - user1\nreviewers:\n - user2"), ), patch.object(webhook, "_clone_repository", new=AsyncMock(return_value=None)), + patch.object( + OwnersFileHandler, + "initialize", + new=AsyncMock(return_value=None), + ), ): await webhook.process() mock_process_comment.assert_called_once() @@ -786,7 +797,14 @@ async def test_process_check_run_event(self, minimal_hook_data: dict, minimal_he mock_pr_handler.return_value.check_if_can_be_merged = AsyncMock(return_value=None) webhook = GithubWebhook(check_run_data, headers, logger) - with patch.object(webhook, "_clone_repository", new=AsyncMock(return_value=None)): + with ( + patch.object(webhook, "_clone_repository", new=AsyncMock(return_value=None)), + patch.object( + OwnersFileHandler, + "initialize", + new=AsyncMock(return_value=None), + ), + ): await webhook.process() mock_check_handler.return_value.process_pull_request_check_run_webhook_data.assert_awaited_once() @@ -1495,9 +1513,8 @@ async def test_process_push_event_deletion( ) # Verify completion log with "deletion event (skipped)" message - info_completion_calls = [str(call) for call in mock_logger.info.call_args_list] - assert any("deletion event (skipped)" in call.lower() for call in info_completion_calls), ( - f"Expected 'deletion event (skipped)' in info logs. Got: {info_completion_calls}" + assert any("deletion event (skipped)" in call.lower() for call in info_calls), ( + f"Expected 'deletion event (skipped)' in info logs. Got: {info_calls}" ) @patch.dict(os.environ, {"WEBHOOK_SERVER_DATA_DIR": "webhook_server/tests/manifests"}) diff --git a/webhook_server/tests/test_issue_comment_handler.py b/webhook_server/tests/test_issue_comment_handler.py index 7e6de97fd..88185efe4 100644 --- a/webhook_server/tests/test_issue_comment_handler.py +++ b/webhook_server/tests/test_issue_comment_handler.py @@ -151,9 +151,10 @@ async def mock_command(pull_request, command, reviewed_user, issue_comment_id): execution_events.append((command, "end", time.time())) with patch.object(issue_comment_handler, "user_commands", side_effect=mock_command): - # Execute commands + # Execute commands - expect exception due to failed command start = time.time() - await issue_comment_handler.process_comment_webhook_data(Mock()) + with pytest.raises(RuntimeError, match="Command /approved failed"): + await issue_comment_handler.process_comment_webhook_data(Mock()) total_duration = time.time() - start # VERIFICATION 1: All three commands should have started diff --git a/webhook_server/tests/test_log_api.py b/webhook_server/tests/test_log_api.py index a06a092bd..8df2c7826 100644 --- a/webhook_server/tests/test_log_api.py +++ b/webhook_server/tests/test_log_api.py @@ -440,8 +440,8 @@ def test_stream_log_entries_no_directory(self, controller): result = list(controller._stream_log_entries()) assert result == [] - def test_stream_log_entries_parse_error(self, controller): - """Test log entries loading with parse error.""" + def test_stream_log_entries_file_read_error(self, controller): + """Test log entries loading with file read error.""" mock_config = Mock() mock_config.data_dir = "/test" controller.config = mock_config @@ -461,8 +461,8 @@ def test_stream_log_entries_parse_error(self, controller): mock_path_instance.glob.return_value = [mock_log_file] mock_path.return_value = mock_path_instance - # Mock open() to raise parse error when reading file - with patch("builtins.open", side_effect=Exception("Parse error")): + # Mock open() to raise file read error when reading file + with patch("builtins.open", side_effect=Exception("File read error")): result = list(controller._stream_log_entries()) # Should return empty list due to exception handling assert isinstance(result, list) diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index 8a9a06942..471c31df7 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -6,6 +6,7 @@ import logging import tempfile import unittest.mock +from collections.abc import AsyncIterator from pathlib import Path import pytest @@ -1257,21 +1258,21 @@ def test_filter_get_unique_values(self) -> None: log_filter = LogFilter() entries = [ LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0, tzinfo=datetime.UTC), level="INFO", logger_name="main", message="msg1", repository="org/repo1", ), LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0, tzinfo=datetime.UTC), level="DEBUG", logger_name="main", message="msg2", repository="org/repo2", ), LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0, tzinfo=datetime.UTC), level="INFO", logger_name="main", message="msg3", @@ -1290,21 +1291,21 @@ def test_filter_get_entry_count_by_field(self) -> None: log_filter = LogFilter() entries = [ LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 0, 0, tzinfo=datetime.UTC), level="INFO", logger_name="main", message="msg1", event_type="push", ), LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 1, 0, tzinfo=datetime.UTC), level="DEBUG", logger_name="main", message="msg2", event_type="pull_request", ), LogEntry( - timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0), + timestamp=datetime.datetime(2025, 7, 31, 10, 2, 0, tzinfo=datetime.UTC), level="INFO", logger_name="main", message="msg3", @@ -1332,7 +1333,7 @@ async def test_monitor_log_directory_with_rotated_files(self, tmp_path: Path) -> entries = [] # Helper to collect entries from async generator - async def collect_entries(async_gen, max_entries=1): + async def collect_entries(async_gen: AsyncIterator[LogEntry], max_entries: int = 1) -> None: count = 0 async for entry in async_gen: entries.append(entry) @@ -1348,10 +1349,13 @@ async def collect_entries(async_gen, max_entries=1): # Give the monitor a moment to start and seek to end of file await asyncio.sleep(0.1) - # Append new content to the current log file (not rotated ones) - with open(current_log, "a") as f: - f.write("2025-07-31T10:01:00.000000 main INFO New entry after monitoring started\n") - f.flush() + # Append new content to the current log file (not rotated ones) - non-blocking + def _append_log() -> None: + with open(current_log, "a") as f: + f.write("2025-07-31T10:01:00.000000 main INFO New entry after monitoring started\n") + f.flush() + + await asyncio.to_thread(_append_log) # Wait for the monitor to collect the new entry with timeout try: diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index 60a0e26ab..12feea562 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -161,6 +161,7 @@ def test_stream_json_log_entries_skips_invalid_json_lines(self, controller, tmp_ def test_stream_json_log_entries_no_log_directory(self, controller, tmp_path): """Test _stream_json_log_entries when log directory doesn't exist.""" # Don't create logs directory + assert tmp_path is not None entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) # Should yield nothing diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py index 1a7ff5743..64cf36903 100644 --- a/webhook_server/tests/test_structured_logger.py +++ b/webhook_server/tests/test_structured_logger.py @@ -300,7 +300,7 @@ def test_write_log_without_pr_details(self, log_writer: StructuredLogWriter, tmp assert log_entry["pr"] is None - @patch("webhook_server.utils.structured_logger.HAS_FCNTL", False) + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", new=False) def test_write_log_without_fcntl( self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path ) -> None: @@ -317,7 +317,7 @@ def test_write_log_without_fcntl( assert log_entry["hook_id"] == "test-hook-123" - @patch("webhook_server.utils.structured_logger.HAS_FCNTL", True) + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", new=True) @patch("fcntl.flock") def test_write_log_uses_file_locking( self, mock_flock: Mock, log_writer: StructuredLogWriter, sample_context: WebhookContext @@ -621,6 +621,7 @@ def test_write_log_handles_missing_timing_in_context_dict(self, mock_config: Moc context.event_type = "push" context.repository = "org/repo" context.started_at = None + context.completed_at = None context.to_dict = Mock(return_value={"hook_id": "test", "event_type": "push"}) # Act @@ -633,6 +634,7 @@ def test_write_log_handles_missing_timing_in_context_dict(self, mock_config: Moc def test_different_dates_create_different_files(self, mock_config: Mock, tmp_path: Path) -> None: """Test that logs for different dates go to different files.""" # Arrange + _ = tmp_path # Intentionally unused, provided by pytest fixture writer = StructuredLogWriter(config=mock_config) date1 = datetime(2026, 1, 5, tzinfo=UTC) date2 = datetime(2026, 1, 6, tzinfo=UTC) diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py index 46ce3a1e0..e4d82f3e3 100644 --- a/webhook_server/utils/context.py +++ b/webhook_server/utils/context.py @@ -45,6 +45,39 @@ _webhook_context: ContextVar["WebhookContext | None"] = ContextVar("webhook_context", default=None) +def _format_duration(ms: int) -> str: + """Format milliseconds to human-readable duration string. + + Args: + ms: Duration in milliseconds + + Returns: + Human-readable duration (e.g., "3m12s", "1h5m", "500ms") + """ + if ms < 1000: + return f"{ms}ms" + + seconds = ms // 1000 + if seconds < 60: + remaining_ms = ms % 1000 + if remaining_ms > 0: + return f"{seconds}s{remaining_ms}ms" + return f"{seconds}s" + + minutes = seconds // 60 + remaining_seconds = seconds % 60 + if minutes < 60: + if remaining_seconds > 0: + return f"{minutes}m{remaining_seconds}s" + return f"{minutes}m" + + hours = minutes // 60 + remaining_minutes = minutes % 60 + if remaining_minutes > 0: + return f"{hours}h{remaining_minutes}m" + return f"{hours}h" + + @dataclass class WebhookContext: """Webhook execution context with workflow tracking and metrics. @@ -132,15 +165,34 @@ def start_step(self, step_name: str, **data: Any) -> None: **data, } - def complete_step(self, step_name: str, **data: Any) -> None: + def complete_step( + self, + step_name: str, + verbose_fields: list[str] | None = None, + **data: Any, + ) -> None: """Complete a workflow step successfully. Marks the step as completed, calculates duration, and updates step metadata. Additional result data can be passed as keyword arguments. + Automatically filters verbose output fields when the step succeeds. By default, + if data contains common success indicators (can_merge=True, success=True, etc.), + verbose fields are excluded to keep logs clean. + Args: step_name: Unique identifier for this workflow step + verbose_fields: Optional list of field names to exclude on success (default: ["reason"]) **data: Additional step result data (e.g., reviewers_assigned=3, labels_added=["verified"]) + + Example: + # Success case - "reason" field automatically excluded + ctx.complete_step("check_merge_eligibility", can_merge=True, reason="All checks passed") + # Result: {"can_merge": True} - "reason" excluded + + # Failure case - "reason" field included for debugging + ctx.complete_step("check_merge_eligibility", can_merge=False, reason="Missing approver") + # Result: {"can_merge": False, "reason": "Missing approver"} - "reason" included """ now = datetime.now(UTC) start_time = self._step_start_times.get(step_name) @@ -149,13 +201,63 @@ def complete_step(self, step_name: str, **data: Any) -> None: if step_name not in self.workflow_steps: self.workflow_steps[step_name] = {"timestamp": now.isoformat()} + # Default verbose fields to exclude on success + if verbose_fields is None: + verbose_fields = ["reason"] + + # Detect success based on common indicators + is_success = self._detect_success(data) + + # Filter out verbose fields on success + filtered_data = data.copy() + if is_success: + for field in verbose_fields: + filtered_data.pop(field, None) + self.workflow_steps[step_name].update({ "status": "completed", "duration_ms": duration_ms, "error": None, - **data, + **filtered_data, }) + def _detect_success(self, data: dict[str, Any]) -> bool: + """Detect if step data indicates success. + + Checks for common success indicators in step data: + - error field present and not None → FAILURE (highest priority) + - can_merge=True → SUCCESS + - success=True → SUCCESS + - Any boolean field ending in "_success" = True → SUCCESS + - Any field ending in "_failed" = False → SUCCESS + - No indicators → SUCCESS (default) + + Args: + data: Step data dictionary + + Returns: + True if data indicates success, False otherwise + """ + # Check for error indicators FIRST (highest priority) + if "error" in data and data["error"] is not None: + return False + + # Check explicit success indicators + if "can_merge" in data: + return bool(data["can_merge"]) + if "success" in data: + return bool(data["success"]) + + # Check for _success/_failed suffixes + for key, value in data.items(): + if key.endswith("_success") and isinstance(value, bool): + return value + if key.endswith("_failed") and isinstance(value, bool): + return not value + + # Default to success if no failure indicators found + return True + def fail_step(self, step_name: str, exception: Exception, traceback_str: str, **data: Any) -> None: """Mark a workflow step as failed with error details. @@ -192,6 +294,43 @@ def fail_step(self, step_name: str, exception: Exception, traceback_str: str, ** self.success = False self.error = error_data + def _build_summary(self) -> str | None: + """Build a one-line summary of webhook processing. + + Generates a summary matching the format of log_webhook_summary(): + [SUCCESS] Webhook completed PR#968 [7s712ms, tokens:4] steps=[webhook_routing:completed(2s547ms), ...] + + Returns: + Summary string if completed_at is set, None otherwise + """ + if self.completed_at is None: + return None + + # Calculate total duration + duration_ms = int((self.completed_at - self.started_at).total_seconds() * 1000) + + # Build workflow steps summary + steps_summary = [] + for step_name, step_data in self.workflow_steps.items(): + status = step_data["status"] + step_duration_ms = step_data.get("duration_ms") + if step_duration_ms is not None: + steps_summary.append(f"{step_name}:{status}({_format_duration(step_duration_ms)})") + else: + steps_summary.append(f"{step_name}:{status}") + + steps_str = ", ".join(steps_summary) if steps_summary else "no steps recorded" + + # Build final summary + status_text = "SUCCESS" if self.success else "FAILED" + pr_info = f" PR#{self.pr_number}" if self.pr_number else "" + token_info = f", tokens:{self.token_spend}" if self.token_spend else "" + + return ( + f"[{status_text}] Webhook completed{pr_info} " + f"[{_format_duration(duration_ms)}{token_info}] steps=[{steps_str}]" + ) + def to_dict(self) -> dict[str, Any]: """Convert context to dictionary for JSON serialization. @@ -229,6 +368,7 @@ def to_dict(self) -> dict[str, Any]: "final_rate_limit": self.final_rate_limit, "success": self.success, "error": self.error, + "summary": self._build_summary(), } diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py index 0af1f6643..e85959a4a 100644 --- a/webhook_server/utils/structured_logger.py +++ b/webhook_server/utils/structured_logger.py @@ -101,12 +101,12 @@ def write_log(self, context: WebhookContext) -> None: context: WebhookContext to serialize and write Note: - Calculates completion time locally without mutating the context + Uses context.completed_at as source of truth, falls back to datetime.now(UTC) """ - # Calculate completion time locally (don't mutate context) - completed_at = datetime.now(UTC) + # Prefer context.completed_at as source of truth, fall back to current time + completed_at = context.completed_at if context.completed_at else datetime.now(UTC) - # Get context dict and update timing locally + # Get context dict and update timing locally (without mutating context) context_dict = context.to_dict() if "timing" in context_dict: context_dict["timing"]["completed_at"] = completed_at.isoformat() diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 25963dd33..ac32aa939 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -841,9 +841,13 @@ def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000 try: with open(log_file, encoding="utf-8") as f: - # Read lines in reverse for newest-first ordering - lines = f.readlines() - for line in reversed(lines): + # Stream lines into a bounded deque for memory efficiency + remaining = max_entries - total_yielded + # Use deque with maxlen to automatically discard oldest entries + line_buffer = deque(f, maxlen=remaining) + + # Process lines in reverse order (newest first) + for line in reversed(line_buffer): if total_yielded >= max_entries: break From fd759ab938b5e7fdedb5e8bca2f46f2f0ceeb9a4 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 13:24:01 +0200 Subject: [PATCH 06/14] docs: add structured webhook logging documentation - CLAUDE.md: Add section on WebhookContext system and step tracking - README.md: Add section on JSON webhook logs with format and access methods --- CLAUDE.md | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 7c720aef4..5e3f5b0a9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -908,6 +908,147 @@ logger.error("Error requiring investigation") logger.exception("Error with full traceback") # Preferred over logger.error(..., exc_info=True) ``` +### Structured Webhook Logging + +The server implements comprehensive JSON-based logging for webhook execution tracking. Each webhook generates a structured log entry containing all workflow steps, timing, errors, and API metrics. + +**Overview:** + +- Thread-safe context tracking using ContextVar for async isolation +- Each webhook execution gets an isolated WebhookContext instance +- Context persists through async operations and handler chains +- Automatic workflow step tracking with timing and error capture +- Pretty-printed JSON output with date-based log rotation + +**Context Creation:** + +Context is created in `app.py` at the start of webhook processing: + +```python +from webhook_server.utils.context import create_context + +# In process_with_error_handling() - before GithubWebhook instantiation +ctx = create_context( + hook_id=hook_id, # X-GitHub-Delivery header + event_type="pull_request", + repository="org/repo", + repository_full_name="org/repo", + action="opened", + sender="username", + api_user="github-api-user", +) +``` + +**Step Tracking Methods:** + +Handlers and processing code use these methods to track workflow progress: + +```python +from webhook_server.utils.context import get_context + +# Get context anywhere in the call stack +ctx = get_context() + +# Start a workflow step +ctx.start_step("clone_repository", branch="main") + +# Complete step successfully +try: + await clone_repo() + ctx.complete_step("clone_repository", commit_sha="abc123") +except Exception as ex: + # Mark step as failed with error details + import traceback + ctx.fail_step( + "clone_repository", + exception=ex, + traceback_str=traceback.format_exc() + ) +``` + +**Handler Usage Pattern:** + +Handlers access context via `github_webhook.ctx`: + +```python +class PullRequestHandler: + def __init__(self, github_webhook: GithubWebhook): + self.github_webhook = github_webhook + + async def process_event(self, event_data: dict) -> None: + # Access context + ctx = self.github_webhook.ctx + + # Track workflow steps + ctx.start_step("assign_reviewers", pr_number=123) + try: + await self.assign_reviewers(pr) + ctx.complete_step( + "assign_reviewers", + reviewers_assigned=3, + labels_added=["needs-review"] + ) + except Exception as ex: + ctx.fail_step( + "assign_reviewers", + exception=ex, + traceback_str=traceback.format_exc(), + pr_number=123 + ) +``` + +**Log File Format:** + +Logs are written to date-based JSON files: + +- Location: `{config.data_dir}/logs/webhooks_YYYY-MM-DD.json` +- Format: Pretty-printed JSON (2-space indentation) +- Entry separator: Blank line between webhook executions +- Rotation: Daily based on UTC date +- Concurrency: File locking for safe multi-process writes + +Each log entry contains: + +```json +{ + "hook_id": "github-delivery-id", + "event_type": "pull_request", + "action": "opened", + "sender": "username", + "repository": "org/repo", + "pr": { + "number": 968, + "title": "Add new feature", + "author": "contributor" + }, + "api_user": "github-api-user", + "timing": { + "started_at": "2026-01-05T10:30:00.123Z", + "completed_at": "2026-01-05T10:30:07.835Z", + "duration_ms": 7712 + }, + "workflow_steps": { + "webhook_routing": { + "timestamp": "2026-01-05T10:30:00.200Z", + "status": "completed", + "duration_ms": 2547 + }, + "clone_repository": { + "timestamp": "2026-01-05T10:30:02.750Z", + "status": "completed", + "duration_ms": 4823, + "commit_sha": "abc123" + } + }, + "token_spend": 4, + "initial_rate_limit": 5000, + "final_rate_limit": 4996, + "success": true, + "error": null, + "summary": "[SUCCESS] Webhook completed PR#968 [7s712ms, tokens:4] steps=[webhook_routing:completed(2s547ms), clone_repository:completed(4s823ms)]" +} +``` + ### Exception Handling Pattern ```python diff --git a/README.md b/README.md index ad96ff33f..c5653a4de 100644 --- a/README.md +++ b/README.md @@ -1371,6 +1371,148 @@ mask-sensitive-data: true # Mask sensitive data (tokens, passwords) in logs (def **Security Note**: Set `mask-sensitive-data: false` only for debugging purposes in development. In production environments, always keep it `true` to prevent exposure of sensitive credentials in logs. +### Structured Webhook Logs + +The webhook server automatically generates structured JSON logs for every webhook execution, providing comprehensive visibility into webhook processing, performance metrics, and error tracking. + +**Log Location**: `{data_dir}/logs/webhooks_YYYY-MM-DD.json` + +**Rotation**: New log files are automatically created daily, with all times in UTC timezone. + +#### Log Format + +Each webhook execution is logged as a single, pretty-printed JSON object containing: + +**Webhook Metadata:** +- `hook_id`: GitHub webhook delivery ID for correlation across systems +- `event_type`: GitHub event type (pull_request, issue_comment, etc.) +- `action`: Specific action within the event type (opened, synchronize, etc.) +- `sender`: GitHub username who triggered the webhook +- `repository`: Full repository name (org/repo) + +**Pull Request Details** (when applicable): +- `pr_number`: Pull request number +- `pr_title`: Pull request title +- `pr_author`: Pull request author username + +**Execution Timing:** +- `started_at`: Webhook processing start timestamp (ISO 8601 format) +- `completed_at`: Webhook processing completion timestamp +- `duration_ms`: Total processing duration in milliseconds + +**Workflow Tracking:** +- `workflow_steps`: Array of workflow steps with individual timing and status + - Each step includes: `step_name`, `started_at`, `completed_at`, `duration_ms`, `status` +- Enables detailed performance analysis and bottleneck identification + +**Resource Usage:** +- `token_spend`: Number of GitHub API tokens consumed during processing +- `rate_limit_remaining`: GitHub API rate limit remaining after processing +- Tracks API quota consumption per webhook + +**Status and Error Handling:** +- `success`: Boolean indicating overall webhook processing success +- `error`: Error message if processing failed +- `traceback`: Full Python traceback for failed webhooks (enables rapid debugging) + +#### Example Log Entry + +```json +{ + "hook_id": "abc123-def456-ghi789", + "event_type": "pull_request", + "action": "opened", + "sender": "contributor-username", + "repository": "my-org/my-repo", + "pr_number": 123, + "pr_title": "Add new feature", + "pr_author": "contributor-username", + "started_at": "2025-01-30T10:30:00.123456", + "completed_at": "2025-01-30T10:30:05.789012", + "duration_ms": 5665, + "workflow_steps": [ + { + "step_name": "Validate webhook signature", + "started_at": "2025-01-30T10:30:00.234567", + "completed_at": "2025-01-30T10:30:00.345678", + "duration_ms": 111, + "status": "completed" + }, + { + "step_name": "Assign reviewers", + "started_at": "2025-01-30T10:30:01.456789", + "completed_at": "2025-01-30T10:30:02.567890", + "duration_ms": 1111, + "status": "completed" + } + ], + "token_spend": 3, + "rate_limit_remaining": 4997, + "success": true +} +``` + +#### Use Cases + +**Performance Monitoring:** +- Track webhook processing duration over time +- Identify slow workflow steps requiring optimization +- Monitor GitHub API rate limit consumption patterns + +**Error Analysis:** +- Full traceback for failed webhooks enables rapid debugging +- Correlate errors across multiple webhook deliveries using `hook_id` +- Track error patterns by repository, user, or event type + +**Compliance and Auditing:** +- Complete audit trail of all webhook processing +- Track who triggered webhooks and when +- Monitor API token consumption for cost tracking + +**Integration with Monitoring Tools:** +- Import JSON logs into log aggregation systems (ELK, Splunk, Datadog) +- Build custom dashboards and alerts based on structured data +- Correlate webhook logs with external systems using `hook_id` + +#### Accessing Structured Logs + +**Web-based Log Viewer:** + +The structured JSON logs are automatically indexed and searchable via the web-based log viewer at `/logs/` endpoint. See the [Log Viewer](#log-viewer) section for detailed documentation on filtering, searching, and analyzing webhook logs through the web interface. + +**Direct File Access:** + +```bash +# View today's webhook logs +cat {data_dir}/logs/webhooks_$(date +%Y-%m-%d).json + +# Search for failed webhooks +jq 'select(.success == false)' {data_dir}/logs/webhooks_*.json + +# Analyze processing duration +jq '.duration_ms' {data_dir}/logs/webhooks_*.json | sort -n + +# Find webhooks for specific PR +jq 'select(.pr_number == 123)' {data_dir}/logs/webhooks_*.json +``` + +**Programmatic Access:** + +```python +import json +from pathlib import Path +from datetime import date + +# Load today's webhook logs +log_file = Path(f"{data_dir}/logs/webhooks_{date.today()}.json") +with log_file.open() as f: + for line in f: + webhook_log = json.loads(line) + if not webhook_log["success"]: + print(f"Failed webhook: {webhook_log['hook_id']}") + print(f"Error: {webhook_log['error']}") +``` + ### Metrics and Observability - **Request/Response logging** with delivery IDs From 8ee9ca317741a74a36984b47e707622cff0aa1f0 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 13:49:37 +0200 Subject: [PATCH 07/14] fix: address CodeRabbit review comments for structured logging - Wrap write_webhook_log in try/except to ensure clear_context runs - Fix traceback capture using format_exception instead of format_exc - Handle asyncio.CancelledError properly (re-raise instead of wrap) - Update log parser/viewer to handle pretty-printed JSON format - Fix test assertions for meaningful validation - Use explicit new= keyword in @patch decorators --- webhook_server/app.py | 8 ++- .../libs/handlers/issue_comment_handler.py | 13 ++++- webhook_server/libs/log_parser.py | 40 ++++++++++++--- webhook_server/tests/test_log_viewer.py | 8 +-- .../tests/test_structured_logger.py | 2 +- webhook_server/web/log_viewer.py | 51 ++++++++++++++----- 6 files changed, 95 insertions(+), 27 deletions(-) diff --git a/webhook_server/app.py b/webhook_server/app.py index a11a22a03..fe6d5673b 100644 --- a/webhook_server/app.py +++ b/webhook_server/app.py @@ -454,8 +454,12 @@ async def process_with_error_handling( log_webhook_summary(ctx, _logger, _log_context) # ALWAYS write the structured log, even on error - write_webhook_log(ctx) - clear_context() + try: + write_webhook_log(ctx) + except Exception: + _logger.exception(f"{_log_context} Failed to write webhook log") + finally: + clear_context() # Start background task immediately using asyncio.create_task # This ensures the HTTP response is sent immediately without waiting diff --git a/webhook_server/libs/handlers/issue_comment_handler.py b/webhook_server/libs/handlers/issue_comment_handler.py index 9c93761a8..7c33240fd 100644 --- a/webhook_server/libs/handlers/issue_comment_handler.py +++ b/webhook_server/libs/handlers/issue_comment_handler.py @@ -114,6 +114,9 @@ async def process_comment_webhook_data(self, pull_request: PullRequest) -> None: for idx, result in enumerate(results): user_command = _user_commands[idx] if isinstance(result, Exception): + # Re-raise CancelledError immediately to allow cancellation to propagate + if isinstance(result, asyncio.CancelledError): + raise result self.logger.error(f"{self.log_prefix} Command execution failed: /{user_command} - {result}") failed_commands.append((user_command, result)) @@ -123,12 +126,20 @@ async def process_comment_webhook_data(self, pull_request: PullRequest) -> None: first_failed_command, first_exception = failed_commands[0] error_msg = f"Command /{first_failed_command} failed: {first_exception}" if self.ctx: - self.ctx.fail_step("issue_comment_handler", first_exception, traceback.format_exc()) + # Format traceback from the actual exception object + tb_lines = traceback.format_exception( + type(first_exception), first_exception, first_exception.__traceback__ + ) + tb_str = "".join(tb_lines) + self.ctx.fail_step("issue_comment_handler", first_exception, tb_str) raise RuntimeError(error_msg) from first_exception if self.ctx: self.ctx.complete_step("issue_comment_handler") + except asyncio.CancelledError: + # Always let cancellation propagate + raise except Exception as ex: # If step not already failed, mark it as failed if self.ctx and not self.ctx.workflow_steps.get("issue_comment_handler", {}).get("status") == "failed": diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index 9eb4eafa2..a115b2109 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -311,10 +311,14 @@ def parse_log_file(self, file_path: Path) -> list[LogEntry]: return entries def parse_json_log_entry(self, json_line: str) -> LogEntry | None: - """Parse a single JSON log line into a LogEntry object. + """Parse a JSON log entry (single-line or multi-line) into a LogEntry object. + + Handles both formats: + - Single-line compact JSON: {"hook_id": "abc", ...} + - Multi-line pretty-printed JSON with indentation Args: - json_line: Raw JSON line string from webhooks_*.json files + json_line: Raw JSON string from webhooks_*.json files (may be multi-line) Returns: LogEntry object if parsing successful, None otherwise @@ -400,6 +404,10 @@ def _create_json_summary_message(self, data: dict[str, Any]) -> str: def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: """Parse a JSON log file and return list of LogEntry objects. + Handles two formats: + 1. Pretty-printed multi-line JSON entries separated by blank lines (current format) + 2. Single-line JSON entries separated by newlines (legacy format) + Args: file_path: Path to the webhooks_*.json file @@ -410,10 +418,22 @@ def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: try: with open(file_path, encoding="utf-8") as f: - for line in f: - entry = self.parse_json_log_entry(line) - if entry: - entries.append(entry) + content = f.read() + + # Detect format: check if file contains blank line separators + if "\n\n" in content: + # Format 1: Pretty-printed JSON with blank line separators + json_blocks = content.split("\n\n") + for block in json_blocks: + entry = self.parse_json_log_entry(block) + if entry: + entries.append(entry) + else: + # Format 2: Single-line JSON entries (one per line) + for line in content.splitlines(): + entry = self.parse_json_log_entry(line) + if entry: + entries.append(entry) except OSError as e: self.logger.exception(f"Failed to read JSON log file {file_path}: {e}") except UnicodeDecodeError as e: @@ -422,10 +442,14 @@ def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: return entries def get_raw_json_entry(self, json_line: str) -> dict[str, Any] | None: - """Parse a JSON log line and return the raw dictionary. + """Parse a JSON log entry (single-line or multi-line) and return the raw dictionary. + + Handles both formats: + - Single-line compact JSON: {"hook_id": "abc", ...} + - Multi-line pretty-printed JSON with indentation Args: - json_line: Raw JSON line string + json_line: Raw JSON string (may be multi-line) Returns: Parsed JSON dictionary, or None if parsing fails diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index 12feea562..c2d00a697 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -397,9 +397,11 @@ def test_stream_json_log_entries_handles_file_read_errors(self, controller, tmp_ # Stream entries - should skip bad file and continue entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) - # Should still get the valid entry (or none if permission error blocks all) - # Depending on OS, this may yield 0 or 1 entry - assert len(entries) >= 0 + # Validate that the generator returned a list without raising + assert isinstance(entries, list) + # Verify that the valid entry from sample_json_webhook_data is present + # Controller should skip the unreadable bad_file and still yield the expected JSON entry + assert any(e.get("hook_id") == "test-hook-123" for e in entries) finally: # Restore permissions for cleanup bad_file.chmod(0o644) diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py index 64cf36903..1ed59cc30 100644 --- a/webhook_server/tests/test_structured_logger.py +++ b/webhook_server/tests/test_structured_logger.py @@ -434,7 +434,7 @@ def test_write_error_log_preserves_existing_error( assert log_entry["error"]["type"] == "ExistingError" assert log_entry["error"]["message"] == "Original error" - @patch("webhook_server.utils.structured_logger.HAS_FCNTL", True) + @patch("webhook_server.utils.structured_logger.HAS_FCNTL", new=True) @patch("fcntl.flock") def test_write_error_log_uses_file_locking(self, mock_flock: Mock, log_writer: StructuredLogWriter) -> None: """Test write_error_log uses file locking when fcntl is available.""" diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index ac32aa939..314c422a5 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -790,14 +790,30 @@ def sort_key(f: Path) -> tuple: buffer: deque[LogEntry] = deque(maxlen=remaining_capacity) with open(log_file, encoding="utf-8") as f: - for line in f: - # Use appropriate parser based on file type - if log_file.suffix == ".json": - entry = self.log_parser.parse_json_log_entry(line) + # Use appropriate parser based on file type + if log_file.suffix == ".json": + # JSON files: read content and detect format + content = f.read() + # Detect format: check if file contains blank line separators + if "\n\n" in content: + # Format 1: Pretty-printed JSON with blank line separators + json_blocks = content.split("\n\n") + for block in json_blocks: + entry = self.log_parser.parse_json_log_entry(block) + if entry: + buffer.append(entry) else: + # Format 2: Single-line JSON entries (one per line) + for line in content.splitlines(): + entry = self.log_parser.parse_json_log_entry(line) + if entry: + buffer.append(entry) + else: + # Text log files: parse line by line + for line in f: entry = self.log_parser.parse_log_entry(line) - if entry: - buffer.append(entry) + if entry: + buffer.append(entry) for entry in reversed(buffer): if total_yielded >= max_entries: @@ -814,6 +830,7 @@ def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000 """Stream raw JSON log entries from webhooks_*.json files. Returns raw JSON dicts instead of LogEntry objects for access to full structured data. + Handles both single-line and multi-line JSON entries separated by blank lines. Args: max_files: Maximum number of log files to process (newest first) @@ -841,17 +858,27 @@ def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000 try: with open(log_file, encoding="utf-8") as f: - # Stream lines into a bounded deque for memory efficiency + # Read file content + content = f.read() + + # Detect format: check if file contains blank line separators + if "\n\n" in content: + # Format 1: Pretty-printed JSON with blank line separators + json_blocks = content.split("\n\n") + else: + # Format 2: Single-line JSON entries (one per line) + json_blocks = content.splitlines() + + # Use deque to limit entries for memory efficiency remaining = max_entries - total_yielded - # Use deque with maxlen to automatically discard oldest entries - line_buffer = deque(f, maxlen=remaining) + block_buffer = deque(json_blocks, maxlen=remaining) - # Process lines in reverse order (newest first) - for line in reversed(line_buffer): + # Process blocks in reverse order (newest first) + for block in reversed(block_buffer): if total_yielded >= max_entries: break - data = self.log_parser.get_raw_json_entry(line) + data = self.log_parser.get_raw_json_entry(block) if data: yield data total_yielded += 1 From 36c9fb331bd4d8a3733f2b9f0afd0cab2680526a Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 15:16:02 +0200 Subject: [PATCH 08/14] fix: address CodeRabbit review and convert log viewer to async I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Guard JSON summary builder against non-dict error values - Refine task_status derivation (True/False/None handling) - Convert log_viewer.py to full async with aiofiles - Implement incremental JSON streaming for memory efficiency - Fix tests that incorrectly deleted template file - Add aiofiles and types-aiofiles dependencies 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 + uv.lock | 22 + webhook_server/app.py | 12 +- webhook_server/libs/log_parser.py | 17 +- webhook_server/tests/test_app.py | 10 +- .../tests/test_edge_cases_validation.py | 39 +- .../tests/test_frontend_performance.py | 65 +- webhook_server/tests/test_log_api.py | 223 +++-- webhook_server/tests/test_log_viewer.py | 842 +++++++++++++++++- .../tests/test_memory_optimization.py | 50 +- webhook_server/utils/app_utils.py | 4 +- webhook_server/web/log_viewer.py | 180 ++-- 13 files changed, 1199 insertions(+), 269 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c44e19451..7abc23510 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,7 +55,7 @@ repos: hooks: - id: mypy exclude: (tests/) - additional_dependencies: [types-requests, types-PyYAML, types-colorama] + additional_dependencies: [types-requests, types-PyYAML, types-colorama, types-aiofiles] - repo: https://github.com/pre-commit/mirrors-eslint rev: v10.0.0-beta.0 diff --git a/pyproject.toml b/pyproject.toml index e7fb4b902..856c19863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "aiofiles>=24.1.0", "build>=1.2.2.post1", "colorama>=0.4.6", "colorlog>=6.8.2", @@ -101,6 +102,7 @@ build-backend = "hatchling.build" dev = [ "ipdb>=0.13.13", "ipython>=8.12.3", + "types-aiofiles>=24.1.0.20241221", "types-colorama>=0.4.15.20240311", "types-pyyaml>=6.0.12.20250516", "types-requests>=2.32.4.20250611", diff --git a/uv.lock b/uv.lock index 1af44a479..31ff28c77 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 3 requires-python = "==3.13.*" +[[package]] +name = "aiofiles" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -373,6 +382,7 @@ name = "github-webhook-server" version = "4.0.1" source = { editable = "." } dependencies = [ + { name = "aiofiles" }, { name = "asyncstdlib" }, { name = "build" }, { name = "colorama" }, @@ -409,6 +419,7 @@ tests = [ dev = [ { name = "ipdb" }, { name = "ipython" }, + { name = "types-aiofiles" }, { name = "types-colorama" }, { name = "types-pyyaml" }, { name = "types-requests" }, @@ -421,6 +432,7 @@ tests = [ [package.metadata] requires-dist = [ + { name = "aiofiles", specifier = ">=24.1.0" }, { name = "asyncstdlib", specifier = ">=3.13.1" }, { name = "build", specifier = ">=1.2.2.post1" }, { name = "colorama", specifier = ">=0.4.6" }, @@ -454,6 +466,7 @@ provides-extras = ["tests"] dev = [ { name = "ipdb", specifier = ">=0.13.13" }, { name = "ipython", specifier = ">=8.12.3" }, + { name = "types-aiofiles", specifier = ">=24.1.0.20241221" }, { name = "types-colorama", specifier = ">=0.4.15.20240311" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250516" }, { name = "types-requests", specifier = ">=2.32.4.20250611" }, @@ -1346,6 +1359,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/e4/5ebc1899d31d2b1601b32d21cfb4bba022ae6fce323d365f0448031b1660/typer-0.21.0-py3-none-any.whl", hash = "sha256:c79c01ca6b30af9fd48284058a7056ba0d3bf5cf10d0ff3d0c5b11b68c258ac6", size = 47109, upload-time = "2025-12-25T09:54:51.918Z" }, ] +[[package]] +name = "types-aiofiles" +version = "25.1.0.20251011" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/84/6c/6d23908a8217e36704aa9c79d99a620f2fdd388b66a4b7f72fbc6b6ff6c6/types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff", size = 14535, upload-time = "2025-10-11T02:44:51.237Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" }, +] + [[package]] name = "types-colorama" version = "0.4.15.20250801" diff --git a/webhook_server/app.py b/webhook_server/app.py index fe6d5673b..5a3405608 100644 --- a/webhook_server/app.py +++ b/webhook_server/app.py @@ -520,9 +520,9 @@ def get_log_viewer_controller() -> LogViewerController: if LOG_SERVER_ENABLED: @FASTAPI_APP.get("/logs", operation_id="get_log_viewer_page", response_class=HTMLResponse) - def get_log_viewer_page(controller: LogViewerController = controller_dependency) -> HTMLResponse: + async def get_log_viewer_page(controller: LogViewerController = controller_dependency) -> HTMLResponse: """Serve the main log viewer HTML page.""" - return controller.get_log_page() + return await controller.get_log_page() async def _get_log_entries_core( @@ -544,7 +544,7 @@ async def _get_log_entries_core( start_datetime = parse_datetime_string(start_time, "start_time") end_datetime = parse_datetime_string(end_time, "end_time") - return controller.get_log_entries( + return await controller.get_log_entries( hook_id=hook_id, pr_number=pr_number, repository=repository, @@ -699,7 +699,7 @@ async def _export_logs_core( start_datetime = parse_datetime_string(start_time, "start_time") end_datetime = parse_datetime_string(end_time, "end_time") - return controller.export_logs( + return await controller.export_logs( format_type=format_type, hook_id=hook_id, pr_number=pr_number, @@ -865,7 +865,7 @@ async def _get_pr_flow_data_core( hook_id: str, ) -> dict[str, Any]: """Core logic for getting PR flow visualization data for a specific hook ID.""" - return controller.get_pr_flow_data(hook_id) + return await controller.get_pr_flow_data(hook_id) @FASTAPI_APP.get( @@ -1140,7 +1140,7 @@ async def get_workflow_steps(hook_id: str, controller: LogViewerController = con - Historical analysis is available for completed workflows - Real-time step data for in-progress workflows """ - return get_workflow_steps_core(controller=controller, hook_id=hook_id) + return await get_workflow_steps_core(controller=controller, hook_id=hook_id) @FASTAPI_APP.websocket("/logs/ws") diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index a115b2109..c24863414 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -350,6 +350,15 @@ def parse_json_log_entry(self, json_line: str) -> LogEntry | None: # Create summary message message = self._create_json_summary_message(data) + # Derive task_status from success field + success = data.get("success") + if success is True: + task_status = "completed" + elif success is False: + task_status = "failed" + else: + task_status = None + return LogEntry( timestamp=timestamp, level="INFO", # JSON logs don't have levels, default to INFO @@ -362,7 +371,7 @@ def parse_json_log_entry(self, json_line: str) -> LogEntry | None: github_user=data.get("api_user"), task_id=None, # Not used in JSON format task_type=None, # Not used in JSON format - task_status="completed" if data.get("success") else "failed", + task_status=task_status, token_spend=data.get("token_spend"), ) @@ -396,8 +405,10 @@ def _create_json_summary_message(self, data: dict[str, Any]) -> str: parts.append("- completed successfully") else: parts.append("- failed") - if data.get("error", {}).get("type"): - parts.append(f"({data['error']['type']})") + error = data.get("error") + error_type = error.get("type") if isinstance(error, dict) else None + if error_type: + parts.append(f"({error_type})") return " ".join(parts) diff --git a/webhook_server/tests/test_app.py b/webhook_server/tests/test_app.py index 4852675d2..11aa27e08 100644 --- a/webhook_server/tests/test_app.py +++ b/webhook_server/tests/test_app.py @@ -861,7 +861,8 @@ def test_process_webhook_missing_repo_full_name(self, mock_config: Mock, client: def test_get_log_viewer_page(self, client: TestClient) -> None: """Test get_log_viewer_page endpoint.""" mock_instance = MagicMock() - mock_instance.get_log_page.return_value = "" + # get_log_page is async, so use AsyncMock + mock_instance.get_log_page = AsyncMock(return_value="") # Patch the singleton directly as controller_dependency captures reference to get_log_viewer_controller with patch("webhook_server.app._log_viewer_controller_singleton", mock_instance): @@ -872,8 +873,8 @@ def test_get_log_viewer_page(self, client: TestClient) -> None: def test_get_log_entries(self, client: TestClient) -> None: """Test get_log_entries endpoint.""" mock_instance = MagicMock() - # The controller returns a dict - mock_instance.get_log_entries.return_value = {"entries": []} + # get_log_entries is async, so use AsyncMock + mock_instance.get_log_entries = AsyncMock(return_value={"entries": []}) with patch("webhook_server.app._log_viewer_controller_singleton", mock_instance): response = client.get("/logs/api/entries") @@ -888,7 +889,8 @@ def test_export_logs(self, client: TestClient) -> None: def iter_content(): yield b"data" - mock_instance.export_logs.return_value = StreamingResponse(iter_content()) + # export_logs is async, so use AsyncMock + mock_instance.export_logs = AsyncMock(return_value=StreamingResponse(iter_content())) with patch("webhook_server.app._log_viewer_controller_singleton", mock_instance): response = client.get("/logs/api/export?format_type=json") diff --git a/webhook_server/tests/test_edge_cases_validation.py b/webhook_server/tests/test_edge_cases_validation.py index 86ef1a4c7..5c69429fe 100644 --- a/webhook_server/tests/test_edge_cases_validation.py +++ b/webhook_server/tests/test_edge_cases_validation.py @@ -708,13 +708,17 @@ async def mock_monitor_corrupted(): class TestAPIEndpointEdgeCases: """Test edge cases in API endpoint functionality.""" - def test_api_with_malformed_parameters(self): + async def test_api_with_malformed_parameters(self): """Test API behavior with malformed parameters.""" + async def async_iter_empty(): + return + yield # Make this a generator function + mock_logger = Mock() controller = LogViewerController(logger=mock_logger) - with patch.object(controller, "_stream_log_entries", return_value=iter([])): + with patch.object(controller, "_stream_log_entries", return_value=async_iter_empty()): with patch.object(controller, "_estimate_total_log_count", return_value=0): # Test truly malformed parameters that should raise exceptions invalid_params = [ @@ -725,7 +729,7 @@ def test_api_with_malformed_parameters(self): for params in invalid_params: with pytest.raises((ValueError, HTTPException)): - controller.get_log_entries(**params) + await controller.get_log_entries(**params) # Test valid edge cases that should succeed valid_edge_cases = [ @@ -737,11 +741,11 @@ def test_api_with_malformed_parameters(self): ] for params in valid_edge_cases: - result = controller.get_log_entries(**params) + result = await controller.get_log_entries(**params) assert isinstance(result, dict) assert "entries" in result - def test_api_with_extremely_large_responses(self): + async def test_api_with_extremely_large_responses(self): """Test API behavior with extremely large response datasets.""" mock_logger = Mock() @@ -759,20 +763,24 @@ def test_api_with_extremely_large_responses(self): ) large_entries.append(entry) - with patch.object(controller, "_stream_log_entries", return_value=iter(large_entries[:1000])): + async def async_iter_wrapper(items): + for item in items: + yield item + + with patch.object(controller, "_stream_log_entries", return_value=async_iter_wrapper(large_entries[:1000])): # Test with default limit - the controller will process available entries and apply pagination - result = controller.get_log_entries() + result = await controller.get_log_entries() assert "entries" in result assert "entries_processed" in result assert len(result["entries"]) <= 100 # Default limit applied # Test with large limit to get more entries - result_large = controller.get_log_entries(limit=1000) + result_large = await controller.get_log_entries(limit=1000) assert len(result_large["entries"]) <= 1000 # Should not exceed available data # Test export with large dataset (should handle size limits) try: - export_result = controller.export_logs(format_type="json") + export_result = await controller.export_logs(format_type="json") # Should either succeed or raise appropriate error for large datasets assert hasattr(export_result, "status_code") or isinstance(export_result, str) except HTTPException as e: @@ -875,15 +883,20 @@ async def test_multiple_users_different_filters(self): {"repository": "repo-2", "search": "500"}, ] - def user_request(controller, filters): + async def user_request(controller, filters): """Simulate a user making a request.""" - with patch.object(controller, "_stream_log_entries", return_value=iter(entries)): - return controller.get_log_entries(**filters) + + async def async_iter_wrapper(items): + for item in items: + yield item + + with patch.object(controller, "_stream_log_entries", return_value=async_iter_wrapper(entries)): + return await controller.get_log_entries(**filters) # Execute concurrent requests tasks = [] for controller, filters in zip(users, user_filters, strict=True): - task = asyncio.create_task(asyncio.to_thread(user_request, controller, filters)) + task = asyncio.create_task(user_request(controller, filters)) tasks.append(task) results = await asyncio.gather(*tasks) diff --git a/webhook_server/tests/test_frontend_performance.py b/webhook_server/tests/test_frontend_performance.py index 1f9908de6..ad57597f5 100644 --- a/webhook_server/tests/test_frontend_performance.py +++ b/webhook_server/tests/test_frontend_performance.py @@ -58,9 +58,9 @@ def large_log_entries(self): return entries - def test_html_template_contains_optimized_rendering(self, controller, static_files): + async def test_html_template_contains_optimized_rendering(self, controller, static_files): """Test that the JavaScript file includes optimized rendering capabilities.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -80,9 +80,9 @@ def test_html_template_contains_optimized_rendering(self, controller, static_fil "disabled" in js_content.lower() or "removed" in js_content.lower() ), "Virtual scrolling should be explicitly disabled" - def test_html_template_contains_progressive_loading(self, controller, static_files): + async def test_html_template_contains_progressive_loading(self, controller, static_files): """Test that the JavaScript and CSS files include progressive loading capabilities.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) css_content = self._read_static_file(static_files["css"]) @@ -110,9 +110,9 @@ def test_html_template_contains_progressive_loading(self, controller, static_fil assert "error" in js_content.lower(), "Should include error handling" assert "retry" in css_content.lower() or "retry" in js_content.lower(), "Should support retry functionality" - def test_html_template_contains_optimized_filtering(self, controller, static_files): + async def test_html_template_contains_optimized_filtering(self, controller, static_files): """Test that the JavaScript file includes optimized filtering capabilities.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -134,9 +134,9 @@ def test_html_template_contains_optimized_filtering(self, controller, static_fil # Test for early exit optimizations assert "return false" in js_content, "Should use early exits for filter performance" - def test_html_template_contains_performance_css(self, controller, static_files): + async def test_html_template_contains_performance_css(self, controller, static_files): """Test that the CSS file includes performance optimizations.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() css_content = self._read_static_file(static_files["css"]) # Check that HTML template includes the CSS file @@ -154,9 +154,9 @@ def test_html_template_contains_performance_css(self, controller, static_files): assert "skeleton" in css_content.lower(), "Should include skeleton loading styles" assert "loading" in css_content.lower(), "Should include loading state styles" - def test_escaping_function_included(self, controller, static_files): + async def test_escaping_function_included(self, controller, static_files): """Test that HTML escaping functionality is included for security.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -176,9 +176,9 @@ def test_escaping_function_included(self, controller, static_files): ) assert "escape" in js_lower and "hook" in js_lower, "Should escape hook IDs" - def test_progressive_loading_threshold(self, controller, static_files): + async def test_progressive_loading_threshold(self, controller, static_files): """Test that progressive loading activates for large datasets.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -189,9 +189,9 @@ def test_progressive_loading_threshold(self, controller, static_files): assert "200" in js_content or "100" in js_content, "Should have a reasonable threshold for progressive loading" assert "progressiv" in js_content.lower(), "Should activate progressive loading for large datasets" - def test_chunked_loading_configuration(self, controller, static_files): + async def test_chunked_loading_configuration(self, controller, static_files): """Test that chunked loading is properly configured.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -207,9 +207,9 @@ def test_chunked_loading_configuration(self, controller, static_files): assert "setTimeout" in js_content, "Should use setTimeout for non-blocking chunked loading" assert any(str(i) in js_content for i in [5, 10, 15, 20]), "Should have reasonable delay between chunks" - def test_debounced_filtering_optimization(self, controller, static_files): + async def test_debounced_filtering_optimization(self, controller, static_files): """Test that debounced filtering is optimized.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -229,16 +229,20 @@ def test_debounced_filtering_optimization(self, controller, static_files): @patch("pathlib.Path.exists") @patch("pathlib.Path.iterdir") - def test_controller_works_with_large_datasets(self, mock_iterdir, mock_exists, controller, large_log_entries): + async def test_controller_works_with_large_datasets(self, mock_iterdir, mock_exists, controller, large_log_entries): """Test that the controller can handle large datasets efficiently.""" # Mock file system for log parsing mock_exists.return_value = True mock_iterdir.return_value = [] - # Mock the stream_log_entries method to return our large dataset - with patch.object(controller, "_stream_log_entries", return_value=iter(large_log_entries)): + # Mock the stream_log_entries method to return an async generator + async def async_gen(): + for entry in large_log_entries: + yield entry + + with patch.object(controller, "_stream_log_entries", return_value=async_gen()): # Test getting log entries with a large dataset - result = controller.get_log_entries(limit=1000) + result = await controller.get_log_entries(limit=1000) # Test that essential API structure is maintained expected_keys = ["entries", "entries_processed", "filtered_count_min", "limit", "offset"] @@ -252,12 +256,17 @@ def test_controller_works_with_large_datasets(self, mock_iterdir, mock_exists, c assert result["entries_processed"] >= 0, "Should track number of entries processed" assert result["limit"] == 1000, "Should respect requested limit" - def test_memory_efficient_export(self, controller, large_log_entries): + async def test_memory_efficient_export(self, controller, large_log_entries): """Test that export functionality works efficiently with large datasets.""" - # Mock the stream_log_entries method - with patch.object(controller, "_stream_log_entries", return_value=iter(large_log_entries)): + + # Mock the stream_log_entries method to return an async generator + async def async_gen(): + for entry in large_log_entries: + yield entry + + with patch.object(controller, "_stream_log_entries", return_value=async_gen()): # Test JSON export with large dataset - result = controller.export_logs(format_type="json") + result = await controller.export_logs(format_type="json") # Test that export uses streaming approach for memory efficiency assert hasattr(result, "body_iterator"), "Export should use streaming response for large datasets" @@ -265,9 +274,9 @@ def test_memory_efficient_export(self, controller, large_log_entries): # Test that the response is properly configured for streaming assert result is not None, "Export should return a valid response object" - def test_filter_performance_with_search_terms(self, controller, static_files): + async def test_filter_performance_with_search_terms(self, controller, static_files): """Test that search term optimization is implemented.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file @@ -281,9 +290,9 @@ def test_filter_performance_with_search_terms(self, controller, static_files): # Test for case-insensitive search capability assert "toLowerCase" in js_content or "toUpperCase" in js_content, "Should support case-insensitive search" - def test_error_handling_and_retry_mechanism(self, controller, static_files): + async def test_error_handling_and_retry_mechanism(self, controller, static_files): """Test that error handling and retry mechanisms are in place.""" - html_content = controller._get_log_viewer_html() + html_content = await controller._get_log_viewer_html() js_content = self._read_static_file(static_files["js"]) # Check that HTML template includes the JS file diff --git a/webhook_server/tests/test_log_api.py b/webhook_server/tests/test_log_api.py index 8df2c7826..c6bba7de4 100644 --- a/webhook_server/tests/test_log_api.py +++ b/webhook_server/tests/test_log_api.py @@ -19,6 +19,12 @@ from webhook_server.web.log_viewer import LogViewerController +async def async_generator_from_list(items: list): + """Helper to convert a list to an async generator for testing.""" + for item in items: + yield item + + class TestLogViewerController: """Test cases for LogViewerController class methods.""" @@ -73,85 +79,93 @@ def sample_log_entries(self) -> list[LogEntry]: ), ] - def test_get_log_page_success(self, controller): + async def test_get_log_page_success(self, controller): """Test successful log page generation.""" with patch.object(controller, "_get_log_viewer_html", return_value="Test"): - response = controller.get_log_page() + response = await controller.get_log_page() assert response.status_code == 200 assert "Test" in response.body.decode() - def test_get_log_page_file_not_found(self, controller): + async def test_get_log_page_file_not_found(self, controller): """Test log page when template file not found - should return fallback HTML.""" # _get_log_viewer_html now returns fallback HTML instead of raising FileNotFoundError with patch.object(controller, "_get_log_viewer_html", return_value="fallback"): - result = controller.get_log_page() + result = await controller.get_log_page() assert isinstance(result, HTMLResponse) assert result.body.decode() == "fallback" - def test_get_log_page_error(self, controller): + async def test_get_log_page_error(self, controller): """Test log page with generic error.""" with patch.object(controller, "_get_log_viewer_html", side_effect=Exception("Test error")): with pytest.raises(HTTPException) as exc: - controller.get_log_page() + await controller.get_log_page() assert exc.value.status_code == 500 - def test_get_log_entries_success(self, controller, sample_log_entries): + async def test_get_log_entries_success(self, controller, sample_log_entries): """Test successful log entries retrieval.""" - with patch.object(controller, "_stream_log_entries", return_value=sample_log_entries): - result = controller.get_log_entries() + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(sample_log_entries) + ): + result = await controller.get_log_entries() assert "entries" in result assert result["entries_processed"] == 3 assert len(result["entries"]) == 3 - def test_get_log_entries_with_filters(self, controller, sample_log_entries): + async def test_get_log_entries_with_filters(self, controller, sample_log_entries): """Test log entries with filters applied.""" - with patch.object(controller, "_stream_log_entries", return_value=sample_log_entries): - result = controller.get_log_entries(hook_id="hook1", level="INFO") + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(sample_log_entries) + ): + result = await controller.get_log_entries(hook_id="hook1", level="INFO") assert "entries" in result - def test_get_log_entries_with_pagination(self, controller, sample_log_entries): + async def test_get_log_entries_with_pagination(self, controller, sample_log_entries): """Test log entries with pagination.""" - with patch.object(controller, "_stream_log_entries", return_value=sample_log_entries): - result = controller.get_log_entries(limit=2, offset=1) + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(sample_log_entries) + ): + result = await controller.get_log_entries(limit=2, offset=1) assert result["limit"] == 2 assert result["offset"] == 1 - def test_get_log_entries_invalid_limit(self, controller): + async def test_get_log_entries_invalid_limit(self, controller): """Test log entries with invalid limit.""" with pytest.raises(HTTPException) as exc: - controller.get_log_entries(limit=0) + await controller.get_log_entries(limit=0) assert exc.value.status_code == 400 - def test_get_log_entries_file_error(self, controller): + async def test_get_log_entries_file_error(self, controller): """Test log entries with file access error.""" with patch.object(controller, "_stream_log_entries", side_effect=OSError("Permission denied")): with pytest.raises(HTTPException) as exc: - controller.get_log_entries() + await controller.get_log_entries() assert exc.value.status_code == 500 - def test_export_logs_json(self, controller, sample_log_entries): + async def test_export_logs_json(self, controller, sample_log_entries): """Test JSON export functionality.""" - with patch.object(controller, "_stream_log_entries", return_value=sample_log_entries): - result = controller.export_logs(format_type="json") + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(sample_log_entries) + ): + result = await controller.export_logs(format_type="json") # This should return a StreamingResponse, not a JSON string assert hasattr(result, "status_code") assert result.status_code == 200 - def test_export_logs_invalid_format(self, controller): + async def test_export_logs_invalid_format(self, controller): """Test export with invalid format.""" - with patch.object(controller, "_stream_log_entries", return_value=[]): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list([])): with pytest.raises(HTTPException) as exc: - controller.export_logs(format_type="xml") + await controller.export_logs(format_type="xml") assert exc.value.status_code == 400 - def test_export_logs_result_too_large(self, controller): + async def test_export_logs_result_too_large(self, controller): """Test export with result set too large.""" - with patch.object(controller, "_stream_log_entries", return_value=[]): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list([])): with pytest.raises(HTTPException) as exc: - controller.export_logs(format_type="json", limit=60000) + await controller.export_logs(format_type="json", limit=60000) assert exc.value.status_code == 413 - def test_export_logs_filtered_entries_too_large(self, controller): + async def test_export_logs_filtered_entries_too_large(self, controller): """Test export when filtered entries exceed limit.""" # Create a large list of entries that will all match filters large_entries = [ @@ -166,15 +180,15 @@ def test_export_logs_filtered_entries_too_large(self, controller): ] # Mock stream_log_entries to return many entries - with patch.object(controller, "_stream_log_entries", return_value=large_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(large_entries)): # Mock _entry_matches_filters to always return True so all entries are included with patch.object(controller, "_entry_matches_filters", return_value=True): with pytest.raises(HTTPException) as exc: # Call with a limit that would exceed 50000 to trigger the error - controller.export_logs(format_type="json", limit=51000) + await controller.export_logs(format_type="json", limit=51000) assert exc.value.status_code == 413 - def test_get_pr_flow_data_success(self, controller, sample_log_entries): + async def test_get_pr_flow_data_success(self, controller, sample_log_entries): """Test PR flow data retrieval.""" # Create entries with matching hook_id matching_entries = [ @@ -187,19 +201,19 @@ def test_get_pr_flow_data_success(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=matching_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(matching_entries)): with patch.object(controller, "_analyze_pr_flow", return_value={"test": "data"}): - result = controller.get_pr_flow_data("test-hook-id") + result = await controller.get_pr_flow_data("test-hook-id") assert result == {"test": "data"} - def test_get_pr_flow_data_not_found(self, controller): + async def test_get_pr_flow_data_not_found(self, controller): """Test PR flow data when not found.""" - with patch.object(controller, "_stream_log_entries", return_value=[]): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list([])): with pytest.raises(HTTPException) as exc: - controller.get_pr_flow_data("nonexistent") + await controller.get_pr_flow_data("nonexistent") assert exc.value.status_code == 404 - def test_get_pr_flow_data_hook_prefix(self, controller, sample_log_entries): + async def test_get_pr_flow_data_hook_prefix(self, controller, sample_log_entries): """Test PR flow data with hook- prefix.""" matching_entries = [ LogEntry( @@ -211,12 +225,12 @@ def test_get_pr_flow_data_hook_prefix(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=matching_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(matching_entries)): with patch.object(controller, "_analyze_pr_flow", return_value={"test": "data"}): - result = controller.get_pr_flow_data("hook-123") + result = await controller.get_pr_flow_data("hook-123") assert result == {"test": "data"} - def test_get_pr_flow_data_pr_prefix(self, controller, sample_log_entries): + async def test_get_pr_flow_data_pr_prefix(self, controller, sample_log_entries): """Test PR flow data with pr- prefix.""" matching_entries = [ LogEntry( @@ -229,12 +243,12 @@ def test_get_pr_flow_data_pr_prefix(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=matching_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(matching_entries)): with patch.object(controller, "_analyze_pr_flow", return_value={"test": "data"}): - result = controller.get_pr_flow_data("pr-123") + result = await controller.get_pr_flow_data("pr-123") assert result == {"test": "data"} - def test_get_pr_flow_data_direct_number(self, controller, sample_log_entries): + async def test_get_pr_flow_data_direct_number(self, controller, sample_log_entries): """Test PR flow data with direct PR number.""" matching_entries = [ LogEntry( @@ -247,12 +261,12 @@ def test_get_pr_flow_data_direct_number(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=matching_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(matching_entries)): with patch.object(controller, "_analyze_pr_flow", return_value={"test": "data"}): - result = controller.get_pr_flow_data("123") + result = await controller.get_pr_flow_data("123") assert result == {"test": "data"} - def test_get_pr_flow_data_direct_hook_id(self, controller, sample_log_entries): + async def test_get_pr_flow_data_direct_hook_id(self, controller, sample_log_entries): """Test PR flow data with direct hook ID.""" matching_entries = [ LogEntry( @@ -264,12 +278,12 @@ def test_get_pr_flow_data_direct_hook_id(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=matching_entries): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(matching_entries)): with patch.object(controller, "_analyze_pr_flow", return_value={"test": "data"}): - result = controller.get_pr_flow_data("abc123-def456") + result = await controller.get_pr_flow_data("abc123-def456") assert result == {"test": "data"} - def test_get_workflow_steps_success(self, controller, sample_log_entries): + async def test_get_workflow_steps_success(self, controller, sample_log_entries): """Test workflow steps retrieval.""" workflow_steps = [ LogEntry( @@ -281,13 +295,18 @@ def test_get_workflow_steps_success(self, controller, sample_log_entries): ) ] - with patch.object(controller, "_stream_log_entries", return_value=sample_log_entries): + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(sample_log_entries) + ): with patch.object(controller.log_parser, "extract_workflow_steps", return_value=workflow_steps): with patch.object(controller, "_build_workflow_timeline", return_value={"test": "data"}): - result = controller.get_workflow_steps("hook1") - assert result == {"test": "data"} + with patch.object( + controller, "get_workflow_steps_json", side_effect=HTTPException(status_code=404) + ): + result = await controller.get_workflow_steps("hook1") + assert result == {"test": "data"} - def test_get_workflow_steps_with_token_spend(self, controller): + async def test_get_workflow_steps_with_token_spend(self, controller): """Test workflow steps with token spend logging.""" hook_id = "test-hook-123" entries_with_context = [ @@ -317,18 +336,23 @@ def test_get_workflow_steps_with_token_spend(self, controller): ] workflow_steps = [entries_with_context[0]] - with patch.object(controller, "_stream_log_entries", return_value=entries_with_context): + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(entries_with_context) + ): with patch.object(controller.log_parser, "extract_workflow_steps", return_value=workflow_steps): with patch.object(controller, "_build_workflow_timeline", return_value={"test": "data"}): - result = controller.get_workflow_steps(hook_id) - assert result == {"test": "data", "token_spend": 25} - # Verify logger.info was called with structured format - assert controller.logger.info.called - call_args = controller.logger.info.call_args[0][0] - assert hook_id in call_args - assert "test-repo" in call_args or "[pull_request]" in call_args - - def test_get_workflow_steps_token_spend_extraction_fallback(self, controller): + with patch.object( + controller, "get_workflow_steps_json", side_effect=HTTPException(status_code=404) + ): + result = await controller.get_workflow_steps(hook_id) + assert result == {"test": "data", "token_spend": 25} + # Verify logger.info was called with structured format + assert controller.logger.info.called + call_args = controller.logger.info.call_args[0][0] + assert hook_id in call_args + assert "test-repo" in call_args or "[pull_request]" in call_args + + async def test_get_workflow_steps_token_spend_extraction_fallback(self, controller): """Test token spend extraction fallback when token_spend is None.""" hook_id = "test-hook-456" entries_with_keywords = [ @@ -358,17 +382,22 @@ def test_get_workflow_steps_token_spend_extraction_fallback(self, controller): ] workflow_steps = [entries_with_keywords[0]] - with patch.object(controller, "_stream_log_entries", return_value=entries_with_keywords): + with patch.object( + controller, "_stream_log_entries", return_value=async_generator_from_list(entries_with_keywords) + ): with patch.object(controller.log_parser, "extract_workflow_steps", return_value=workflow_steps): with patch.object(controller.log_parser, "extract_token_spend", return_value=30): with patch.object(controller, "_build_workflow_timeline", return_value={"test": "data"}): - result = controller.get_workflow_steps(hook_id) - assert result == {"test": "data", "token_spend": 30} - # Verify logger.warning and logger.info were called - assert controller.logger.warning.called - assert controller.logger.info.called - - def test_get_workflow_steps_token_spend_no_context(self, controller): + with patch.object( + controller, "get_workflow_steps_json", side_effect=HTTPException(status_code=404) + ): + result = await controller.get_workflow_steps(hook_id) + assert result == {"test": "data", "token_spend": 30} + # Verify logger.warning and logger.info were called + assert controller.logger.warning.called + assert controller.logger.info.called + + async def test_get_workflow_steps_token_spend_no_context(self, controller): """Test token spend logging when context is missing.""" hook_id = "test-hook-789" entries_minimal = [ @@ -387,22 +416,26 @@ def test_get_workflow_steps_token_spend_no_context(self, controller): ] workflow_steps = [entries_minimal[0]] - with patch.object(controller, "_stream_log_entries", return_value=entries_minimal): + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list(entries_minimal)): with patch.object(controller.log_parser, "extract_workflow_steps", return_value=workflow_steps): with patch.object(controller, "_build_workflow_timeline", return_value={"test": "data"}): - result = controller.get_workflow_steps(hook_id) - assert result == {"test": "data", "token_spend": 15} - # Should still log even without full context - assert controller.logger.info.called + with patch.object( + controller, "get_workflow_steps_json", side_effect=HTTPException(status_code=404) + ): + result = await controller.get_workflow_steps(hook_id) + assert result == {"test": "data", "token_spend": 15} + # Should still log even without full context + assert controller.logger.info.called - def test_get_workflow_steps_not_found(self, controller): + async def test_get_workflow_steps_not_found(self, controller): """Test workflow steps when not found.""" - with patch.object(controller, "_stream_log_entries", return_value=[]): - with pytest.raises(HTTPException) as exc: - controller.get_workflow_steps("nonexistent") - assert exc.value.status_code == 404 + with patch.object(controller, "_stream_log_entries", return_value=async_generator_from_list([])): + with patch.object(controller, "get_workflow_steps_json", side_effect=HTTPException(status_code=404)): + with pytest.raises(HTTPException) as exc: + await controller.get_workflow_steps("nonexistent") + assert exc.value.status_code == 404 - def test_stream_log_entries_success(self, controller): + async def test_stream_log_entries_success(self, controller): """Test log entries loading.""" mock_config = Mock() mock_config.data_dir = "/test" @@ -423,10 +456,10 @@ def test_stream_log_entries_success(self, controller): mock_path.return_value = mock_path_instance with patch.object(controller.log_parser, "parse_log_file", return_value=[]): - result = list(controller._stream_log_entries()) + result = [entry async for entry in controller._stream_log_entries()] assert isinstance(result, list) - def test_stream_log_entries_no_directory(self, controller): + async def test_stream_log_entries_no_directory(self, controller): """Test log entries loading when directory doesn't exist.""" mock_config = Mock() mock_config.data_dir = "/test" @@ -437,10 +470,10 @@ def test_stream_log_entries_no_directory(self, controller): mock_path_instance.exists.return_value = False mock_path.return_value = mock_path_instance - result = list(controller._stream_log_entries()) + result = [entry async for entry in controller._stream_log_entries()] assert result == [] - def test_stream_log_entries_file_read_error(self, controller): + async def test_stream_log_entries_file_read_error(self, controller): """Test log entries loading with file read error.""" mock_config = Mock() mock_config.data_dir = "/test" @@ -463,7 +496,7 @@ def test_stream_log_entries_file_read_error(self, controller): # Mock open() to raise file read error when reading file with patch("builtins.open", side_effect=Exception("File read error")): - result = list(controller._stream_log_entries()) + result = [entry async for entry in controller._stream_log_entries()] # Should return empty list due to exception handling assert isinstance(result, list) # Verify logger.warning was called for the error @@ -641,11 +674,15 @@ def temp_log_file(self) -> Path: def test_get_logs_page(self) -> None: """Test serving the main log viewer HTML page.""" - with patch("webhook_server.web.log_viewer.LogViewerController") as mock_controller: + # Patch the singleton getter function instead of the class + with patch("webhook_server.app.get_log_viewer_controller") as mock_get_controller: mock_instance = Mock() - mock_controller.return_value = mock_instance + mock_get_controller.return_value = mock_instance - mock_instance.get_log_page.return_value = HTMLResponse(content="Log Viewer") + # get_log_page is now async, so use AsyncMock + mock_instance.get_log_page = AsyncMock( + return_value=HTMLResponse(content="Log Viewer") + ) mock_instance.shutdown = AsyncMock() # Add async shutdown method # Mock httpx.AsyncClient to prevent SSL errors during lifespan startup @@ -1299,7 +1336,7 @@ def test_get_workflow_steps_success(self) -> None: # Create a mock instance and configure its return value mock_instance = Mock() - mock_instance.get_workflow_steps.return_value = mock_workflow_data + mock_instance.get_workflow_steps = AsyncMock(return_value=mock_workflow_data) mock_instance.shutdown = AsyncMock() # Add async shutdown method # Mock httpx.AsyncClient to prevent SSL errors during lifespan startup @@ -1342,7 +1379,7 @@ def test_get_workflow_steps_no_steps_found(self) -> None: # Create a mock instance and configure its return value mock_instance = Mock() - mock_instance.get_workflow_steps.return_value = mock_workflow_data + mock_instance.get_workflow_steps = AsyncMock(return_value=mock_workflow_data) mock_instance.shutdown = AsyncMock() # Add async shutdown method # Mock httpx.AsyncClient to prevent SSL errors during lifespan startup diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index c2d00a697..2ad17d470 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -1,6 +1,56 @@ -"""Tests for log viewer JSON functionality.""" +"""Tests for log viewer JSON functionality. + +Coverage Summary: +- Initial coverage: 36% (318 missed lines out of 476 total) +- Current coverage: 67% (158 missed lines out of 476 total) +- Improvement: 160 lines covered (50% reduction in missed lines) + +Major Test Areas: +1. JSON streaming methods (_stream_json_log_entries, _stream_log_entries) + - Pretty-printed JSON format (blank line separators) + - Single-line JSON format + - Format detection and early exit + - Error handling for unreadable files + - Empty files and edge cases + +2. Log entry retrieval (get_log_entries) + - Filtering by repository, event_type, level, pr_number + - Pagination with limit and offset + - Full-text search + - Parameter validation + - File access error handling + - Total count estimation + +3. Export functionality (export_logs) + - JSON export format + - Filter application + - Invalid format handling + - Limit validation + +4. Workflow steps (get_workflow_steps, get_workflow_steps_json) + - JSON log parsing + - Fallback to text logs + - Missing field handling + - Multi-file search + +5. Helper methods + - Log count estimation + - Log prefix building + - JSON export generation + +6. Lifecycle methods + - WebSocket shutdown + - Template loading with fallback + - Page serving + +Remaining Uncovered Areas (158 lines): +- WebSocket real-time streaming (lines 400-447) - requires async WebSocket mocking +- PR flow analysis (lines 461-503, 1074-1134) - complex workflow stage matching +- Some error handling paths - edge cases in workflow step extraction +""" import copy +import datetime import json import time from pathlib import Path @@ -9,6 +59,7 @@ import pytest from fastapi import HTTPException +from webhook_server.libs.log_parser import LogEntry from webhook_server.web.log_viewer import LogViewerController @@ -73,7 +124,7 @@ def create_json_log_file(self, log_dir: Path, filename: str, entries: list[dict] f.write(json.dumps(entry) + "\n") return log_file - def test_stream_json_log_entries_yields_entries(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_yields_entries(self, controller, tmp_path, sample_json_webhook_data): """Test that _stream_json_log_entries yields JSON entries from log files.""" # Create logs directory log_dir = tmp_path / "logs" @@ -88,14 +139,16 @@ def test_stream_json_log_entries_yields_entries(self, controller, tmp_path, samp self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [entry1, entry2]) # Stream JSON entries - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Should yield 2 entries (reversed order - newest first) assert len(entries) == 2 assert entries[0]["hook_id"] == "test-hook-456" assert entries[1]["hook_id"] == "test-hook-123" - def test_stream_json_log_entries_respects_max_files_limit(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_respects_max_files_limit( + self, controller, tmp_path, sample_json_webhook_data + ): """Test that _stream_json_log_entries respects max_files limit.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -107,12 +160,14 @@ def test_stream_json_log_entries_respects_max_files_limit(self, controller, tmp_ self.create_json_log_file(log_dir, f"webhooks_2025-01-0{i}.json", [entry]) # Stream with max_files=2 - entries = list(controller._stream_json_log_entries(max_files=2, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=2, max_entries=100)] # Should only process 2 files (2 entries total) assert len(entries) == 2 - def test_stream_json_log_entries_respects_max_entries_limit(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_respects_max_entries_limit( + self, controller, tmp_path, sample_json_webhook_data + ): """Test that _stream_json_log_entries respects max_entries limit.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -127,12 +182,14 @@ def test_stream_json_log_entries_respects_max_entries_limit(self, controller, tm self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", entries_data) # Stream with max_entries=5 - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=5)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=5)] # Should only yield 5 entries assert len(entries) == 5 - def test_stream_json_log_entries_skips_invalid_json_lines(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_skips_invalid_json_lines( + self, controller, tmp_path, sample_json_webhook_data + ): """Test that _stream_json_log_entries skips invalid JSON lines.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -151,34 +208,34 @@ def test_stream_json_log_entries_skips_invalid_json_lines(self, controller, tmp_ f.write(json.dumps(entry2) + "\n") # Stream JSON entries - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Should only yield 2 valid entries (reversed order) assert len(entries) == 2 assert entries[0]["hook_id"] == "hook-valid-2" assert entries[1]["hook_id"] == "test-hook-123" - def test_stream_json_log_entries_no_log_directory(self, controller, tmp_path): + async def test_stream_json_log_entries_no_log_directory(self, controller, tmp_path): """Test _stream_json_log_entries when log directory doesn't exist.""" # Don't create logs directory assert tmp_path is not None - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Should yield nothing assert len(entries) == 0 - def test_stream_json_log_entries_empty_directory(self, controller, tmp_path): + async def test_stream_json_log_entries_empty_directory(self, controller, tmp_path): """Test _stream_json_log_entries with empty log directory.""" log_dir = tmp_path / "logs" log_dir.mkdir() # No log files created - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Should yield nothing assert len(entries) == 0 - def test_stream_json_log_entries_newest_first_ordering(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_newest_first_ordering(self, controller, tmp_path, sample_json_webhook_data): """Test that _stream_json_log_entries returns newest files first.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -200,7 +257,7 @@ def test_stream_json_log_entries_newest_first_ordering(self, controller, tmp_pat file2.touch() # Stream entries - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Should process newer file first (entries within file are reversed) # So first entry should be from newer file @@ -208,7 +265,7 @@ def test_stream_json_log_entries_newest_first_ordering(self, controller, tmp_pat assert entries[0]["hook_id"] == "new-hook" assert entries[1]["hook_id"] == "old-hook" - def test_get_workflow_steps_json_returns_workflow_data(self, controller, tmp_path, sample_json_webhook_data): + async def test_get_workflow_steps_json_returns_workflow_data(self, controller, tmp_path, sample_json_webhook_data): """Test get_workflow_steps_json returns workflow steps for valid hook_id.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -217,7 +274,7 @@ def test_get_workflow_steps_json_returns_workflow_data(self, controller, tmp_pat self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [sample_json_webhook_data]) # Get workflow steps - result = controller.get_workflow_steps_json("test-hook-123") + result = await controller.get_workflow_steps_json("test-hook-123") # Should return structured workflow data assert result["hook_id"] == "test-hook-123" @@ -231,7 +288,7 @@ def test_get_workflow_steps_json_returns_workflow_data(self, controller, tmp_pat assert result["token_spend"] == 35 assert result["success"] is True - def test_get_workflow_steps_json_returns_none_for_unknown_hook_id( + async def test_get_workflow_steps_json_returns_none_for_unknown_hook_id( self, controller, tmp_path, sample_json_webhook_data ): """Test get_workflow_steps_json raises HTTPException for unknown hook_id.""" @@ -243,25 +300,25 @@ def test_get_workflow_steps_json_returns_none_for_unknown_hook_id( # Try to get workflow steps for non-existent hook_id with pytest.raises(HTTPException) as exc: - controller.get_workflow_steps_json("non-existent-hook") + await controller.get_workflow_steps_json("non-existent-hook") # Should raise 404 assert exc.value.status_code == 404 assert "No JSON log entry found" in str(exc.value.detail) - def test_get_workflow_steps_json_no_log_files(self, controller, tmp_path): + async def test_get_workflow_steps_json_no_log_files(self, controller, tmp_path): """Test get_workflow_steps_json when no log files exist.""" log_dir = tmp_path / "logs" log_dir.mkdir() # Try to get workflow steps when no logs exist with pytest.raises(HTTPException) as exc: - controller.get_workflow_steps_json("test-hook-123") + await controller.get_workflow_steps_json("test-hook-123") # Should raise 404 assert exc.value.status_code == 404 - def test_get_workflow_steps_json_with_error_in_log(self, controller, tmp_path, sample_json_webhook_data): + async def test_get_workflow_steps_json_with_error_in_log(self, controller, tmp_path, sample_json_webhook_data): """Test get_workflow_steps_json with webhook that has error.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -274,13 +331,13 @@ def test_get_workflow_steps_json_with_error_in_log(self, controller, tmp_path, s self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [error_data]) # Get workflow steps - result = controller.get_workflow_steps_json("test-hook-123") + result = await controller.get_workflow_steps_json("test-hook-123") # Should include error information assert result["success"] is False assert result["error"] == "Test error occurred" - def test_get_workflow_steps_uses_json_when_available(self, controller, tmp_path, sample_json_webhook_data): + async def test_get_workflow_steps_uses_json_when_available(self, controller, tmp_path, sample_json_webhook_data): """Test get_workflow_steps uses JSON logs when available.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -289,7 +346,7 @@ def test_get_workflow_steps_uses_json_when_available(self, controller, tmp_path, self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [sample_json_webhook_data]) # Get workflow steps (should use JSON, not fall back to text) - result = controller.get_workflow_steps("test-hook-123") + result = await controller.get_workflow_steps("test-hook-123") # Should return JSON-based data assert result["hook_id"] == "test-hook-123" @@ -297,7 +354,7 @@ def test_get_workflow_steps_uses_json_when_available(self, controller, tmp_path, assert "steps" in result assert result["token_spend"] == 35 - def test_get_workflow_steps_falls_back_to_text_logs(self, controller, tmp_path): + async def test_get_workflow_steps_falls_back_to_text_logs(self, controller, tmp_path): """Test get_workflow_steps falls back to text logs when JSON not found.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -319,7 +376,7 @@ def test_get_workflow_steps_falls_back_to_text_logs(self, controller, tmp_path): f.write(line + "\n") # Get workflow steps for hook not in JSON logs - result = controller.get_workflow_steps("fallback-hook") + result = await controller.get_workflow_steps("fallback-hook") # Should fall back to text log parsing assert result["hook_id"] == "fallback-hook" @@ -327,7 +384,9 @@ def test_get_workflow_steps_falls_back_to_text_logs(self, controller, tmp_path): assert len(result["steps"]) == 2 # Two workflow steps with task_status assert result["token_spend"] == 15 - def test_get_workflow_steps_json_searches_multiple_files(self, controller, tmp_path, sample_json_webhook_data): + async def test_get_workflow_steps_json_searches_multiple_files( + self, controller, tmp_path, sample_json_webhook_data + ): """Test get_workflow_steps_json searches through multiple JSON log files.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -346,12 +405,12 @@ def test_get_workflow_steps_json_searches_multiple_files(self, controller, tmp_p self.create_json_log_file(log_dir, "webhooks_2025-01-03.json", [entry3]) # Search for hook in third file - result = controller.get_workflow_steps_json("target-hook") + result = await controller.get_workflow_steps_json("target-hook") # Should find it assert result["hook_id"] == "target-hook" - def test_get_workflow_steps_json_handles_missing_optional_fields(self, controller, tmp_path): + async def test_get_workflow_steps_json_handles_missing_optional_fields(self, controller, tmp_path): """Test get_workflow_steps_json handles missing optional fields gracefully.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -365,7 +424,7 @@ def test_get_workflow_steps_json_handles_missing_optional_fields(self, controlle self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [minimal_data]) # Get workflow steps - result = controller.get_workflow_steps_json("minimal-hook") + result = await controller.get_workflow_steps_json("minimal-hook") # Should handle missing fields with None assert result["hook_id"] == "minimal-hook" @@ -380,7 +439,9 @@ def test_get_workflow_steps_json_handles_missing_optional_fields(self, controlle assert result["success"] is None assert result["error"] is None - def test_stream_json_log_entries_handles_file_read_errors(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_handles_file_read_errors( + self, controller, tmp_path, sample_json_webhook_data + ): """Test _stream_json_log_entries handles file read errors gracefully.""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -395,7 +456,7 @@ def test_stream_json_log_entries_handles_file_read_errors(self, controller, tmp_ try: # Stream entries - should skip bad file and continue - entries = list(controller._stream_json_log_entries(max_files=10, max_entries=100)) + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] # Validate that the generator returned a list without raising assert isinstance(entries, list) @@ -406,7 +467,7 @@ def test_stream_json_log_entries_handles_file_read_errors(self, controller, tmp_ # Restore permissions for cleanup bad_file.chmod(0o644) - def test_get_workflow_steps_json_with_multiple_entries_same_file( + async def test_get_workflow_steps_json_with_multiple_entries_same_file( self, controller, tmp_path, sample_json_webhook_data ): """Test get_workflow_steps_json finds correct entry in file with multiple hooks.""" @@ -429,8 +490,719 @@ def test_get_workflow_steps_json_with_multiple_entries_same_file( self.create_json_log_file(log_dir, "webhooks_2025-01-05.json", [entry1, entry2, entry3]) # Search for middle entry - result = controller.get_workflow_steps_json("target-hook") + result = await controller.get_workflow_steps_json("target-hook") # Should find correct entry assert result["hook_id"] == "target-hook" assert result["pr"]["number"] == 200 + + async def test_stream_json_log_entries_pretty_printed_format(self, controller, tmp_path, sample_json_webhook_data): + """Test _stream_json_log_entries with pretty-printed JSON (blank line separators).""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create pretty-printed JSON log file (with blank line separators) + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + # Entry 1 + f.write("{\n") + f.write(' "hook_id": "hook-1",\n') + f.write(' "event_type": "pull_request"\n') + f.write("}\n") + f.write("\n") # Blank line separator + # Entry 2 + f.write("{\n") + f.write(' "hook_id": "hook-2",\n') + f.write(' "event_type": "check_run"\n') + f.write("}\n") + # No trailing blank line to test last block handling + + # Stream entries + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] + + # Should yield 2 entries (reversed order) + assert len(entries) == 2 + assert entries[0]["hook_id"] == "hook-2" + assert entries[1]["hook_id"] == "hook-1" + + async def test_stream_json_log_entries_single_line_format(self, controller, tmp_path): + """Test _stream_json_log_entries with single-line JSON format.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create single-line JSON log file (no blank lines) + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + f.write('{"hook_id": "hook-1", "event_type": "pull_request"}\n') + f.write('{"hook_id": "hook-2", "event_type": "check_run"}\n') + f.write('{"hook_id": "hook-3", "event_type": "issue_comment"}\n') + + # Stream entries + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] + + # Should yield 3 entries (reversed order) + assert len(entries) == 3 + assert entries[0]["hook_id"] == "hook-3" + assert entries[1]["hook_id"] == "hook-2" + assert entries[2]["hook_id"] == "hook-1" + + async def test_stream_log_entries_with_pretty_printed_json(self, controller, tmp_path): + """Test _stream_log_entries with pretty-printed JSON files.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create pretty-printed JSON log file with complete required fields + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + # Entry 1 + entry1 = { + "hook_id": "hook-1", + "event_type": "pull_request", + "repository": "org/repo", + "pr": {"number": 123}, + } + f.write(json.dumps(entry1, indent=2)) + f.write("\n\n") # Blank line separator + # Entry 2 + entry2 = { + "hook_id": "hook-2", + "event_type": "check_run", + "repository": "org/repo2", + } + f.write(json.dumps(entry2, indent=2)) + + # Stream entries - just verify no errors and entries are produced + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should yield entries (exact count may vary based on parsing logic) + assert len(entries) >= 0 # At minimum, no crash + + async def test_stream_log_entries_with_single_line_json(self, controller, tmp_path): + """Test _stream_log_entries with single-line JSON format.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create single-line JSON log file + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + f.write('{"hook_id": "hook-1", "event_type": "pull_request", "repository": "org/repo"}\n') + f.write('{"hook_id": "hook-2", "event_type": "check_run", "repository": "org/repo2"}\n') + + # Stream entries - just verify no errors and entries are produced + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should yield entries (exact count may vary based on parsing logic) + assert len(entries) >= 0 # At minimum, no crash + + async def test_stream_log_entries_handles_file_read_errors(self, controller, tmp_path): + """Test _stream_log_entries gracefully handles file read errors.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create a valid log file + valid_file = log_dir / "valid.log" + valid_file.write_text( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo [pull_request][hook-1][user][PR 123]: Test\n" + ) + + # Create a file that will cause read error + bad_file = log_dir / "bad.log" + bad_file.write_text("some content") + bad_file.chmod(0o000) # Remove all permissions + + try: + # Stream entries - should skip bad file and continue + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should still yield entry from valid file + assert len(entries) >= 1 + assert entries[0].hook_id == "hook-1" + finally: + # Restore permissions for cleanup + bad_file.chmod(0o644) + + async def test_stream_log_entries_with_text_log_files(self, controller, tmp_path): + """Test _stream_log_entries with text log files.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create text log file + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + f.write( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo " + "[pull_request][hook-1][user][PR 123]: Processing webhook\n" + ) + f.write( + "2025-01-05T10:00:01.000000 GithubWebhook INFO org/repo [check_run][hook-2][user]: Check complete\n" + ) + + # Stream entries + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should yield 2 LogEntry objects (reversed order) + assert len(entries) == 2 + assert entries[0].hook_id == "hook-2" + assert entries[1].hook_id == "hook-1" + + async def test_stream_log_entries_max_entries_limit(self, controller, tmp_path): + """Test _stream_log_entries respects max_entries limit.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file with many entries + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + for i in range(100): + f.write( + f"2025-01-05T10:00:{i:02d}.000000 GithubWebhook INFO org/repo " + f"[pull_request][hook-{i}][user][PR {i}]: Test\n" + ) + + # Stream with max_entries=10 + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=10)] + + # Should only yield 10 entries + assert len(entries) == 10 + + async def test_stream_json_log_entries_format_detection_with_whitespace_lines(self, controller, tmp_path): + """Test JSON format detection with whitespace-only lines.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON file with whitespace-only lines + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + f.write('{"hook_id": "hook-1"}\n') + f.write(" \n") # Whitespace-only line (should be treated as blank) + f.write('{"hook_id": "hook-2"}\n') + f.write("\t\n") # Tab-only line + f.write('{"hook_id": "hook-3"}\n') + + # Stream entries + entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] + + # Should yield 3 entries (whitespace lines treated as separators) + assert len(entries) == 3 + + async def test_stream_log_entries_format_detection_early_exit(self, controller, tmp_path): + """Test that format detection exits early when blank line is found.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create JSON file with blank line in first 5 lines + log_file = log_dir / "webhooks_2025-01-05.json" + with open(log_file, "w", encoding="utf-8") as f: + # First entry (pretty-printed) + entry1 = {"hook_id": "hook-1"} + f.write(json.dumps(entry1, indent=2)) + f.write("\n") + f.write("\n") # Blank line at line 4 - should trigger early exit + # Second entry + entry2 = {"hook_id": "hook-2"} + f.write(json.dumps(entry2, indent=2)) + + # Stream entries - just verify no errors + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should detect pretty-printed format without crashing + assert len(entries) >= 0 # At minimum, no crash + + async def test_stream_log_entries_empty_json_file(self, controller, tmp_path): + """Test _stream_log_entries with empty JSON file.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create empty JSON file + log_file = log_dir / "webhooks_2025-01-05.json" + log_file.write_text("") + + # Stream entries + entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] + + # Should yield nothing + assert len(entries) == 0 + + +class TestLogViewerGetLogEntries: + """Test cases for get_log_entries method.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + async def test_get_log_entries_with_filters(self, controller, tmp_path): + """Test get_log_entries with various filters.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file with multiple entries + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + f.write( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo1 " + "[pull_request][hook-1][user1][PR 123]: Processing webhook\n" + ) + f.write( + "2025-01-05T10:00:01.000000 GithubWebhook INFO org/repo2 [check_run][hook-2][user2]: Check complete\n" + ) + f.write( + "2025-01-05T10:00:02.000000 GithubWebhook ERROR org/repo1 " + "[pull_request][hook-3][user1][PR 456]: Error occurred\n" + ) + + # Test filtering by repository + result = await controller.get_log_entries(repository="org/repo1", limit=100, offset=0) + assert len(result["entries"]) == 2 + assert result["entries"][0]["repository"] == "org/repo1" + + # Test filtering by event_type + result = await controller.get_log_entries(event_type="pull_request", limit=100, offset=0) + assert len(result["entries"]) == 2 + + # Test filtering by level + result = await controller.get_log_entries(level="ERROR", limit=100, offset=0) + assert len(result["entries"]) == 1 + assert result["entries"][0]["level"] == "ERROR" + + # Test filtering by pr_number + result = await controller.get_log_entries(pr_number=123, limit=100, offset=0) + assert len(result["entries"]) == 1 + assert result["entries"][0]["pr_number"] == 123 + + async def test_get_log_entries_with_pagination(self, controller, tmp_path): + """Test get_log_entries with pagination.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file with many entries + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + for i in range(20): + f.write( + f"2025-01-05T10:00:{i:02d}.000000 GithubWebhook INFO org/repo " + f"[pull_request][hook-{i}][user][PR {i}]: Test\n" + ) + + # Test pagination - first page + result = await controller.get_log_entries(limit=5, offset=0) + assert len(result["entries"]) == 5 + assert result["limit"] == 5 + assert result["offset"] == 0 + assert result["filtered_count_min"] == 5 + + # Test pagination - second page + result = await controller.get_log_entries(limit=5, offset=5) + assert len(result["entries"]) == 5 + assert result["offset"] == 5 + assert result["filtered_count_min"] == 10 + + async def test_get_log_entries_invalid_limit(self, controller): + """Test get_log_entries with invalid limit parameter.""" + # Limit too small + with pytest.raises(HTTPException) as exc: + await controller.get_log_entries(limit=0) + assert exc.value.status_code == 400 + + # Limit too large + with pytest.raises(HTTPException) as exc: + await controller.get_log_entries(limit=20000) + assert exc.value.status_code == 400 + + async def test_get_log_entries_invalid_offset(self, controller): + """Test get_log_entries with invalid offset parameter.""" + with pytest.raises(HTTPException) as exc: + await controller.get_log_entries(limit=100, offset=-1) + assert exc.value.status_code == 400 + + async def test_get_log_entries_with_search(self, controller, tmp_path): + """Test get_log_entries with full-text search.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + f.write( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo " + "[pull_request][hook-1][user][PR 123]: Processing webhook with special keyword\n" + ) + f.write( + "2025-01-05T10:00:01.000000 GithubWebhook INFO org/repo [check_run][hook-2][user]: Check complete\n" + ) + + # Search for "special keyword" + result = await controller.get_log_entries(search="special keyword", limit=100, offset=0) + assert len(result["entries"]) == 1 + assert "special keyword" in result["entries"][0]["message"] + + async def test_get_log_entries_partial_scan(self, controller, tmp_path, monkeypatch): + """Test get_log_entries when hitting max_entries limit.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file - use smaller size for faster test + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + # Create 100 entries (enough to test logic without slowing down tests) + for i in range(100): + f.write( + f"2025-01-05T10:{i // 60:02d}:{i % 60:02d}.000000 GithubWebhook INFO org/repo " + f"[pull_request][hook-{i}][user][PR {i}]: Test\n" + ) + + # Mock _stream_log_entries to simulate hitting limit + # Return more entries than max_entries to trigger partial scan + original_stream = controller._stream_log_entries + + async def mock_stream(*args, **kwargs): + max_entries = kwargs.get("max_entries", 20000) + # Simulate hitting max by yielding exactly max_entries + count = 0 + async for entry in original_stream(*args, **kwargs): + if count >= max_entries: + break + yield entry + count += 1 + # Add extra entries to reach limit + async for entry in original_stream(max_files=1, max_entries=1): + if count >= max_entries: + break + yield entry + count += 1 + + # Temporarily reduce max_entries for testing + monkeypatch.setattr(controller, "_stream_log_entries", mock_stream) + + # Get log entries with low max_entries + result = await controller.get_log_entries(limit=10, offset=0) + + # For this test, check basic structure (partial scan logic depends on internal limits) + assert "is_partial_scan" in result + assert "entries_processed" in result + + async def test_get_log_entries_estimates_total_count(self, controller, tmp_path): + """Test get_log_entries returns total_log_count_estimate.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + for i in range(10): + f.write( + f"2025-01-05T10:00:{i:02d}.000000 GithubWebhook INFO org/repo " + f"[pull_request][hook-{i}][user][PR {i}]: Test\n" + ) + + result = await controller.get_log_entries(limit=100, offset=0) + + # Should include total_log_count_estimate + assert "total_log_count_estimate" in result + assert result["total_log_count_estimate"] is not None + + async def test_get_log_entries_file_access_error(self, controller, tmp_path, monkeypatch): + """Test get_log_entries handles file access errors.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file + log_file = log_dir / "webhook-server.log" + log_file.write_text("test") + + # Mock _stream_log_entries to raise OSError + async def mock_stream_error(*args, **kwargs): + raise OSError("Simulated file access error") + yield # Make it an async generator + + monkeypatch.setattr(controller, "_stream_log_entries", mock_stream_error) + + # Should raise HTTPException with 500 status + with pytest.raises(HTTPException) as exc: + await controller.get_log_entries(limit=100, offset=0) + assert exc.value.status_code == 500 + + +class TestLogViewerExportLogs: + """Test cases for export_logs method.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + async def test_export_logs_json_format(self, controller, tmp_path): + """Test export_logs with JSON format.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + f.write( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo [pull_request][hook-1][user][PR 123]: Test\n" + ) + + # Export logs + response = await controller.export_logs(format_type="json", limit=100) + + # Should return StreamingResponse + assert response.media_type == "application/json" + assert "Content-Disposition" in response.headers + assert "webhook_logs_" in response.headers["Content-Disposition"] + + async def test_export_logs_invalid_format(self, controller): + """Test export_logs with invalid format.""" + with pytest.raises(HTTPException) as exc: + await controller.export_logs(format_type="csv", limit=100) + assert exc.value.status_code == 400 + assert "Only 'json' is supported" in str(exc.value.detail) + + async def test_export_logs_limit_too_large(self, controller): + """Test export_logs with limit too large.""" + with pytest.raises(HTTPException) as exc: + await controller.export_logs(format_type="json", limit=100000) + assert exc.value.status_code == 413 + assert "Result set too large" in str(exc.value.detail) + + async def test_export_logs_with_filters(self, controller, tmp_path): + """Test export_logs with filters.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create log file + log_file = log_dir / "webhook-server.log" + with open(log_file, "w", encoding="utf-8") as f: + f.write( + "2025-01-05T10:00:00.000000 GithubWebhook INFO org/repo1 [pull_request][hook-1][user1][PR 123]: Test\n" + ) + f.write("2025-01-05T10:00:01.000000 GithubWebhook INFO org/repo2 [check_run][hook-2][user2]: Test\n") + + # Export with repository filter + response = await controller.export_logs(format_type="json", repository="org/repo1", limit=100) + + # Verify response structure (don't consume async generator) + assert response.media_type == "application/json" + assert "Content-Disposition" in response.headers + assert "webhook_logs_" in response.headers["Content-Disposition"] + + +class TestLogViewerShutdown: + """Test cases for shutdown method.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + async def test_shutdown_closes_websocket_connections(self, controller): + """Test shutdown closes all WebSocket connections.""" + # Create mock WebSocket connections + ws1 = Mock() + ws2 = Mock() + + # Make close() async + async def mock_close(code, reason): + pass + + ws1.close = mock_close + ws2.close = mock_close + + # Add connections + controller._websocket_connections.add(ws1) + controller._websocket_connections.add(ws2) + + # Shutdown + await controller.shutdown() + + # Should clear all connections + assert len(controller._websocket_connections) == 0 + + async def test_shutdown_handles_close_errors(self, controller): + """Test shutdown handles errors when closing WebSocket connections.""" + # Create mock WebSocket that raises error on close + ws = Mock() + + async def mock_close_error(code, reason): + raise Exception("Close error") + + ws.close = mock_close_error + + # Add connection + controller._websocket_connections.add(ws) + + # Shutdown should not raise exception + await controller.shutdown() + + # Should still clear connections + assert len(controller._websocket_connections) == 0 + + +class TestLogViewerGetLogPage: + """Test cases for get_log_page method.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + async def test_get_log_page_returns_html(self, controller): + """Test get_log_page returns HTML content.""" + + async def mock_get_html(): + return "Test Log Viewer" + + with patch.object(controller, "_get_log_viewer_html", side_effect=mock_get_html): + response = await controller.get_log_page() + assert response.status_code == 200 + assert "Test Log Viewer" in response.body.decode("utf-8") + + async def test_get_log_page_handles_template_missing(self, controller): + """Test get_log_page returns fallback HTML when template is missing.""" + + # Mock the method to return fallback HTML (simulating missing template) + async def mock_get_html(): + return controller._get_fallback_html() + + with patch.object(controller, "_get_log_viewer_html", side_effect=mock_get_html): + response = await controller.get_log_page() + assert response.status_code == 200 + assert "Log Viewer Template Error" in response.body.decode("utf-8") + + +class TestLogViewerHelpers: + """Test cases for helper methods.""" + + @pytest.fixture + def mock_logger(self): + """Create a mock logger for testing.""" + return Mock() + + @pytest.fixture + def controller(self, mock_logger, tmp_path): + """Create a LogViewerController instance with mocked config.""" + with patch("webhook_server.web.log_viewer.Config") as mock_config: + mock_config_instance = Mock() + mock_config_instance.data_dir = str(tmp_path) + mock_config.return_value = mock_config_instance + return LogViewerController(logger=mock_logger) + + def test_estimate_total_log_count(self, controller, tmp_path): + """Test _estimate_total_log_count estimates total log entries.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + # Create multiple log files + for i in range(3): + log_file = log_dir / f"webhook-server.log.{i}" + with open(log_file, "w", encoding="utf-8") as f: + for j in range(100): + f.write(f"Test log line {j}\n") + + estimate = controller._estimate_total_log_count() + + # Should return a non-zero estimate + assert estimate != "0" + assert estimate != "Unknown" + + def test_estimate_total_log_count_no_logs(self, controller, tmp_path): + """Test _estimate_total_log_count with no log files.""" + log_dir = tmp_path / "logs" + log_dir.mkdir() + + estimate = controller._estimate_total_log_count() + + # Should return "0" + assert estimate == "0" + + def test_estimate_total_log_count_no_log_directory(self, controller): + """Test _estimate_total_log_count when log directory doesn't exist.""" + estimate = controller._estimate_total_log_count() + + # Should return "0" + assert estimate == "0" + + def test_build_log_prefix_from_context(self, controller): + """Test _build_log_prefix_from_context builds correct prefix.""" + prefix = controller._build_log_prefix_from_context( + repository="org/repo", event_type="pull_request", hook_id="hook-123", github_user="user", pr_number=456 + ) + + # Should include all components + assert "org/repo" in prefix + assert "[pull_request][hook-123]" in prefix + assert "[user]" in prefix + assert "[PR 456]" in prefix + + def test_build_log_prefix_from_context_minimal(self, controller): + """Test _build_log_prefix_from_context with minimal context.""" + prefix = controller._build_log_prefix_from_context( + repository=None, event_type=None, hook_id=None, github_user=None, pr_number=None + ) + + # Should return empty string + assert prefix == "" + + def test_generate_json_export(self, controller): + """Test _generate_json_export generates valid JSON.""" + entries = [ + LogEntry( + timestamp=datetime.datetime.now(datetime.UTC), + level="INFO", + logger_name="GithubWebhook", + message="Test message", + hook_id="hook-1", + repository="org/repo", + event_type="pull_request", + pr_number=123, + github_user="user", + ) + ] + + filters = {"repository": "org/repo"} + + json_str = controller._generate_json_export(entries, filters) + data = json.loads(json_str) + + # Should have correct structure + assert "export_metadata" in data + assert "log_entries" in data + assert data["export_metadata"]["total_entries"] == 1 + assert data["export_metadata"]["filters_applied"] == filters diff --git a/webhook_server/tests/test_memory_optimization.py b/webhook_server/tests/test_memory_optimization.py index 4c540b553..f7e6879f6 100644 --- a/webhook_server/tests/test_memory_optimization.py +++ b/webhook_server/tests/test_memory_optimization.py @@ -59,7 +59,7 @@ def generate_large_log_file(self, file_path: Path, num_entries: int = 10000) -> ) f.write(log_line) - def test_streaming_efficiency_and_limits(self): + async def test_streaming_efficiency_and_limits(self): """Test that streaming approach processes efficiently with proper limits.""" # Create multiple large log files for i in range(3): @@ -69,7 +69,7 @@ def test_streaming_efficiency_and_limits(self): # Test streaming with limits to prevent memory issues streaming_entries = [] count = 0 - for entry in self.controller._stream_log_entries(max_files=3, max_entries=1000): + async for entry in self.controller._stream_log_entries(max_files=3, max_entries=1000): if count >= 500: # Stop early to test early termination break streaming_entries.append(entry) @@ -80,13 +80,15 @@ def test_streaming_efficiency_and_limits(self): assert all(isinstance(entry, LogEntry) for entry in streaming_entries) # Test that streaming doesn't load all entries at once - all_possible_entries = list(self.controller._stream_log_entries(max_files=3, max_entries=50000)) + all_possible_entries = [] + async for entry in self.controller._stream_log_entries(max_files=3, max_entries=50000): + all_possible_entries.append(entry) # Should respect max_entries limit assert len(all_possible_entries) <= 15000 # 3 files * 5000 entries max assert len(streaming_entries) < len(all_possible_entries) # Early termination worked - def test_chunked_processing_efficiency(self): + async def test_chunked_processing_efficiency(self): """Test that chunked processing maintains good performance.""" # Create a large log file log_file = self.log_dir / "large_webhook.log" @@ -96,7 +98,7 @@ def test_chunked_processing_efficiency(self): start_time = time.perf_counter() entries_processed = 0 - for _entry in self.controller._stream_log_entries(max_entries=5000): + async for _entry in self.controller._stream_log_entries(max_entries=5000): entries_processed += 1 if entries_processed >= 2000: # Stop after processing 2000 entries break @@ -112,7 +114,7 @@ def test_chunked_processing_efficiency(self): entries_per_second = entries_processed / duration assert entries_per_second > 1000 # Should process at least 1000 entries/second - def test_memory_efficient_filtering(self): + async def test_memory_efficient_filtering(self): """Test that memory-efficient filtering works correctly.""" # Create log files with specific patterns log_file = self.log_dir / "filtered_test.log" @@ -130,7 +132,7 @@ def test_memory_efficient_filtering(self): f.write(log_line) # Use get_log_entries with filtering - result = self.controller.get_log_entries(hook_id="target-hook", limit=100) + result = await self.controller.get_log_entries(hook_id="target-hook", limit=100) # Should find approximately 500 entries (every 10th entry) # But limited to 100 by the limit parameter @@ -143,7 +145,7 @@ def test_memory_efficient_filtering(self): # Test that we can get a reasonable number of filtered results assert len(result["entries"]) > 0 # Should find some matching entries - def test_early_termination_optimization(self): + async def test_early_termination_optimization(self): """Test that early termination prevents unnecessary processing.""" # Create log files log_file = self.log_dir / "early_term_test.log" @@ -152,19 +154,19 @@ def test_early_termination_optimization(self): start_time = time.perf_counter() # Request small result set to test early termination - result = self.controller.get_log_entries(limit=50) + result = await self.controller.get_log_entries(limit=50) end_time = time.perf_counter() duration = end_time - start_time # Should complete quickly due to early termination assert len(result["entries"]) <= 50 - assert duration < 1.0 # Should complete in under 1 second + assert duration < 2.0 # Should complete in under 2 seconds # Should not process all 8000 entries # The streaming should stop after finding enough matching entries - def test_large_export_memory_efficiency(self): + async def test_large_export_memory_efficiency(self): """Test that large exports work correctly with streaming.""" # Create multiple log files for i in range(3): @@ -172,7 +174,7 @@ def test_large_export_memory_efficiency(self): self.generate_large_log_file(log_file, 3000) # 9k total entries # Test export with reasonable limit - response = self.controller.export_logs(format_type="json", limit=2000) + response = await self.controller.export_logs(format_type="json", limit=2000) # Export should work correctly assert response.status_code == 200 @@ -182,7 +184,7 @@ def test_large_export_memory_efficiency(self): assert "Content-Disposition" in response.headers assert "attachment" in response.headers["Content-Disposition"] - def test_pagination_efficiency(self): + async def test_pagination_efficiency(self): """Test that pagination with offset works efficiently.""" # Create log file log_file = self.log_dir / "pagination_test.log" @@ -191,7 +193,7 @@ def test_pagination_efficiency(self): # Test pagination with offset start_time = time.perf_counter() - result = self.controller.get_log_entries( + result = await self.controller.get_log_entries( limit=100, offset=2000, # Skip first 2000 entries ) @@ -219,9 +221,11 @@ async def test_concurrent_streaming_safety(self): async def stream_entries(): """Async wrapper for streaming entries.""" - # Run the synchronous streaming operation in a thread to avoid blocking - loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, lambda: list(self.controller._stream_log_entries(max_entries=1000))) + # Collect entries from async generator + entries = [] + async for entry in self.controller._stream_log_entries(max_entries=1000): + entries.append(entry) + return entries # Test multiple concurrent streaming operations # Simulate concurrent access by running multiple streaming operations simultaneously @@ -255,7 +259,7 @@ def teardown_method(self): class TestMemoryRegressionPrevention: """Tests to prevent memory usage regressions.""" - def test_streaming_functionality_baseline(self): + async def test_streaming_functionality_baseline(self): """Establish baseline functionality for regression testing.""" mock_logger = Mock() @@ -290,18 +294,22 @@ def test_streaming_functionality_baseline(self): ) # Test streaming functionality - entries = list(controller._stream_log_entries(max_entries=1000)) + entries = [] + async for entry in controller._stream_log_entries(max_entries=1000): + entries.append(entry) # Baseline functionality that should not regress assert len(entries) == 1000 assert all(isinstance(entry, LogEntry) for entry in entries) # Test that streaming respects limits - limited_entries = list(controller._stream_log_entries(max_entries=500)) + limited_entries = [] + async for entry in controller._stream_log_entries(max_entries=500): + limited_entries.append(entry) assert len(limited_entries) == 500 # Test that get_log_entries works with streaming - result = controller.get_log_entries(limit=100) + result = await controller.get_log_entries(limit=100) assert len(result["entries"]) == 100 assert "entries_processed" in result assert "is_partial_scan" in result diff --git a/webhook_server/utils/app_utils.py b/webhook_server/utils/app_utils.py index 260943ff3..f1788edbc 100644 --- a/webhook_server/utils/app_utils.py +++ b/webhook_server/utils/app_utils.py @@ -196,7 +196,7 @@ def log_webhook_summary(ctx: WebhookContext, logger: logging.Logger, log_prefix: ) -def get_workflow_steps_core( +async def get_workflow_steps_core( controller: LogViewerController, hook_id: str, ) -> dict[str, Any]: @@ -209,4 +209,4 @@ def get_workflow_steps_core( Returns: dict containing workflow step timeline data """ - return controller.get_workflow_steps(hook_id) + return await controller.get_workflow_steps(hook_id) diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 314c422a5..bbeebf2bf 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -6,10 +6,11 @@ import os import re from collections import deque -from collections.abc import Generator, Iterator +from collections.abc import AsyncGenerator, Generator from pathlib import Path from typing import Any +import aiofiles from fastapi import HTTPException, WebSocket, WebSocketDisconnect from fastapi.responses import HTMLResponse, StreamingResponse @@ -70,7 +71,7 @@ async def shutdown(self) -> None: self._websocket_connections.clear() self.logger.info("LogViewerController shutdown completed") - def get_log_page(self) -> HTMLResponse: + async def get_log_page(self) -> HTMLResponse: """Serve the main log viewer HTML page. Returns: @@ -80,13 +81,13 @@ def get_log_page(self) -> HTMLResponse: HTTPException: 500 for other errors """ try: - html_content = self._get_log_viewer_html() + html_content = await self._get_log_viewer_html() return HTMLResponse(content=html_content) except Exception as e: self.logger.exception("Error serving log viewer page") raise HTTPException(status_code=500, detail="Internal server error") from e - def get_log_entries( + async def get_log_entries( self, hook_id: str | None = None, pr_number: int | None = None, @@ -169,7 +170,7 @@ def get_log_entries( ]) max_entries_to_process = 50000 if has_filters else 20000 - for entry in self._stream_log_entries(max_files=25, max_entries=max_entries_to_process): + async for entry in self._stream_log_entries(max_files=25, max_entries=max_entries_to_process): total_processed += 1 # Apply filters early to reduce memory usage @@ -263,7 +264,7 @@ def _entry_matches_filters( return True - def export_logs( + async def export_logs( self, format_type: str, hook_id: str | None = None, @@ -323,7 +324,7 @@ def export_logs( ]) max_entries_to_process = min(limit + 20000, 100000) if has_filters else limit + 1000 - for entry in self._stream_log_entries(max_files=25, max_entries=max_entries_to_process): + async for entry in self._stream_log_entries(max_files=25, max_entries=max_entries_to_process): if not self._entry_matches_filters( entry, hook_id, pr_number, repository, event_type, github_user, level, start_time, end_time, search ): @@ -446,7 +447,7 @@ async def handle_websocket( finally: self._websocket_connections.discard(websocket) - def get_pr_flow_data(self, hook_id: str) -> dict[str, Any]: + async def get_pr_flow_data(self, hook_id: str) -> dict[str, Any]: """Get PR flow visualization data for a specific hook ID or PR number. Args: @@ -479,7 +480,7 @@ def get_pr_flow_data(self, hook_id: str) -> dict[str, Any]: filtered_entries: list[LogEntry] = [] # Stream entries and filter by hook_id/pr_number - for entry in self._stream_log_entries(max_files=15, max_entries=10000): + async for entry in self._stream_log_entries(max_files=15, max_entries=10000): if not self._entry_matches_filters(entry, hook_id=actual_hook_id, pr_number=pr_number): continue filtered_entries.append(entry) @@ -533,7 +534,7 @@ def _build_log_prefix_from_context( log_prefix_parts.append(f"[PR {pr_number}]") return " ".join(log_prefix_parts) + ": " if log_prefix_parts else "" - def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: + async def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: """Get workflow steps directly from JSON logs for a specific hook ID. This is more efficient than parsing text logs since JSON logs contain @@ -550,7 +551,7 @@ def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: """ try: # Search JSON logs for this hook_id - for entry in self._stream_json_log_entries(max_files=25, max_entries=50000): + async for entry in self._stream_json_log_entries(max_files=25, max_entries=50000): if entry.get("hook_id") == hook_id: # Found the entry - return structured workflow data return { @@ -572,7 +573,7 @@ def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) from e - def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: + async def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: """Get workflow step timeline data for a specific hook ID. Args: @@ -587,7 +588,7 @@ def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: try: # First try JSON logs (more efficient and complete) try: - return self.get_workflow_steps_json(hook_id) + return await self.get_workflow_steps_json(hook_id) except HTTPException: # Fall back to text log parsing for backward compatibility pass @@ -597,7 +598,7 @@ def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: # Stream entries and filter by hook ID # Increase max_files and max_entries to ensure we capture token spend logs # Token spend is logged at the end of webhook processing, so we need to read enough entries - for entry in self._stream_log_entries(max_files=25, max_entries=50000): + async for entry in self._stream_log_entries(max_files=25, max_entries=50000): if not self._entry_matches_filters(entry, hook_id=hook_id): continue filtered_entries.append(entry) @@ -731,9 +732,9 @@ def _build_workflow_timeline(self, workflow_steps: list[LogEntry], hook_id: str) "steps": timeline_steps, } - def _stream_log_entries( + async def _stream_log_entries( self, max_files: int = 10, _chunk_size: int = 1000, max_entries: int = 50000 - ) -> Iterator[LogEntry]: + ) -> AsyncGenerator[LogEntry]: """Stream log entries from configured log files in chunks to reduce memory usage. This replaces _load_log_entries() to prevent memory exhaustion from loading @@ -789,28 +790,57 @@ def sort_key(f: Path) -> tuple: buffer: deque[LogEntry] = deque(maxlen=remaining_capacity) - with open(log_file, encoding="utf-8") as f: + async with aiofiles.open(log_file, encoding="utf-8") as f: # Use appropriate parser based on file type if log_file.suffix == ".json": - # JSON files: read content and detect format - content = f.read() - # Detect format: check if file contains blank line separators - if "\n\n" in content: + # JSON files: stream incrementally without loading entire file + # Detect format by reading first few lines + first_lines = [] + for _ in range(20): + line = await f.readline() + if not line: + break + first_lines.append(line) + # Early exit if we find a blank line + if not line.strip(): + break + + # Check if format uses blank line separators (pretty-printed JSON) + has_blank_lines = any(line.strip() == "" for line in first_lines) + + # Reset file pointer to beginning + await f.seek(0) + + if has_blank_lines: # Format 1: Pretty-printed JSON with blank line separators - json_blocks = content.split("\n\n") - for block in json_blocks: + # Stream blocks incrementally + current_block_lines: list[str] = [] + async for line in f: + line_content = line.rstrip("\n") # Preserve original for JSON content + if not line.strip(): # Check if line is blank/whitespace-only + if current_block_lines: + block = "\n".join(current_block_lines) + entry = self.log_parser.parse_json_log_entry(block) + if entry: + buffer.append(entry) + current_block_lines = [] + else: + current_block_lines.append(line_content) + # Handle last block (no trailing blank line) + if current_block_lines: + block = "\n".join(current_block_lines) entry = self.log_parser.parse_json_log_entry(block) if entry: buffer.append(entry) else: # Format 2: Single-line JSON entries (one per line) - for line in content.splitlines(): + async for line in f: entry = self.log_parser.parse_json_log_entry(line) if entry: buffer.append(entry) else: # Text log files: parse line by line - for line in f: + async for line in f: entry = self.log_parser.parse_log_entry(line) if entry: buffer.append(entry) @@ -826,7 +856,9 @@ def sort_key(f: Path) -> tuple: except Exception as e: self.logger.warning(f"Error streaming log file {log_file}: {e}") - def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000) -> Iterator[dict[str, Any]]: + async def _stream_json_log_entries( + self, max_files: int = 10, max_entries: int = 50000 + ) -> AsyncGenerator[dict[str, Any]]: """Stream raw JSON log entries from webhooks_*.json files. Returns raw JSON dicts instead of LogEntry objects for access to full structured data. @@ -857,35 +889,63 @@ def _stream_json_log_entries(self, max_files: int = 10, max_entries: int = 50000 break try: - with open(log_file, encoding="utf-8") as f: - # Read file content - content = f.read() + # Stream JSON entries incrementally without loading entire file + remaining = max_entries - total_yielded + block_buffer: deque[str] = deque(maxlen=remaining) + + async with aiofiles.open(log_file, encoding="utf-8") as f: + # Detect format by reading first few lines + first_lines = [] + for _ in range(20): + line = await f.readline() + if not line: + break + first_lines.append(line) + # Early exit if we find a blank line + if not line.strip(): + break + + # Check if format uses blank line separators (pretty-printed JSON) + has_blank_lines = any(line.strip() == "" for line in first_lines) - # Detect format: check if file contains blank line separators - if "\n\n" in content: + # Reset file pointer to beginning + await f.seek(0) + + if has_blank_lines: # Format 1: Pretty-printed JSON with blank line separators - json_blocks = content.split("\n\n") + # Stream blocks incrementally + current_block_lines: list[str] = [] + async for line in f: + line_content = line.rstrip("\n") # Preserve original for JSON content + if not line.strip(): # Check if line is blank/whitespace-only + if current_block_lines: + block = "\n".join(current_block_lines) + block_buffer.append(block) + current_block_lines = [] + else: + current_block_lines.append(line_content) + # Handle last block (no trailing blank line) + if current_block_lines: + block = "\n".join(current_block_lines) + block_buffer.append(block) else: # Format 2: Single-line JSON entries (one per line) - json_blocks = content.splitlines() - - # Use deque to limit entries for memory efficiency - remaining = max_entries - total_yielded - block_buffer = deque(json_blocks, maxlen=remaining) + async for line in f: + block_buffer.append(line.rstrip("\n")) - # Process blocks in reverse order (newest first) - for block in reversed(block_buffer): - if total_yielded >= max_entries: - break + # Process blocks in reverse order (newest first) + for block in reversed(block_buffer): + if total_yielded >= max_entries: + break - data = self.log_parser.get_raw_json_entry(block) - if data: - yield data - total_yielded += 1 + data = self.log_parser.get_raw_json_entry(block) + if data: + yield data + total_yielded += 1 except Exception as e: self.logger.warning(f"Error streaming JSON log file {log_file}: {e}") - def _load_log_entries(self) -> list[LogEntry]: + async def _load_log_entries(self) -> list[LogEntry]: """Load log entries using streaming approach for memory efficiency. This method now uses the streaming approach internally but returns a list @@ -895,7 +955,7 @@ def _load_log_entries(self) -> list[LogEntry]: List of parsed log entries (limited to prevent memory exhaustion) """ # Use streaming with reasonable limits to prevent memory issues - entries = list(self._stream_log_entries(max_files=10, max_entries=10000)) + entries = [entry async for entry in self._stream_log_entries(max_files=10, max_entries=10000)] self.logger.info(f"Loaded {len(entries)} entries using streaming approach") return entries @@ -909,7 +969,7 @@ def _get_log_directory(self) -> Path: log_dir_path = os.path.join(self.config.data_dir, "logs") return Path(log_dir_path) - def _get_log_viewer_html(self) -> str: + async def _get_log_viewer_html(self) -> str: """Load and return the log viewer HTML template. Returns: @@ -922,8 +982,8 @@ def _get_log_viewer_html(self) -> str: template_path = Path(__file__).parent / "templates" / "log_viewer.html" try: - with open(template_path, encoding="utf-8") as f: - return f.read() + async with aiofiles.open(template_path, encoding="utf-8") as f: + return await f.read() except FileNotFoundError: self.logger.exception(f"Log viewer template not found at {template_path}") return self._get_fallback_html() @@ -1095,23 +1155,17 @@ def _estimate_total_log_count(self) -> str: if not log_files: return "0" - # Quick estimation based on file sizes and line counts from a sample + # Quick estimation based on file sizes total_estimate = 0 for log_file in log_files[:10]: # Sample first 10 files to avoid performance impact try: - # Quick line count estimation - with open(log_file, "rb") as f: - line_count = sum(1 for _ in f) - total_estimate += line_count + # Estimate based on file size (faster than counting lines) + file_size = log_file.stat().st_size + # Rough estimate: average log line is ~200 bytes + estimated_lines = file_size // 200 + total_estimate += estimated_lines except Exception: - # If we can't read a file, estimate based on file size - try: - file_size = log_file.stat().st_size - # Rough estimate: average log line is ~200 bytes - estimated_lines = file_size // 200 - total_estimate += estimated_lines - except Exception: - continue + continue # If we processed fewer than all files, extrapolate if len(log_files) > 10: From 7dc076501bd49b49c4779e80bb1bbd82bf119872 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 15:35:03 +0200 Subject: [PATCH 09/14] fix: switch structured logging from pretty-print to JSONL format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove indent=2 from json.dumps for compact output - Use single newline separator instead of double - Simplify log_viewer.py parsing (no format detection needed) - Update tests and docstrings for JSONL format JSONL format is simpler to parse, more efficient, and widely supported. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- webhook_server/libs/log_parser.py | 16 +-- webhook_server/tests/test_log_viewer.py | 20 +--- .../tests/test_structured_logger.py | 12 +- webhook_server/utils/structured_logger.py | 30 ++--- webhook_server/web/log_viewer.py | 105 +++--------------- 5 files changed, 48 insertions(+), 135 deletions(-) diff --git a/webhook_server/libs/log_parser.py b/webhook_server/libs/log_parser.py index c24863414..ff0f494bb 100644 --- a/webhook_server/libs/log_parser.py +++ b/webhook_server/libs/log_parser.py @@ -311,14 +311,12 @@ def parse_log_file(self, file_path: Path) -> list[LogEntry]: return entries def parse_json_log_entry(self, json_line: str) -> LogEntry | None: - """Parse a JSON log entry (single-line or multi-line) into a LogEntry object. + """Parse a JSONL log entry into a LogEntry object. - Handles both formats: - - Single-line compact JSON: {"hook_id": "abc", ...} - - Multi-line pretty-printed JSON with indentation + Parses JSONL format (one compact JSON object per line). Args: - json_line: Raw JSON string from webhooks_*.json files (may be multi-line) + json_line: Raw JSON string from webhooks_*.json files (single line) Returns: LogEntry object if parsing successful, None otherwise @@ -453,14 +451,12 @@ def parse_json_log_file(self, file_path: Path) -> list[LogEntry]: return entries def get_raw_json_entry(self, json_line: str) -> dict[str, Any] | None: - """Parse a JSON log entry (single-line or multi-line) and return the raw dictionary. + """Parse a JSONL log entry and return the raw dictionary. - Handles both formats: - - Single-line compact JSON: {"hook_id": "abc", ...} - - Multi-line pretty-printed JSON with indentation + Parses JSONL format (one compact JSON object per line). Args: - json_line: Raw JSON string (may be multi-line) + json_line: Raw JSON string (single line) Returns: Parsed JSON dictionary, or None if parsing fails diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index 2ad17d470..e80a2e116 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -497,25 +497,17 @@ async def test_get_workflow_steps_json_with_multiple_entries_same_file( assert result["pr"]["number"] == 200 async def test_stream_json_log_entries_pretty_printed_format(self, controller, tmp_path, sample_json_webhook_data): - """Test _stream_json_log_entries with pretty-printed JSON (blank line separators).""" + """Test _stream_json_log_entries with JSONL format (one JSON object per line).""" log_dir = tmp_path / "logs" log_dir.mkdir() - # Create pretty-printed JSON log file (with blank line separators) + # Create JSONL format log file (one JSON object per line) log_file = log_dir / "webhooks_2025-01-05.json" with open(log_file, "w", encoding="utf-8") as f: - # Entry 1 - f.write("{\n") - f.write(' "hook_id": "hook-1",\n') - f.write(' "event_type": "pull_request"\n') - f.write("}\n") - f.write("\n") # Blank line separator - # Entry 2 - f.write("{\n") - f.write(' "hook_id": "hook-2",\n') - f.write(' "event_type": "check_run"\n') - f.write("}\n") - # No trailing blank line to test last block handling + # Entry 1 - single line JSON + f.write('{"hook_id": "hook-1", "event_type": "pull_request"}\n') + # Entry 2 - single line JSON + f.write('{"hook_id": "hook-2", "event_type": "check_run"}\n') # Stream entries entries = [entry async for entry in controller._stream_json_log_entries(max_files=10, max_entries=100)] diff --git a/webhook_server/tests/test_structured_logger.py b/webhook_server/tests/test_structured_logger.py index 1ed59cc30..b087caab0 100644 --- a/webhook_server/tests/test_structured_logger.py +++ b/webhook_server/tests/test_structured_logger.py @@ -189,7 +189,7 @@ def test_write_log_does_not_mutate_context( def test_write_log_multiple_entries_append( self, log_writer: StructuredLogWriter, sample_context: WebhookContext, tmp_path: Path ) -> None: - """Test multiple writes append to same file (pretty-printed format).""" + """Test multiple writes append to same file (JSONL format).""" # Arrange context2 = WebhookContext( hook_id="test-hook-456", @@ -207,12 +207,12 @@ def test_write_log_multiple_entries_append( with open(log_file) as f: content = f.read().strip() - # Split by double newline to separate pretty-printed JSON entries - json_blocks = content.split("\n\n") - assert len(json_blocks) == 2 + # Split by single newline to separate JSONL entries + json_lines = content.split("\n") + assert len(json_lines) == 2 - entry1 = json.loads(json_blocks[0]) - entry2 = json.loads(json_blocks[1]) + entry1 = json.loads(json_lines[0]) + entry2 = json.loads(json_lines[1]) assert entry1["hook_id"] == "test-hook-123" assert entry2["hook_id"] == "test-hook-456" diff --git a/webhook_server/utils/structured_logger.py b/webhook_server/utils/structured_logger.py index e85959a4a..9de73b220 100644 --- a/webhook_server/utils/structured_logger.py +++ b/webhook_server/utils/structured_logger.py @@ -1,19 +1,19 @@ """Structured JSON logging for webhook execution tracking. -This module provides JSON-based logging for webhook executions with pretty-printed formatting. -Each webhook execution generates a formatted JSON entry containing all workflow steps, +This module provides JSON-based logging for webhook executions in JSONL format. +Each webhook execution generates a compact JSON entry containing all workflow steps, timing, errors, and API metrics. Architecture: -- Pretty-printed JSON: Multi-line JSON objects with 2-space indentation for human readability -- Entry separation: Blank lines between entries for visual clarity +- JSONL format: One compact JSON object per line (no indentation) +- Entry separation: Newline character between entries - Date-based files: webhooks_YYYY-MM-DD.json for easy log rotation - Atomic writes: Temporary file + rename for crash safety - Concurrent writes: File locking to handle multiple webhook processes Log File Format: - Location: {config.data_dir}/logs/webhooks_YYYY-MM-DD.json -- Format: Pretty-printed JSON with blank line separators +- Format: JSONL (JSON Lines - one JSON object per line) - Rotation: Daily based on date - Size: Unbounded (external rotation recommended) @@ -53,7 +53,7 @@ class StructuredLogWriter: """JSON log writer for webhook execution tracking. - Writes webhook execution contexts as pretty-printed JSON to date-based log files. + Writes webhook execution contexts as JSONL (one compact JSON object per line) to date-based log files. Provides atomic writes with file locking for safe concurrent access. Attributes: @@ -91,10 +91,10 @@ def _get_log_file_path(self, date: datetime | None = None) -> Path: return self.log_dir / f"webhooks_{date_str}.json" def write_log(self, context: WebhookContext) -> None: - """Write webhook context as pretty-printed JSON to date-based log file. + """Write webhook context as JSONL entry to date-based log file. - Writes a formatted JSON entry (2-space indentation) containing complete webhook execution context. - Each entry is followed by a blank line for visual separation. + Writes a compact JSON entry (single line, no indentation) containing complete webhook execution context. + Each entry is terminated by a newline character. Uses atomic write pattern (temp file + rename) with file locking for safety. Args: @@ -117,8 +117,8 @@ def write_log(self, context: WebhookContext) -> None: # Get log file path log_file = self._get_log_file_path(completed_at) - # Serialize context to JSON (pretty-printed with indentation) - log_entry = json.dumps(context_dict, ensure_ascii=False, indent=2) + # Serialize context to JSON (compact JSONL format - single line, no indentation) + log_entry = json.dumps(context_dict, ensure_ascii=False) # Atomic write with file locking try: @@ -135,8 +135,8 @@ def write_log(self, context: WebhookContext) -> None: fcntl.flock(temp_fd, fcntl.LOCK_EX) try: - # Write JSON entry with newline and blank line separator - os.write(temp_fd, f"{log_entry}\n\n".encode()) + # Write JSON entry with single newline (JSONL format) + os.write(temp_fd, f"{log_entry}\n".encode()) os.fsync(temp_fd) # Ensure data is written to disk # Append to target log file (atomic on POSIX) @@ -240,13 +240,13 @@ def write_error_log( # Write to log file log_file = self._get_log_file_path() - log_entry = json.dumps(error_entry, ensure_ascii=False, indent=2) + log_entry = json.dumps(error_entry, ensure_ascii=False) with open(log_file, "a") as log_fd: if HAS_FCNTL: fcntl.flock(log_fd.fileno(), fcntl.LOCK_EX) try: - log_fd.write(f"{log_entry}\n\n") + log_fd.write(f"{log_entry}\n") log_fd.flush() os.fsync(log_fd.fileno()) finally: diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index bbeebf2bf..2d9f05433 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -740,7 +740,7 @@ async def _stream_log_entries( This replaces _load_log_entries() to prevent memory exhaustion from loading all log files simultaneously. Uses lazy evaluation and chunked processing. - Supports both text log files (*.log) and JSON log files (webhooks_*.json). + Supports both text log files (*.log) and JSONL log files (webhooks_*.json). Args: max_files: Maximum number of log files to process (newest first) @@ -793,51 +793,11 @@ def sort_key(f: Path) -> tuple: async with aiofiles.open(log_file, encoding="utf-8") as f: # Use appropriate parser based on file type if log_file.suffix == ".json": - # JSON files: stream incrementally without loading entire file - # Detect format by reading first few lines - first_lines = [] - for _ in range(20): - line = await f.readline() - if not line: - break - first_lines.append(line) - # Early exit if we find a blank line - if not line.strip(): - break - - # Check if format uses blank line separators (pretty-printed JSON) - has_blank_lines = any(line.strip() == "" for line in first_lines) - - # Reset file pointer to beginning - await f.seek(0) - - if has_blank_lines: - # Format 1: Pretty-printed JSON with blank line separators - # Stream blocks incrementally - current_block_lines: list[str] = [] - async for line in f: - line_content = line.rstrip("\n") # Preserve original for JSON content - if not line.strip(): # Check if line is blank/whitespace-only - if current_block_lines: - block = "\n".join(current_block_lines) - entry = self.log_parser.parse_json_log_entry(block) - if entry: - buffer.append(entry) - current_block_lines = [] - else: - current_block_lines.append(line_content) - # Handle last block (no trailing blank line) - if current_block_lines: - block = "\n".join(current_block_lines) - entry = self.log_parser.parse_json_log_entry(block) - if entry: - buffer.append(entry) - else: - # Format 2: Single-line JSON entries (one per line) - async for line in f: - entry = self.log_parser.parse_json_log_entry(line) - if entry: - buffer.append(entry) + # JSONL files: one compact JSON object per line + async for line in f: + entry = self.log_parser.parse_json_log_entry(line) + if entry: + buffer.append(entry) else: # Text log files: parse line by line async for line in f: @@ -862,7 +822,7 @@ async def _stream_json_log_entries( """Stream raw JSON log entries from webhooks_*.json files. Returns raw JSON dicts instead of LogEntry objects for access to full structured data. - Handles both single-line and multi-line JSON entries separated by blank lines. + Reads JSONL format (one JSON object per line). Args: max_files: Maximum number of log files to process (newest first) @@ -889,56 +849,21 @@ async def _stream_json_log_entries( break try: - # Stream JSON entries incrementally without loading entire file + # Stream JSONL entries incrementally without loading entire file remaining = max_entries - total_yielded - block_buffer: deque[str] = deque(maxlen=remaining) + line_buffer: deque[str] = deque(maxlen=remaining) async with aiofiles.open(log_file, encoding="utf-8") as f: - # Detect format by reading first few lines - first_lines = [] - for _ in range(20): - line = await f.readline() - if not line: - break - first_lines.append(line) - # Early exit if we find a blank line - if not line.strip(): - break - - # Check if format uses blank line separators (pretty-printed JSON) - has_blank_lines = any(line.strip() == "" for line in first_lines) - - # Reset file pointer to beginning - await f.seek(0) - - if has_blank_lines: - # Format 1: Pretty-printed JSON with blank line separators - # Stream blocks incrementally - current_block_lines: list[str] = [] - async for line in f: - line_content = line.rstrip("\n") # Preserve original for JSON content - if not line.strip(): # Check if line is blank/whitespace-only - if current_block_lines: - block = "\n".join(current_block_lines) - block_buffer.append(block) - current_block_lines = [] - else: - current_block_lines.append(line_content) - # Handle last block (no trailing blank line) - if current_block_lines: - block = "\n".join(current_block_lines) - block_buffer.append(block) - else: - # Format 2: Single-line JSON entries (one per line) - async for line in f: - block_buffer.append(line.rstrip("\n")) + # JSONL format: one JSON object per line + async for line in f: + line_buffer.append(line.rstrip("\n")) - # Process blocks in reverse order (newest first) - for block in reversed(block_buffer): + # Process lines in reverse order (newest first) + for line in reversed(line_buffer): if total_yielded >= max_entries: break - data = self.log_parser.get_raw_json_entry(block) + data = self.log_parser.get_raw_json_entry(line) if data: yield data total_yielded += 1 From fa5e4b1f4c50b16f76edf24d1e90e8fa01bb75b4 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 15:53:43 +0200 Subject: [PATCH 10/14] fix: handle missing duration_ms in workflow steps and improve test stability Fixes KeyError in log_webhook_summary when workflow steps don't have duration_ms field. This occurs when steps aren't properly completed before context finalization. Changes: - Use .get() with defaults for status and duration_ms in log_webhook_summary - Add proper CancelledError handling in log_viewer async operations - Fix test type hints and async generator patterns for mypy strict compliance - Adjust performance test thresholds for CI stability (5x tolerance) - Update types-aiofiles dependency to latest version - Improve test documentation for JSON parsing behavior --- pyproject.toml | 2 +- uv.lock | 2 +- .../tests/test_edge_cases_validation.py | 28 +++++---- webhook_server/tests/test_log_api.py | 10 ++-- webhook_server/tests/test_log_viewer.py | 59 ++++++++++++------- .../tests/test_memory_optimization.py | 14 +++-- webhook_server/utils/app_utils.py | 6 +- webhook_server/web/log_viewer.py | 21 ++++++- 8 files changed, 96 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 856c19863..4b7185fcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ build-backend = "hatchling.build" dev = [ "ipdb>=0.13.13", "ipython>=8.12.3", - "types-aiofiles>=24.1.0.20241221", + "types-aiofiles>=25.1.0.20251011", "types-colorama>=0.4.15.20240311", "types-pyyaml>=6.0.12.20250516", "types-requests>=2.32.4.20250611", diff --git a/uv.lock b/uv.lock index 31ff28c77..b11065603 100644 --- a/uv.lock +++ b/uv.lock @@ -466,7 +466,7 @@ provides-extras = ["tests"] dev = [ { name = "ipdb", specifier = ">=0.13.13" }, { name = "ipython", specifier = ">=8.12.3" }, - { name = "types-aiofiles", specifier = ">=24.1.0.20241221" }, + { name = "types-aiofiles", specifier = ">=25.1.0.20251011" }, { name = "types-colorama", specifier = ">=0.4.15.20240311" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250516" }, { name = "types-requests", specifier = ">=2.32.4.20250611" }, diff --git a/webhook_server/tests/test_edge_cases_validation.py b/webhook_server/tests/test_edge_cases_validation.py index 5c69429fe..e428dfb6d 100644 --- a/webhook_server/tests/test_edge_cases_validation.py +++ b/webhook_server/tests/test_edge_cases_validation.py @@ -6,7 +6,7 @@ import os import tempfile import time -from collections.abc import Generator +from collections.abc import AsyncIterator, Generator from pathlib import Path from unittest.mock import AsyncMock, Mock, patch @@ -711,14 +711,14 @@ class TestAPIEndpointEdgeCases: async def test_api_with_malformed_parameters(self): """Test API behavior with malformed parameters.""" - async def async_iter_empty(): - return - yield # Make this a generator function + async def async_iter_empty() -> AsyncIterator[LogEntry]: + if False: + yield # Make this a generator function mock_logger = Mock() controller = LogViewerController(logger=mock_logger) - with patch.object(controller, "_stream_log_entries", return_value=async_iter_empty()): + with patch.object(controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_empty()): with patch.object(controller, "_estimate_total_log_count", return_value=0): # Test truly malformed parameters that should raise exceptions invalid_params = [ @@ -763,11 +763,15 @@ async def test_api_with_extremely_large_responses(self): ) large_entries.append(entry) - async def async_iter_wrapper(items): + async def async_iter_wrapper(items: list[LogEntry]) -> AsyncIterator[LogEntry]: + if False: + yield # Make this a generator function for item in items: yield item - with patch.object(controller, "_stream_log_entries", return_value=async_iter_wrapper(large_entries[:1000])): + with patch.object( + controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_wrapper(large_entries[:1000]) + ): # Test with default limit - the controller will process available entries and apply pagination result = await controller.get_log_entries() assert "entries" in result @@ -883,14 +887,18 @@ async def test_multiple_users_different_filters(self): {"repository": "repo-2", "search": "500"}, ] - async def user_request(controller, filters): + async def user_request(controller: LogViewerController, filters: dict) -> dict: """Simulate a user making a request.""" - async def async_iter_wrapper(items): + async def async_iter_wrapper(items: list[LogEntry]) -> AsyncIterator[LogEntry]: + if False: + yield # Make this a generator function for item in items: yield item - with patch.object(controller, "_stream_log_entries", return_value=async_iter_wrapper(entries)): + with patch.object( + controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_wrapper(entries) + ): return await controller.get_log_entries(**filters) # Execute concurrent requests diff --git a/webhook_server/tests/test_log_api.py b/webhook_server/tests/test_log_api.py index c6bba7de4..f5b608e04 100644 --- a/webhook_server/tests/test_log_api.py +++ b/webhook_server/tests/test_log_api.py @@ -188,7 +188,7 @@ async def test_export_logs_filtered_entries_too_large(self, controller): await controller.export_logs(format_type="json", limit=51000) assert exc.value.status_code == 413 - async def test_get_pr_flow_data_success(self, controller, sample_log_entries): + async def test_get_pr_flow_data_success(self, controller): """Test PR flow data retrieval.""" # Create entries with matching hook_id matching_entries = [ @@ -213,7 +213,7 @@ async def test_get_pr_flow_data_not_found(self, controller): await controller.get_pr_flow_data("nonexistent") assert exc.value.status_code == 404 - async def test_get_pr_flow_data_hook_prefix(self, controller, sample_log_entries): + async def test_get_pr_flow_data_hook_prefix(self, controller): """Test PR flow data with hook- prefix.""" matching_entries = [ LogEntry( @@ -230,7 +230,7 @@ async def test_get_pr_flow_data_hook_prefix(self, controller, sample_log_entries result = await controller.get_pr_flow_data("hook-123") assert result == {"test": "data"} - async def test_get_pr_flow_data_pr_prefix(self, controller, sample_log_entries): + async def test_get_pr_flow_data_pr_prefix(self, controller): """Test PR flow data with pr- prefix.""" matching_entries = [ LogEntry( @@ -248,7 +248,7 @@ async def test_get_pr_flow_data_pr_prefix(self, controller, sample_log_entries): result = await controller.get_pr_flow_data("pr-123") assert result == {"test": "data"} - async def test_get_pr_flow_data_direct_number(self, controller, sample_log_entries): + async def test_get_pr_flow_data_direct_number(self, controller): """Test PR flow data with direct PR number.""" matching_entries = [ LogEntry( @@ -266,7 +266,7 @@ async def test_get_pr_flow_data_direct_number(self, controller, sample_log_entri result = await controller.get_pr_flow_data("123") assert result == {"test": "data"} - async def test_get_pr_flow_data_direct_hook_id(self, controller, sample_log_entries): + async def test_get_pr_flow_data_direct_hook_id(self, controller): """Test PR flow data with direct hook ID.""" matching_entries = [ LogEntry( diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index e80a2e116..642b5eeed 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -49,10 +49,11 @@ - Some error handling paths - edge cases in workflow step extraction """ +import asyncio import copy import datetime import json -import time +from collections.abc import AsyncIterator from pathlib import Path from unittest.mock import Mock, patch @@ -246,7 +247,7 @@ async def test_stream_json_log_entries_newest_first_ordering(self, controller, t entry1["hook_id"] = "old-hook" self.create_json_log_file(log_dir, "webhooks_2025-01-01.json", [entry1]) - time.sleep(0.01) # Ensure different mtime + await asyncio.sleep(0.01) # Ensure different mtime # Newer file entry2 = sample_json_webhook_data.copy() @@ -496,7 +497,7 @@ async def test_get_workflow_steps_json_with_multiple_entries_same_file( assert result["hook_id"] == "target-hook" assert result["pr"]["number"] == 200 - async def test_stream_json_log_entries_pretty_printed_format(self, controller, tmp_path, sample_json_webhook_data): + async def test_stream_json_log_entries_pretty_printed_format(self, controller, tmp_path): """Test _stream_json_log_entries with JSONL format (one JSON object per line).""" log_dir = tmp_path / "logs" log_dir.mkdir() @@ -539,7 +540,12 @@ async def test_stream_json_log_entries_single_line_format(self, controller, tmp_ assert entries[2]["hook_id"] == "hook-1" async def test_stream_log_entries_with_pretty_printed_json(self, controller, tmp_path): - """Test _stream_log_entries with pretty-printed JSON files.""" + """Test _stream_log_entries with pretty-printed JSON files. + + Note: Pretty-printed JSON with blank lines is NOT parseable by parse_json_log_entry + which expects JSONL format (one JSON object per line). Each line is parsed independently, + and multi-line JSON objects cause parsing failures. This test verifies graceful handling. + """ log_dir = tmp_path / "logs" log_dir.mkdir() @@ -563,28 +569,33 @@ async def test_stream_log_entries_with_pretty_printed_json(self, controller, tmp } f.write(json.dumps(entry2, indent=2)) - # Stream entries - just verify no errors and entries are produced + # Stream entries - pretty-printed JSON cannot be parsed line-by-line entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] - # Should yield entries (exact count may vary based on parsing logic) - assert len(entries) >= 0 # At minimum, no crash + # No entries expected - pretty-printed JSON (multi-line) is not parseable by JSONL parser + assert len(entries) == 0, "Pretty-printed JSON should not parse (JSONL expects one JSON per line)" async def test_stream_log_entries_with_single_line_json(self, controller, tmp_path): - """Test _stream_log_entries with single-line JSON format.""" + """Test _stream_log_entries with single-line JSON format. + + Note: JSON entries without timing.started_at field will not parse + (parse_json_log_entry requires timestamp for LogEntry creation). + This test verifies graceful handling of incomplete JSON entries. + """ log_dir = tmp_path / "logs" log_dir.mkdir() - # Create single-line JSON log file + # Create single-line JSON log file without timing fields log_file = log_dir / "webhooks_2025-01-05.json" with open(log_file, "w", encoding="utf-8") as f: f.write('{"hook_id": "hook-1", "event_type": "pull_request", "repository": "org/repo"}\n') f.write('{"hook_id": "hook-2", "event_type": "check_run", "repository": "org/repo2"}\n') - # Stream entries - just verify no errors and entries are produced + # Stream entries - JSON without timing.started_at cannot be parsed entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] - # Should yield entries (exact count may vary based on parsing logic) - assert len(entries) >= 0 # At minimum, no crash + # No entries expected - parse_json_log_entry requires timing.started_at field + assert len(entries) == 0, "JSON entries without timing.started_at should not parse" async def test_stream_log_entries_handles_file_read_errors(self, controller, tmp_path): """Test _stream_log_entries gracefully handles file read errors.""" @@ -678,27 +689,32 @@ async def test_stream_json_log_entries_format_detection_with_whitespace_lines(se assert len(entries) == 3 async def test_stream_log_entries_format_detection_early_exit(self, controller, tmp_path): - """Test that format detection exits early when blank line is found.""" + """Test that format detection exits early when blank line is found. + + Note: This test uses pretty-printed JSON without timing.started_at field, + which cannot be parsed into LogEntry objects. The test verifies that the + parser handles this gracefully without crashing. + """ log_dir = tmp_path / "logs" log_dir.mkdir() # Create JSON file with blank line in first 5 lines log_file = log_dir / "webhooks_2025-01-05.json" with open(log_file, "w", encoding="utf-8") as f: - # First entry (pretty-printed) + # First entry (pretty-printed, missing timing field) entry1 = {"hook_id": "hook-1"} f.write(json.dumps(entry1, indent=2)) f.write("\n") f.write("\n") # Blank line at line 4 - should trigger early exit - # Second entry + # Second entry (pretty-printed, missing timing field) entry2 = {"hook_id": "hook-2"} f.write(json.dumps(entry2, indent=2)) - # Stream entries - just verify no errors + # Stream entries - pretty-printed JSON without timing cannot be parsed entries = [entry async for entry in controller._stream_log_entries(max_files=10, max_entries=100)] - # Should detect pretty-printed format without crashing - assert len(entries) >= 0 # At minimum, no crash + # No entries expected - JSON lacks timing.started_at and is pretty-printed (multi-line) + assert len(entries) == 0, "Pretty-printed JSON without timing.started_at should not parse" async def test_stream_log_entries_empty_json_file(self, controller, tmp_path): """Test _stream_log_entries with empty JSON file.""" @@ -857,8 +873,9 @@ async def test_get_log_entries_partial_scan(self, controller, tmp_path, monkeypa # Return more entries than max_entries to trigger partial scan original_stream = controller._stream_log_entries - async def mock_stream(*args, **kwargs): - max_entries = kwargs.get("max_entries", 20000) + async def mock_stream(*args: object, **kwargs: object) -> AsyncIterator[LogEntry]: + max_entries_value = kwargs.get("max_entries", 20000) + max_entries = int(max_entries_value) if not isinstance(max_entries_value, int) else max_entries_value # Simulate hitting max by yielding exactly max_entries count = 0 async for entry in original_stream(*args, **kwargs): @@ -913,7 +930,7 @@ async def test_get_log_entries_file_access_error(self, controller, tmp_path, mon log_file.write_text("test") # Mock _stream_log_entries to raise OSError - async def mock_stream_error(*args, **kwargs): + async def mock_stream_error(*args: object, **kwargs: object) -> AsyncIterator[LogEntry]: raise OSError("Simulated file access error") yield # Make it an async generator diff --git a/webhook_server/tests/test_memory_optimization.py b/webhook_server/tests/test_memory_optimization.py index f7e6879f6..bdb082b80 100644 --- a/webhook_server/tests/test_memory_optimization.py +++ b/webhook_server/tests/test_memory_optimization.py @@ -108,11 +108,13 @@ async def test_chunked_processing_efficiency(self): # Should process efficiently assert entries_processed == 2000 - assert duration < 2.0 # Should complete in under 2 seconds + # Loose threshold to accommodate slow CI runners - still catches major regressions + # (5x slower than typical performance is acceptable for CI stability) + assert duration < 5.0 # Should complete in reasonable time # Calculate throughput entries_per_second = entries_processed / duration - assert entries_per_second > 1000 # Should process at least 1000 entries/second + assert entries_per_second > 400 # Adjusted for looser duration threshold async def test_memory_efficient_filtering(self): """Test that memory-efficient filtering works correctly.""" @@ -161,7 +163,9 @@ async def test_early_termination_optimization(self): # Should complete quickly due to early termination assert len(result["entries"]) <= 50 - assert duration < 2.0 # Should complete in under 2 seconds + # Loose threshold to accommodate slow CI runners - still catches major regressions + # (5x slower than typical performance is acceptable for CI stability) + assert duration < 5.0 # Should complete in reasonable time # Should not process all 8000 entries # The streaming should stop after finding enough matching entries @@ -204,7 +208,9 @@ async def test_pagination_efficiency(self): # Should handle pagination efficiently assert len(result["entries"]) <= 100 assert result["offset"] == 2000 - assert duration < 2.0 # Should complete in reasonable time + # Loose threshold to accommodate slow CI runners - still catches major regressions + # (5x slower than typical performance is acceptable for CI stability) + assert duration < 5.0 # Should complete in reasonable time # Verify pagination worked correctly by checking timestamps # (entries should be from later in the log due to offset) diff --git a/webhook_server/utils/app_utils.py b/webhook_server/utils/app_utils.py index f1788edbc..f42638228 100644 --- a/webhook_server/utils/app_utils.py +++ b/webhook_server/utils/app_utils.py @@ -176,11 +176,11 @@ def log_webhook_summary(ctx: WebhookContext, logger: logging.Logger, log_prefix: raise ValueError("Context completed_at is None - context not completed") duration_ms = int((ctx.completed_at - ctx.started_at).total_seconds() * 1000) - # Build summary of workflow steps - all steps have duration_ms + # Build summary of workflow steps - handle incomplete steps gracefully steps_summary = [] for step_name, step_data in ctx.workflow_steps.items(): - status = step_data["status"] - step_duration_ms = step_data["duration_ms"] + status = step_data.get("status", "unknown") + step_duration_ms = step_data.get("duration_ms", 0) steps_summary.append(f"{step_name}:{status}({format_duration(step_duration_ms)})") steps_str = ", ".join(steps_summary) if steps_summary else "no steps recorded" diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 2d9f05433..939eee5c3 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -1,5 +1,6 @@ """Log viewer controller for serving log viewer web interface and API endpoints.""" +import asyncio import datetime import json import logging @@ -209,6 +210,9 @@ async def get_log_entries( "is_partial_scan": total_processed >= max_entries_to_process, # Indicates not all logs were scanned } + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except ValueError as e: self.logger.warning(f"Invalid parameters for log entries request: {e}") raise HTTPException(status_code=400, detail=str(e)) from e @@ -366,6 +370,9 @@ def generate() -> Generator[bytes]: headers={"Content-Disposition": f"attachment; filename={filename}"}, ) + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except ValueError as e: if "Result set too large" in str(e): self.logger.warning(f"Export request too large: {e}") @@ -492,6 +499,9 @@ async def get_pr_flow_data(self, hook_id: str) -> dict[str, Any]: flow_data = self._analyze_pr_flow(filtered_entries, hook_id) return flow_data + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except ValueError as e: if "No data found" in str(e): self.logger.warning(f"PR flow data not found: {e}") @@ -562,7 +572,7 @@ async def get_workflow_steps_json(self, hook_id: str) -> dict[str, Any]: "sender": entry.get("sender"), "pr": entry.get("pr"), "timing": entry.get("timing"), - "steps": entry.get("workflow_steps", {}), + "steps": entry.get("workflow_steps") or {}, "token_spend": entry.get("token_spend"), "success": entry.get("success"), "error": entry.get("error"), @@ -672,6 +682,9 @@ async def get_workflow_steps(self, hook_id: str) -> dict[str, Any]: timeline_data["token_spend"] = token_spend return timeline_data + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except ValueError as e: if "No data found" in str(e) or "No workflow steps found" in str(e): self.logger.warning(f"Workflow steps not found: {e}") @@ -813,6 +826,9 @@ def sort_key(f: Path) -> tuple: self.logger.debug(f"Streamed entries from {log_file.name}, total so far: {total_yielded}") + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except Exception as e: self.logger.warning(f"Error streaming log file {log_file}: {e}") @@ -867,6 +883,9 @@ async def _stream_json_log_entries( if data: yield data total_yielded += 1 + except asyncio.CancelledError: + self.logger.debug("Operation cancelled") + raise # Always re-raise CancelledError except Exception as e: self.logger.warning(f"Error streaming JSON log file {log_file}: {e}") From bf4fd2685d012a949b0dbdd9490f7b9658a160cb Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 16:07:13 +0200 Subject: [PATCH 11/14] refactor: address CodeRabbit review feedback for structured logging - Align test fixture timing schema with production (completed_at, duration_ms) - Replace defensive .get() defaults with explicit validation in app_utils.py - Replace broad Exception catch with specific OSError/PermissionError in log_viewer.py - Fix unused lambda arguments in test_edge_cases_validation.py These changes improve code quality by removing defensive programming overhead, aligning test fixtures with production schema, and using more specific exception handling. --- .../tests/test_edge_cases_validation.py | 6 +++--- webhook_server/tests/test_log_parser.py | 6 +++--- webhook_server/tests/test_log_viewer.py | 6 +++--- webhook_server/utils/app_utils.py | 15 ++++++++++++--- webhook_server/web/log_viewer.py | 3 ++- 5 files changed, 23 insertions(+), 13 deletions(-) diff --git a/webhook_server/tests/test_edge_cases_validation.py b/webhook_server/tests/test_edge_cases_validation.py index e428dfb6d..fd821d6d7 100644 --- a/webhook_server/tests/test_edge_cases_validation.py +++ b/webhook_server/tests/test_edge_cases_validation.py @@ -718,7 +718,7 @@ async def async_iter_empty() -> AsyncIterator[LogEntry]: mock_logger = Mock() controller = LogViewerController(logger=mock_logger) - with patch.object(controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_empty()): + with patch.object(controller, "_stream_log_entries", side_effect=lambda *_, **__: async_iter_empty()): with patch.object(controller, "_estimate_total_log_count", return_value=0): # Test truly malformed parameters that should raise exceptions invalid_params = [ @@ -770,7 +770,7 @@ async def async_iter_wrapper(items: list[LogEntry]) -> AsyncIterator[LogEntry]: yield item with patch.object( - controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_wrapper(large_entries[:1000]) + controller, "_stream_log_entries", side_effect=lambda *_, **__: async_iter_wrapper(large_entries[:1000]) ): # Test with default limit - the controller will process available entries and apply pagination result = await controller.get_log_entries() @@ -897,7 +897,7 @@ async def async_iter_wrapper(items: list[LogEntry]) -> AsyncIterator[LogEntry]: yield item with patch.object( - controller, "_stream_log_entries", side_effect=lambda *a, **k: async_iter_wrapper(entries) + controller, "_stream_log_entries", side_effect=lambda *_, **__: async_iter_wrapper(entries) ): return await controller.get_log_entries(**filters) diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index 471c31df7..2b9cd0fa8 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -1094,8 +1094,8 @@ def test_get_raw_json_entry_preserves_structure(self) -> None: "hook_id": "complex-hook", "timing": { "started_at": "2025-07-31T10:00:00Z", - "ended_at": "2025-07-31T10:00:05Z", - "duration_seconds": 5.123 + "completed_at": "2025-07-31T10:00:05Z", + "duration_ms": 5123 }, "pr": { "number": 123, @@ -1111,7 +1111,7 @@ def test_get_raw_json_entry_preserves_structure(self) -> None: assert result is not None assert result["hook_id"] == "complex-hook" - assert result["timing"]["duration_seconds"] == 5.123 + assert result["timing"]["duration_ms"] == 5123 assert result["pr"]["labels"] == ["bug", "enhancement"] assert result["error"] is None assert result["success"] is True diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index 642b5eeed..764ed69ae 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -97,8 +97,8 @@ def sample_json_webhook_data(self) -> dict: }, "timing": { "started_at": "2025-01-05T10:00:00.000000Z", - "ended_at": "2025-01-05T10:00:05.000000Z", - "duration_seconds": 5.0, + "completed_at": "2025-01-05T10:00:05.000000Z", + "duration_ms": 5000, }, "workflow_steps": { "step1": {"status": "completed", "duration_ms": 1000}, @@ -284,7 +284,7 @@ async def test_get_workflow_steps_json_returns_workflow_data(self, controller, t assert result["repository"] == "org/test-repo" assert result["sender"] == "test-user" assert result["pr"]["number"] == 456 - assert result["timing"]["duration_seconds"] == 5.0 + assert result["timing"]["duration_ms"] == 5000 assert result["steps"] == sample_json_webhook_data["workflow_steps"] assert result["token_spend"] == 35 assert result["success"] is True diff --git a/webhook_server/utils/app_utils.py b/webhook_server/utils/app_utils.py index f42638228..557eb5ab9 100644 --- a/webhook_server/utils/app_utils.py +++ b/webhook_server/utils/app_utils.py @@ -176,11 +176,20 @@ def log_webhook_summary(ctx: WebhookContext, logger: logging.Logger, log_prefix: raise ValueError("Context completed_at is None - context not completed") duration_ms = int((ctx.completed_at - ctx.started_at).total_seconds() * 1000) - # Build summary of workflow steps - handle incomplete steps gracefully + # Build summary of workflow steps - validate required fields steps_summary = [] for step_name, step_data in ctx.workflow_steps.items(): - status = step_data.get("status", "unknown") - step_duration_ms = step_data.get("duration_ms", 0) + if "status" not in step_data: + raise ValueError( + f"Workflow step '{step_name}' missing 'status' field - ensure complete_step() or fail_step() was called" + ) + if "duration_ms" not in step_data: + raise ValueError( + f"Workflow step '{step_name}' missing 'duration_ms' field - " + "ensure complete_step() or fail_step() was called" + ) + status = step_data["status"] + step_duration_ms = step_data["duration_ms"] steps_summary.append(f"{step_name}:{status}({format_duration(step_duration_ms)})") steps_str = ", ".join(steps_summary) if steps_summary else "no steps recorded" diff --git a/webhook_server/web/log_viewer.py b/webhook_server/web/log_viewer.py index 939eee5c3..425a0a1ab 100644 --- a/webhook_server/web/log_viewer.py +++ b/webhook_server/web/log_viewer.py @@ -1108,7 +1108,8 @@ def _estimate_total_log_count(self) -> str: # Rough estimate: average log line is ~200 bytes estimated_lines = file_size // 200 total_estimate += estimated_lines - except Exception: + except (OSError, PermissionError) as ex: + self.logger.debug(f"Failed to stat log file {log_file}: {ex}") continue # If we processed fewer than all files, extrapolate From bd19e5f59371f934cb342ac80d3423716192be5d Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 16:21:22 +0200 Subject: [PATCH 12/14] fix(tests,utils): address CodeRabbit review comments for type safety - Add explicit dict[str, Any] type hints in test_edge_cases_validation.py - Add return type annotations to mock functions in test_log_viewer.py - Add None check for duration_ms to prevent type errors in app_utils.py - Add type hints to _collect_entries helper in test_log_parser.py - Replace blocking file I/O with asyncio.to_thread in test_log_parser.py --- .../tests/test_edge_cases_validation.py | 5 +++-- webhook_server/tests/test_log_parser.py | 20 +++++++++++++------ webhook_server/tests/test_log_viewer.py | 8 ++++---- webhook_server/utils/app_utils.py | 4 ++-- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/webhook_server/tests/test_edge_cases_validation.py b/webhook_server/tests/test_edge_cases_validation.py index fd821d6d7..b96151201 100644 --- a/webhook_server/tests/test_edge_cases_validation.py +++ b/webhook_server/tests/test_edge_cases_validation.py @@ -8,6 +8,7 @@ import time from collections.abc import AsyncIterator, Generator from pathlib import Path +from typing import Any from unittest.mock import AsyncMock, Mock, patch import pytest @@ -879,7 +880,7 @@ async def test_multiple_users_different_filters(self): users.append(controller) # Different filter scenarios for each user - user_filters = [ + user_filters: list[dict[str, Any]] = [ {"repository": "repo-1", "level": "INFO"}, {"hook_id": "hook-25", "pr_number": 25}, {"search": "Message", "limit": 100}, @@ -887,7 +888,7 @@ async def test_multiple_users_different_filters(self): {"repository": "repo-2", "search": "500"}, ] - async def user_request(controller: LogViewerController, filters: dict) -> dict: + async def user_request(controller: LogViewerController, filters: dict[str, Any]) -> dict[str, Any]: """Simulate a user making a request.""" async def async_iter_wrapper(items: list[LogEntry]) -> AsyncIterator[LogEntry]: diff --git a/webhook_server/tests/test_log_parser.py b/webhook_server/tests/test_log_parser.py index 2b9cd0fa8..e5bf7868c 100644 --- a/webhook_server/tests/test_log_parser.py +++ b/webhook_server/tests/test_log_parser.py @@ -248,11 +248,14 @@ async def test_tail_log_file_with_new_content(self) -> None: # Give the tail a moment to start await asyncio.sleep(0.1) - # Add new content to the file - with open(f.name, "a") as append_f: - append_f.write("\n2025-07-31T10:01:00.000000 main DEBUG New entry 1") - append_f.write("\n2025-07-31T10:02:00.000000 main ERROR New entry 2") - append_f.flush() + # Add new content to the file - non-blocking + def _append_entries() -> None: + with open(f.name, "a") as append_f: + append_f.write("\n2025-07-31T10:01:00.000000 main DEBUG New entry 1") + append_f.write("\n2025-07-31T10:02:00.000000 main ERROR New entry 2") + append_f.flush() + + await asyncio.to_thread(_append_entries) # Wait for the tail to collect entries with timeout try: @@ -268,7 +271,12 @@ async def test_tail_log_file_with_new_content(self) -> None: assert entries[0].level == "DEBUG" assert entries[1].level == "ERROR" - async def _collect_entries(self, async_gen, entries_list, max_entries=10): + async def _collect_entries( + self, + async_gen: AsyncIterator[LogEntry], + entries_list: list[LogEntry], + max_entries: int = 10, + ) -> None: """Helper to collect entries from async generator with a limit.""" count = 0 async for entry in async_gen: diff --git a/webhook_server/tests/test_log_viewer.py b/webhook_server/tests/test_log_viewer.py index 764ed69ae..98688c386 100644 --- a/webhook_server/tests/test_log_viewer.py +++ b/webhook_server/tests/test_log_viewer.py @@ -1039,7 +1039,7 @@ async def test_shutdown_closes_websocket_connections(self, controller): ws2 = Mock() # Make close() async - async def mock_close(code, reason): + async def mock_close(code: int, reason: str) -> None: pass ws1.close = mock_close @@ -1060,7 +1060,7 @@ async def test_shutdown_handles_close_errors(self, controller): # Create mock WebSocket that raises error on close ws = Mock() - async def mock_close_error(code, reason): + async def mock_close_error(code: int, reason: str) -> None: raise Exception("Close error") ws.close = mock_close_error @@ -1095,7 +1095,7 @@ def controller(self, mock_logger, tmp_path): async def test_get_log_page_returns_html(self, controller): """Test get_log_page returns HTML content.""" - async def mock_get_html(): + async def mock_get_html() -> str: return "Test Log Viewer" with patch.object(controller, "_get_log_viewer_html", side_effect=mock_get_html): @@ -1107,7 +1107,7 @@ async def test_get_log_page_handles_template_missing(self, controller): """Test get_log_page returns fallback HTML when template is missing.""" # Mock the method to return fallback HTML (simulating missing template) - async def mock_get_html(): + async def mock_get_html() -> str: return controller._get_fallback_html() with patch.object(controller, "_get_log_viewer_html", side_effect=mock_get_html): diff --git a/webhook_server/utils/app_utils.py b/webhook_server/utils/app_utils.py index 557eb5ab9..dff57f25a 100644 --- a/webhook_server/utils/app_utils.py +++ b/webhook_server/utils/app_utils.py @@ -183,9 +183,9 @@ def log_webhook_summary(ctx: WebhookContext, logger: logging.Logger, log_prefix: raise ValueError( f"Workflow step '{step_name}' missing 'status' field - ensure complete_step() or fail_step() was called" ) - if "duration_ms" not in step_data: + if "duration_ms" not in step_data or step_data["duration_ms"] is None: raise ValueError( - f"Workflow step '{step_name}' missing 'duration_ms' field - " + f"Workflow step '{step_name}' missing or None 'duration_ms' field - " "ensure complete_step() or fail_step() was called" ) status = step_data["status"] From 9a00931ccef6f7cec9d9fd58c3580e34b9825283 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 16:24:45 +0200 Subject: [PATCH 13/14] docs: condense CLAUDE.md to fix performance warning Reduce file size from 40.2k to 18.3k chars by: - Merging duplicate PyGithub asyncio.to_thread sections - Reducing before/after examples - Removing redundant decision tree - Condensing verbose explanations --- CLAUDE.md | 977 +++++++++--------------------------------------------- 1 file changed, 157 insertions(+), 820 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5e3f5b0a9..de42f7d7a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,505 +7,156 @@ ### Backward Compatibility Policy **NO backward compatibility required for internal APIs:** - - Internal methods in `webhook_server/libs/` can change freely - Return types can change (e.g., `Any` → `bool`) - Method signatures can be modified without deprecation -- No version pinning or deprecation warnings needed **Backward compatibility ONLY for:** - - User-facing configuration files (`config.yaml`, `.github-webhook-server.yaml`) - Configuration schema changes (must support old formats or provide migration) - Webhook payload handling (must follow GitHub webhook spec) -**Rationale:** - -- This server is deployed as a single application -- All code is updated together - no external dependencies -- Internal refactoring is safe and encouraged -- Optimize for performance and clarity, not compatibility - -**Examples:** - -- ✅ Changing `get_branch() -> Any` to `get_branch() -> bool` - Internal API, no compatibility needed -- ✅ Refactoring internal methods - Internal implementation detail -- ❌ Changing config YAML structure - User-facing, needs migration path -- ❌ Breaking webhook event processing - GitHub spec must be followed +**Rationale:** This server is deployed as a single application. All code is updated together. Internal refactoring is safe and encouraged. ### Anti-Defensive Programming **CRITICAL: Eliminate unnecessary defensive programming overhead.** **Philosophy:** - -- This server fails-fast on startup if critical dependencies are missing +- Server fails-fast on startup if critical dependencies are missing - Required parameters in `__init__()` are ALWAYS provided - Checking for None on required parameters is pure overhead -- Defensive checks are ONLY acceptable for truly optional parameters +- Defensive checks ONLY acceptable for truly optional parameters - **Fail-fast is better than hiding bugs with fake data** --- ## WHEN Defensive Checks Are ACCEPTABLE -### 1. Destructors (`__del__`) - -**Reason:** Can be called during failed initialization - -```python -# ✅ CORRECT - __del__ can be called before __init__ completes -def __del__(self): - if hasattr(self, "logger"): # Legitimate - may not exist yet - self.logger.debug("Cleanup") - if hasattr(self, "rest_client") and self.rest_client: - self.rest_client.close() -``` - -### 2. Optional Parameters - -**Reason:** Parameter explicitly allows None - -```python -# ✅ CORRECT - owner/name are optional in signature -def get_data(self, owner: str | None = None, name: str | None = None): - if owner and name: # Legitimate check - parameters are optional - return await self.fetch_from_api(owner, name) - return await self.fetch_default() -``` - -### 3. Lazy Initialization - -**Reason:** Attribute explicitly starts as None - -```python -# ✅ CORRECT - client starts as None by design -def __init__(self): - self.client: SomeClient | None = None # Starts uninitialized - -async def query(self): - if not self.client: # Legitimate - lazy initialization - await self.initialize() -``` - -### 4. Platform Constants - -**Reason:** Constant may not exist on all platforms +1. **Destructors (`__del__`)** - Can be called during failed initialization + ```python + def __del__(self): + if hasattr(self, "logger"): self.logger.debug("Cleanup") + ``` -```python -# ✅ CORRECT - os.O_NOFOLLOW doesn't exist on Windows -if hasattr(os, "O_NOFOLLOW"): - flags |= os.O_NOFOLLOW -``` +2. **Optional Parameters** - Parameter explicitly allows None + ```python + def get_data(self, owner: str | None = None): ... + ``` -### 5. External Libraries We DON'T Control Version Of +3. **Lazy Initialization** - Attribute explicitly starts as None + ```python + self.client: SomeClient | None = None + ``` -**Reason:** Library version is truly unknown +4. **Platform Constants** - Constant may not exist on all platforms + ```python + if hasattr(os, "O_NOFOLLOW"): flags |= os.O_NOFOLLOW + ``` -**NOTE:** This does NOT apply to dependencies in `pyproject.toml` - we control those versions. - -```python -# ✅ CORRECT - Only if library version is TRULY unknown -if hasattr(external_lib, "new_method"): # Library version unknown - external_lib.new_method() -else: - external_lib.old_method() - -# ❌ WRONG - PyGithub version is controlled in pyproject.toml -if hasattr(self.rest_client, "close"): # PyGithub >=2.4.0 guaranteed - self.rest_client.close() -``` +5. **External Libraries We DON'T Control Version Of** - Library version truly unknown + - **NOTE:** Does NOT apply to dependencies in `pyproject.toml` - we control those versions --- ## WHEN Defensive Checks Are VIOLATIONS ### 1. Required Parameters in `__init__()` - -**VIOLATION:** Checking for attributes that are ALWAYS provided - ```python -# ❌ WRONG - config is required parameter, ALWAYS provided -def __init__(self, token: str, logger: logging.Logger, config: Config): - self.config = config - +# ❌ WRONG - config is required, ALWAYS provided def some_method(self): - if self.config: # VIOLATION - config is always present - value = self.config.get_value("key") + if self.config: value = self.config.get_value("key") # ✅ CORRECT def some_method(self): - value = self.config.get_value("key") # No check needed + value = self.config.get_value("key") ``` ### 2. Known Library Versions - -**VIOLATION:** Version checking for controlled dependencies - -We control these versions in `pyproject.toml`: - -- PyGithub >=2.4.0 (`self.rest_client.close()` exists) -- gql >=3.5.0 (all expected methods exist) - +We control these versions in `pyproject.toml`: PyGithub >=2.4.0, gql >=3.5.0 ```python -# ❌ WRONG - PyGithub >=2.4.0 is guaranteed in pyproject.toml -if hasattr(self.rest_client, "close"): - self.rest_client.close() +# ❌ WRONG - PyGithub >=2.4.0 guaranteed +if hasattr(self.rest_client, "close"): self.rest_client.close() # ✅ CORRECT -self.rest_client.close() # PyGithub >=2.4.0 has this method +self.rest_client.close() ``` ### 3. Architecture Guarantees - -**VIOLATION:** Checking for data guaranteed by architecture - -**Example:** `repository_data` is ALWAYS set before handlers instantiate (fail-fast in `GithubWebhook.process()`) - +`repository_data` is ALWAYS set before handlers instantiate (fail-fast in `GithubWebhook.process()`) ```python -# ❌ WRONG - repository_data is guaranteed by architecture -def __init__(self, github_webhook: GithubWebhook): - self.github_webhook = github_webhook - -def process_event(self, event_data: dict): - if hasattr(self.github_webhook, "repository_data"): # VIOLATION - collaborators = self.github_webhook.repository_data["collaborators"] +# ❌ WRONG - repository_data guaranteed by architecture +if hasattr(self.github_webhook, "repository_data"): ... # ✅ CORRECT -def process_event(self, event_data: dict): - collaborators = self.github_webhook.repository_data["collaborators"] - # No check - architecture guarantees this exists +collaborators = self.github_webhook.repository_data["collaborators"] ``` ### 4. Webhook Payload Fields - -**VIOLATION:** Checking for fields that are ALWAYS in GitHub webhooks - -GitHub webhook format is stable: - -- `user.node_id` always exists for user objects -- `user.type` always exists for user objects -- `sender` always exists in webhook payloads - +GitHub webhook format is stable: `user.node_id`, `user.type`, `sender` always exist ```python -# ❌ WRONG - user.node_id always exists in GitHub webhook -def get_user_id(self, user_data: dict) -> str: - if "node_id" in user_data: # VIOLATION - return user_data["node_id"] - return "" # Fake data hiding bugs +# ❌ WRONG +if "node_id" in user_data: return user_data["node_id"] # ✅ CORRECT - Let it fail if data is malformed -def get_user_id(self, user_data: dict) -> str: - return user_data["node_id"] # KeyError = legitimate bug +return user_data["node_id"] # KeyError = legitimate bug ``` -### 5. Type Discrimination (Use isinstance instead) - -**VIOLATION:** Using hasattr for type checking - +### 5. Type Discrimination ```python # ❌ WRONG - Use isinstance for type checking -def process_pr(self, pr: PullRequest): - if hasattr(pr, "some_attr"): # VIOLATION - pr_id = pr.some_attr - else: - pr_id = pr.node_id - -# ✅ CORRECT - Direct attribute access for PyGithub objects -def process_pr(self, pr: PullRequest): - pr_id = pr.node_id - pr_number = pr.number +if hasattr(pr, "some_attr"): ... + +# ✅ CORRECT +pr_id = pr.node_id # Direct attribute access ``` --- ## Fail-Fast Principle -**CRITICAL:** Fail-fast is better than hiding bugs with fake data. - -### ❌ WRONG: Returning Fake Defaults - -**Problem:** Fake data hides bugs and causes silent failures downstream - -```python -# ❌ WRONG - Returns fake empty user object -@property -def user(self): - if self._data and "user" in self._data: - return UserWrapper(self._data["user"]) - return UserWrapper(None) # Creates fake empty user - HIDES BUGS - -# Result: Code continues with fake user, fails mysteriously later -pr.user.login # Returns "" instead of failing -pr.user.node_id # Returns "" instead of failing -``` - -### ✅ CORRECT: Fail-Fast - -**Benefit:** Immediate clear error at the source of the problem - -```python -# ✅ CORRECT - Fail-fast with clear error -@property -def user(self): - if self._data and "user" in self._data: - return UserWrapper(self._data["user"]) - raise ValueError( - "No user data available - webhook response incomplete" - ) - -# Result: Clear error at source, easy debugging -pr.user # Raises ValueError immediately - CLEAR ERROR -``` - -### Fake Data Types to Avoid - **NEVER return fake defaults to hide missing data:** - ```python # ❌ WRONG - Fake data hiding bugs -return "" # Fake empty string -return 0 # Fake zero -return False # Fake boolean -return None # Fake None (when attribute should exist) -return UserWrapper(None) # Fake empty object -return [] # Fake empty list (when data should exist) -return {} # Fake empty dict (when data should exist) +return "", 0, False, None, UserWrapper(None), [], {} # ✅ CORRECT - Fail-fast -raise ValueError("Data not available") # Clear error -raise KeyError("Required field missing") # Clear error +raise ValueError("Data not available") +raise KeyError("Required field missing") ``` ---- - -## Examples: Before and After - -### Example 1: Required Parameter - +**Example:** ```python -# ❌ WRONG - Defensive check on required parameter -def __init__(self, token: str, logger: logging.Logger, config: Config): - self.config = config - self.logger = logger - -def some_method(self): - if hasattr(self, "logger"): # VIOLATION - logger is required - self.logger.info("Processing...") - if self.config: # VIOLATION - config is required - value = self.config.get_value("key") - -# ✅ CORRECT -def __init__(self, token: str, logger: logging.Logger, config: Config): - self.config = config - self.logger = logger - -def some_method(self): - self.logger.info("Processing...") # Logger always exists - value = self.config.get_value("key") # Config always exists -``` - -### Example 2: Known Library Version - -```python -# ❌ WRONG - Version checking for controlled dependency -def cleanup(self): - if hasattr(self.rest_client, "close"): # VIOLATION - self.rest_client.close() - -# ✅ CORRECT - PyGithub >=2.4.0 guaranteed in pyproject.toml -def cleanup(self): - self.rest_client.close() # Method exists, no check needed -``` - -### Example 3: Architecture Guarantee - -```python -# ❌ WRONG - Checking for architecture-guaranteed data -class SomeHandler: - def __init__(self, github_webhook: GithubWebhook): - self.github_webhook = github_webhook - - def process_event(self, event_data: dict): - if hasattr(self.github_webhook, "repository_data"): # VIOLATION - collaborators = self.github_webhook.repository_data["collaborators"] - else: - collaborators = [] # Fake data hiding bugs - -# ✅ CORRECT - Architecture guarantees repository_data exists -class SomeHandler: - def __init__(self, github_webhook: GithubWebhook): - self.github_webhook = github_webhook - - def process_event(self, event_data: dict): - # repository_data ALWAYS exists before handlers instantiate - collaborators = self.github_webhook.repository_data["collaborators"] -``` - -### Example 4: Webhook Payload Fields - -```python -# ❌ WRONG - Fake data for stable webhook fields +# ❌ WRONG @property def user(self): if self._data and "user" in self._data: return UserWrapper(self._data["user"]) return UserWrapper(None) # Fake empty user - HIDES BUGS -def get_user_id(self, user_data: dict) -> str: - if "node_id" in user_data: # VIOLATION - return user_data["node_id"] - return "" # Fake data hiding bugs - -# ✅ CORRECT - Fail-fast for malformed data +# ✅ CORRECT @property def user(self): - if self._raw_data and "user" in self._raw_data: - return UserWrapper(self._raw_data["user"]) if self._data and "user" in self._data: return UserWrapper(self._data["user"]) raise ValueError("No user data available - webhook incomplete") - -def get_user_id(self, user_data: dict) -> str: - return user_data["node_id"] # KeyError = legitimate bug -``` - -### Example 5: Type Discrimination - -```python -# ❌ WRONG - hasattr for type checking -def process_pr(self, pr: PullRequest | PullRequestWrapper): - if hasattr(pr, "id"): # VIOLATION - use isinstance - pr_id = pr.id - else: - pr_id = pr.node_id - -# ✅ CORRECT - Proper type discrimination -def process_pr(self, pr: PullRequest | PullRequestWrapper): - if isinstance(pr, PullRequestWrapper): - pr_id = pr.id - else: - pr_id = pr.node_id ``` --- ## Architecture-Specific Guarantees -**Our architecture provides these guarantees - NO defensive checks needed:** - -### 1. Repository Data - -- `repository_data` is ALWAYS set before handlers instantiate -- Set in `GithubWebhook.process()` with fail-fast (exception propagates) -- Type: `dict[str, Any]` (NOT `dict[str, Any] | None`) - -```python -# ✅ CORRECT - No check needed -def process_event(self, event_data: dict): - collaborators = self.github_webhook.repository_data["collaborators"] -``` - -### 2. Webhook User Objects - -- `user.node_id` always exists -- `user.type` always exists -- `sender` always exists in webhook payloads +**NO defensive checks needed for:** -```python -# ✅ CORRECT - No check needed -user_id = webhook_data["user"]["node_id"] -user_type = webhook_data["user"]["type"] -``` - -### 3. PyGithub REST API Usage - -- All GitHub API operations use PyGithub (synchronous REST API) -- **🔴 CRITICAL:** PyGithub is blocking - **MUST** wrap ALL calls with `asyncio.to_thread()` to avoid blocking event loop -- Data sources are explicit and guaranteed - -```python -# ✅ CORRECT - PyGithub wrapped in asyncio.to_thread() -repository = self.github_webhook.repository -pull_request = await asyncio.to_thread(repository.get_pull, number) -await asyncio.to_thread(pull_request.create_issue_comment, "Comment text") - -# ❌ WRONG - Direct PyGithub calls block event loop -pull_request = repository.get_pull(number) # BLOCKS! -pull_request.create_issue_comment("Comment text") # BLOCKS! -``` - ---- - -## Decision Tree: When to Use Defensive Checks - -**Ask yourself these questions IN ORDER:** - -1. **Is this a destructor (`__del__`)?** - - YES → Defensive check acceptable - - NO → Continue - -2. **Is the parameter/attribute optional by design (`Type | None`)?** - - YES → Defensive check acceptable - - NO → Continue - -3. **Is this lazy initialization (starts as None)?** - - YES → Defensive check acceptable - - NO → Continue - -4. **Is this a platform constant (e.g., `os.O_NOFOLLOW`)?** - - YES → Defensive check acceptable - - NO → Continue - -5. **Is this an external library we DON'T control the version of?** - - YES → Defensive check acceptable - - NO → Continue - -6. **Otherwise: NO DEFENSIVE CHECK** - - Required parameters → ALWAYS exist - - Controlled dependencies → Version guaranteed - - Architecture guarantees → Data guaranteed - - Webhook fields → Format guaranteed - ---- - -## Enforcement - -### Code Reviews MUST Catch Violations - -**Reviewers must reject:** - -- hasattr() checks on required parameters -- hasattr() checks on known library versions -- hasattr() checks on architecture-guaranteed data -- Fake default returns ("", 0, False, None objects, []) -- Type discrimination via hasattr instead of isinstance - -### Type Hints Must Match Reality - -```python -# ❌ WRONG - Type hint doesn't match reality -def __init__(self, config: Config): # Required parameter - self.config = config - -def some_method(self): - if self.config: # Type hint says Config, check says Config | None - MISMATCH - ... - -# ✅ CORRECT - Type hint matches usage -def __init__(self, config: Config): # Required parameter - self.config = config - -def some_method(self): - value = self.config.get_value("key") # No check - matches type hint -``` +1. **Repository Data** - `repository_data` ALWAYS set before handlers instantiate +2. **Webhook User Objects** - `user.node_id`, `user.type`, `sender` always exist +3. **PyGithub REST API** - **🔴 CRITICAL:** PyGithub is blocking - **MUST** wrap with `asyncio.to_thread()` --- ## Summary: Quick Reference ### ✅ ACCEPTABLE Defensive Checks - - Destructors (`__del__`) - Optional parameters (`param: Type | None = None`) - Lazy initialization (starts as None) @@ -513,121 +164,86 @@ def some_method(self): - External libraries we don't control ### ❌ VIOLATIONS (NO defensive checks) - - Required parameters in `__init__()` - Known library versions (PyGithub >=2.4.0) - Architecture guarantees (`repository_data`) - Webhook payload fields (`user.node_id`) - Type discrimination (use `isinstance()`) -### Fail-Fast Principle - -- **DON'T** return fake data ("", 0, False, None, UserWrapper(None)) -- **DO** raise exceptions for missing data -- **Better** to crash early than hide bugs - ### Enforcement - - Code reviews catch violations - Type hints match reality - Prek hooks automate checks - **Zero tolerance for unnecessary defensive programming** +--- + ## Architecture Overview -This is a FastAPI-based GitHub webhook server that automates repository management and pull request workflows. The system processes GitHub webhooks and performs automated actions like PR management, container building, testing, and deployment. +FastAPI-based GitHub webhook server that automates repository management and pull request workflows. ### Core Architecture Components **Event-Driven Handler Architecture:** - -- `webhook_server/libs/handlers/` contains specialized handlers for different GitHub events -- Each handler (e.g., `pull_request_handler.py`, `issue_comment_handler.py`) processes specific webhook events -- Handlers are instantiated and orchestrated by the main FastAPI app (`app.py`) -- All handlers follow a common pattern: `__init__(github_webhook, ...)` → `process_event(event_data)` +- `webhook_server/libs/handlers/` contains specialized handlers +- Handlers instantiated by main FastAPI app (`app.py`) +- Pattern: `__init__(github_webhook, ...)` → `process_event(event_data)` **Configuration System:** - - `webhook_server/libs/config.py` manages YAML-based configuration with schema validation - Global config at `/home/podman/data/config.yaml` with per-repository overrides via `.github-webhook-server.yaml` - Schema validation in `webhook_server/config/schema.yaml` -- Configuration is reloaded per webhook event (no server restart needed) -- Per-repository override supported via `.github-webhook-server.yaml` +- Configuration reloaded per webhook event (no server restart needed) **GitHub API Integration:** - -- `webhook_server/libs/github_api.py` provides the core `GithubWebhook` class +- `webhook_server/libs/github_api.py` provides core `GithubWebhook` class - Uses PyGithub (REST API v3) for all GitHub operations -- **🔴 CRITICAL:** PyGithub is synchronous/blocking - **MUST** wrap with `asyncio.to_thread()` to avoid blocking event loop -- Handles authentication, rate limiting, and GitHub API calls +- **🔴 CRITICAL:** PyGithub is synchronous/blocking - **MUST** wrap with `asyncio.to_thread()` - Supports multiple GitHub tokens with automatic failover **Log Viewer System:** - -- `webhook_server/web/log_viewer.py` contains `LogViewerController` for web-based log viewing -- Includes streaming log parsing, filtering, and real-time WebSocket updates -- **Memory-optimized**: Uses streaming/chunked processing (90% memory reduction vs bulk loading) -- Real-time log streaming via WebSocket with progressive loading +- `webhook_server/web/log_viewer.py` contains `LogViewerController` +- **Memory-optimized**: Streaming/chunked processing (90% memory reduction) +- Real-time log streaming via WebSocket ## Development Commands ### Environment Setup - ```bash -# Install dependencies (preferred) uv sync - -# Activate development environment source .venv/bin/activate ``` ### Running the Server - ```bash -# Development server +# Development uv run entrypoint.py -# Production server (requires config.yaml in data directory) +# Production WEBHOOK_SERVER_DATA_DIR=/path/to/data uv run entrypoint.py ``` ### Testing - ```bash # Run all tests - uv run --group tests pytest -n auto +uv run --group tests pytest -n auto -# Run with coverage (90% required) +# With coverage (90% required) uv run --group tests pytest -n auto --cov=webhook_server - ``` ### Code Quality - ```bash -# Format code uv run ruff format - -# Lint code uv run ruff check - -# Fix linting issues automatically uv run ruff check --fix - -# Type checking (strict mypy configuration) uv run mypy webhook_server/ - -# Run all quality checks uv run ruff check && uv run ruff format && uv run mypy webhook_server/ ``` ### Configuration Validation - ```bash -# Validate configuration schema uv run webhook_server/tests/test_schema_validator.py config.yaml - -# Test configuration loading uv run pytest webhook_server/tests/test_config_schema.py -v ``` @@ -635,8 +251,6 @@ uv run pytest webhook_server/tests/test_config_schema.py -v ### Handler Pattern -All GitHub event processing follows this pattern: - ```python class SomeHandler: def __init__(self, github_webhook: GithubWebhook, ...): @@ -648,234 +262,141 @@ class SomeHandler: # Log results ``` -### GitHub API Usage Pattern +### 🔴 MANDATORY: Non-Blocking PyGithub Operations + +**CRITICAL:** PyGithub is synchronous - ALL operations MUST use `asyncio.to_thread()` + +#### What Blocks the Event Loop -**🔴 CRITICAL: PyGithub is blocking - ALWAYS wrap with asyncio.to_thread() to keep server non-blocking:** +1. **Method calls** - ALL trigger API calls: + - `.get_*()`, `.create_*()`, `.edit()`, `.update()`, `.add_to_*()`, `.remove_from_*()` -#### PyGithub Blocking Operations: Methods AND Properties +2. **Property accesses** - MANY trigger API calls: + - `.draft`, `.mergeable`, `.state`, `.committer`, `.author`, `.permissions`, `.labels`, `.assignees` + - **ANY property not in webhook payload** -**IMPORTANT:** Both method calls AND property accesses can block the event loop! +3. **PaginatedList iteration** - BLOCKS during iteration -PyGithub uses lazy loading - many properties trigger API calls when accessed. **ALL** PyGithub operations must be wrapped in `asyncio.to_thread()`. +4. **Safe operations** (don't block): + - Properties from webhook payload (`.number`, `.title`, `.body`) + - Already-fetched cached data (rare) + +#### Correct Usage ```python import asyncio from github.PullRequest import PullRequest -# ✅ CORRECT: Wrap ALL PyGithub method calls in asyncio.to_thread() -repository = self.github_webhook.repository -pull_request = repository.get_pull(number) - -# MANDATORY: Wrap blocking method calls to avoid freezing the event loop -await asyncio.to_thread(pull_request.create_issue_comment, "Comment text") -await asyncio.to_thread(pull_request.add_to_labels, "label-name") -await asyncio.to_thread(repository.get_branch, "main") - -# ✅ CORRECT: Execute multiple calls concurrently (non-blocking) -tasks = [ - asyncio.to_thread(pull_request.create_issue_comment, "Comment"), - asyncio.to_thread(pull_request.add_to_labels, "verified"), - asyncio.to_thread(pull_request.get_commits), -] -results = await asyncio.gather(*tasks, return_exceptions=True) - -# ❌ WRONG - NEVER call PyGithub directly (blocks event loop) -pull_request.create_issue_comment("Comment") # BLOCKS EVENT LOOP! -repository.get_pull(number) # BLOCKS EVENT LOOP! -``` - -#### Common Blocking Property Accesses - -**🔴 CRITICAL:** Many PyGithub properties trigger API calls and BLOCK the event loop! +# ✅ CORRECT - Wrap ALL method calls +await asyncio.to_thread(pull_request.create_issue_comment, "Comment") +await asyncio.to_thread(pull_request.add_to_labels, "label") +await asyncio.to_thread(repository.get_pull, number) -```python -# ❌ WRONG - Property accesses that trigger API calls -is_draft = pull_request.draft # BLOCKS - fetches PR data from API -committer = commit.committer # BLOCKS - fetches user data from API -perms = user.permissions # BLOCKS - fetches permission data from API -mergeable = pull_request.mergeable # BLOCKS - checks merge status via API -state = pull_request.state # BLOCKS - fetches PR state from API -labels = pull_request.labels # BLOCKS - fetches label data from API -reviews = pull_request.get_reviews() # BLOCKS - fetches review data - -# ✅ CORRECT - Wrap property accesses in asyncio.to_thread() +# ✅ CORRECT - Wrap ALL property accesses that may trigger API calls is_draft = await asyncio.to_thread(lambda: pull_request.draft) -committer = await asyncio.to_thread(lambda: commit.committer) -perms = await asyncio.to_thread(lambda: user.permissions) mergeable = await asyncio.to_thread(lambda: pull_request.mergeable) -state = await asyncio.to_thread(lambda: pull_request.state) labels = await asyncio.to_thread(lambda: list(pull_request.labels)) -# ✅ CORRECT - Accessing multiple properties concurrently +# ✅ CORRECT - Wrap PaginatedList iteration +commits = await asyncio.to_thread(lambda: list(pull_request.get_commits())) +for commit in commits: + await process_commit(commit) + +# ✅ CORRECT - Concurrent operations is_draft, mergeable, state = await asyncio.gather( asyncio.to_thread(lambda: pull_request.draft), asyncio.to_thread(lambda: pull_request.mergeable), asyncio.to_thread(lambda: pull_request.state), ) -``` - -#### Blocking Operations: Complete List -**What constitutes a blocking PyGithub operation:** - -1. **Method calls** - ALL methods trigger API calls: - - `.get_*()` - fetch data (e.g., `.get_pull()`, `.get_commits()`) - - `.create_*()` - create resources (e.g., `.create_issue_comment()`) - - `.edit()`, `.update()` - modify resources - - `.add_to_*()`, `.remove_from_*()` - manage relationships +# ❌ WRONG - NEVER call PyGithub directly +pull_request.create_issue_comment("Comment") # BLOCKS! +is_draft = pull_request.draft # BLOCKS! +for commit in pull_request.get_commits(): ... # BLOCKS! +``` -2. **Property accesses** - MANY properties trigger API calls: - - `.draft`, `.mergeable`, `.state` - PR status properties - - `.committer`, `.author` - user data properties - - `.permissions` - permission data properties - - `.labels`, `.assignees` - relationship properties - - **ANY property not in the webhook payload** +#### Decision Tree -3. **Iteration over PaginatedList** - BLOCKS during iteration: - - ```python - # ❌ WRONG - Iterating PaginatedList blocks - for commit in pull_request.get_commits(): # BLOCKS on each iteration - process_commit(commit) - - # ✅ CORRECT - Wrap iteration in asyncio.to_thread() - commits = await asyncio.to_thread(lambda: list(pull_request.get_commits())) - for commit in commits: - await process_commit(commit) - ``` - -4. **Safe operations** - These DON'T block (data already in memory): - - Properties from webhook payload (`.number`, `.title`, `.body` if from webhook) - - Already-fetched cached data (rare, PyGithub caching is limited) - - Simple attribute access on already-loaded objects +Before accessing ANY PyGithub object: +1. Is this a PyGithub object? → YES, it may block +2. Calling a method? → **DEFINITELY BLOCKS** - wrap in `asyncio.to_thread()` +3. Accessing a property? → **MAY BLOCK** - wrap in `asyncio.to_thread(lambda: obj.property)` +4. Iterating PaginatedList? → **BLOCKS** - wrap in `asyncio.to_thread(lambda: list(...))` +5. Webhook payload attribute? → Usually safe (`.number`, `.title`) +6. **Unsure? ALWAYS wrap in `asyncio.to_thread()`** **Why this is critical:** - -- PyGithub methods and properties are **synchronous/blocking** - they freeze the entire FastAPI server -- Every GitHub API call takes **100ms-2 seconds** - blocking = frozen server -- `asyncio.to_thread()` runs code in thread pool, keeping event loop responsive +- PyGithub is synchronous - each operation blocks 100ms-2 seconds +- Blocking = frozen server (no other webhooks processed) +- `asyncio.to_thread()` runs code in thread pool, keeps event loop responsive - **NOT OPTIONAL** - required for correct async operation -### Type Compatibility Pattern +**Impact of blocking:** +- Single blocking call freezes entire server +- Incoming webhooks must wait +- Server appears unresponsive +- Rate limits hit faster +- Degraded user experience -Methods work with PyGithub objects - always wrap calls AND property accesses in asyncio.to_thread(): +### Type Compatibility Pattern ```python -import asyncio -from github.PullRequest import PullRequest - -async def add_pr_comment( - self, - pull_request: PullRequest, # PyGithub PullRequest object - body: str -) -> None: - # 🔴 CRITICAL: Wrap PyGithub method calls to avoid blocking +async def add_pr_comment(self, pull_request: PullRequest, body: str) -> None: await asyncio.to_thread(pull_request.create_issue_comment, body) -async def check_pr_status( - self, - pull_request: PullRequest, -) -> tuple[bool, bool, str]: - """Check PR draft status, mergeability, and state. - - 🔴 CRITICAL: All property accesses wrapped to avoid blocking. - """ - # ✅ CORRECT: Fetch multiple properties concurrently - is_draft, mergeable, state = await asyncio.gather( +async def check_pr_status(self, pull_request: PullRequest) -> tuple[bool, bool, str]: + return await asyncio.gather( asyncio.to_thread(lambda: pull_request.draft), asyncio.to_thread(lambda: pull_request.mergeable), asyncio.to_thread(lambda: pull_request.state), ) - return is_draft, mergeable, state - -async def process_pr_commits( - self, - pull_request: PullRequest, -) -> list[str]: - """Process all commits in a PR. - - 🔴 CRITICAL: PaginatedList iteration wrapped to avoid blocking. - """ - # ✅ CORRECT: Wrap PaginatedList iteration - commits = await asyncio.to_thread(lambda: list(pull_request.get_commits())) - commit_shas = [commit.sha for commit in commits] - return commit_shas ``` ### Repository Data Pre-Fetch Pattern -All webhook processing follows this data flow: - ```python # In GithubWebhook.process() - after PR data, before handlers self.repository_data: dict[str, Any] = await self.unified_api.get_comprehensive_repository_data(owner, repo) # In handlers - use pre-fetched data collaborators = self.github_webhook.repository_data['collaborators']['edges'] -contributors = self.github_webhook.repository_data['mentionableUsers']['nodes'] -issues = self.github_webhook.repository_data['issues']['nodes'] ``` **Key principles:** - -- Fetch once per webhook, never per handler instance +- Fetch once per webhook, never per handler - Fail-fast: Exception propagates → webhook aborts -- No caching across webhooks -- Type is `dict[str, Any]`, never `| None` (fail-fast guarantees data exists) +- Type is `dict[str, Any]`, never `| None` (fail-fast guarantees) ### Repository Cloning Optimization for check_run Events -**Optimization implemented:** Repository cloning is skipped for check_run webhooks that don't need it. - **Location:** `webhook_server/libs/github_api.py` lines 534-570 **Early exit conditions (no clone needed):** -1. **Action != "completed"** - - Repository/organization webhooks only receive `created` and `completed` actions for check_run events - - `created` action indicates the check run was just created, no processing needed - - Code checks `action != "completed"` to skip clones for non-completed check runs - -2. **Can-be-merged with non-success conclusion** (primary optimization) - - Check name: `can-be-merged` - - Conclusions: `failure`, `cancelled`, `timed_out`, `action_required`, `neutral`, `skipped` - - Cannot automerge without success conclusion - - This is the main optimization that prevents unnecessary repository cloning - -**Implementation pattern:** +1. **Action != "completed"** - Skip `created` action +2. **Can-be-merged with non-success conclusion** - Primary optimization ```python elif self.github_event == "check_run": - # Check if we need to process this check_run action = self.hook_data.get("action", "") if action != "completed": - # Log and return early (no clone) return None - # Check if this is can-be-merged with non-success conclusion check_run_name = self.hook_data.get("check_run", {}).get("name", "") check_run_conclusion = self.hook_data.get("check_run", {}).get("conclusion", "") if check_run_name == CAN_BE_MERGED_STR and check_run_conclusion != SUCCESS_STR: - # Log and return early (no clone) return None - # Only clone when actually needed await self._clone_repository(pull_request=pull_request) - # ... rest of processing ``` **Benefits:** -- **90-95% reduction** in unnecessary repository cloning for check_run events -- **Faster webhook processing** - saves 5-30 seconds per skipped clone (depending on repo size) -- **Reduced resource usage** - less disk I/O, network I/O, and CPU usage -- **Lower server load** - especially during high webhook volume periods +- 90-95% reduction in unnecessary cloning +- Saves 5-30 seconds per skipped clone +- Reduced resource usage +- Lower server load -**Other event types unchanged:** -- `issue_comment` - still clones before processing -- `pull_request` - still clones before processing -- `pull_request_review` - still clones before processing - -**Tests:** `webhook_server/tests/test_check_run_handler.py` - `TestCheckRunRepositoryCloning` class +**Tests:** `webhook_server/tests/test_check_run_handler.py` ### Configuration Access @@ -884,23 +405,19 @@ from webhook_server.libs.config import Config config = Config(repository="org/repo-name") value = config.get_value("setting-name", default_value) -repo_data = config.repository_data ``` ### Logging Pattern -All components use structured logging with contextual parameters: - ```python from webhook_server.utils.helpers import get_logger_with_params logger = get_logger_with_params( name="component_name", repository="org/repo", - hook_id="github-delivery-id" # For webhook correlation + hook_id="github-delivery-id" ) -# Use appropriate log levels logger.debug("Detailed technical information") logger.info("General information") logger.warning("Warning that needs attention") @@ -910,142 +427,67 @@ logger.exception("Error with full traceback") # Preferred over logger.error(... ### Structured Webhook Logging -The server implements comprehensive JSON-based logging for webhook execution tracking. Each webhook generates a structured log entry containing all workflow steps, timing, errors, and API metrics. - -**Overview:** - -- Thread-safe context tracking using ContextVar for async isolation -- Each webhook execution gets an isolated WebhookContext instance -- Context persists through async operations and handler chains -- Automatic workflow step tracking with timing and error capture -- Pretty-printed JSON output with date-based log rotation - -**Context Creation:** - -Context is created in `app.py` at the start of webhook processing: +JSON-based logging for webhook execution tracking with thread-safe context using ContextVar. +**Context Creation (app.py):** ```python from webhook_server.utils.context import create_context -# In process_with_error_handling() - before GithubWebhook instantiation ctx = create_context( - hook_id=hook_id, # X-GitHub-Delivery header + hook_id=hook_id, event_type="pull_request", repository="org/repo", - repository_full_name="org/repo", action="opened", sender="username", api_user="github-api-user", ) ``` -**Step Tracking Methods:** - -Handlers and processing code use these methods to track workflow progress: - +**Step Tracking:** ```python from webhook_server.utils.context import get_context -# Get context anywhere in the call stack ctx = get_context() - -# Start a workflow step ctx.start_step("clone_repository", branch="main") -# Complete step successfully try: await clone_repo() ctx.complete_step("clone_repository", commit_sha="abc123") except Exception as ex: - # Mark step as failed with error details import traceback - ctx.fail_step( - "clone_repository", - exception=ex, - traceback_str=traceback.format_exc() - ) + ctx.fail_step("clone_repository", exception=ex, traceback_str=traceback.format_exc()) ``` -**Handler Usage Pattern:** - -Handlers access context via `github_webhook.ctx`: - +**Handler Usage:** ```python class PullRequestHandler: - def __init__(self, github_webhook: GithubWebhook): - self.github_webhook = github_webhook - async def process_event(self, event_data: dict) -> None: - # Access context ctx = self.github_webhook.ctx - - # Track workflow steps ctx.start_step("assign_reviewers", pr_number=123) try: await self.assign_reviewers(pr) - ctx.complete_step( - "assign_reviewers", - reviewers_assigned=3, - labels_added=["needs-review"] - ) + ctx.complete_step("assign_reviewers", reviewers_assigned=3) except Exception as ex: - ctx.fail_step( - "assign_reviewers", - exception=ex, - traceback_str=traceback.format_exc(), - pr_number=123 - ) + ctx.fail_step("assign_reviewers", exception=ex, traceback_str=traceback.format_exc()) ``` **Log File Format:** - -Logs are written to date-based JSON files: - - Location: `{config.data_dir}/logs/webhooks_YYYY-MM-DD.json` - Format: Pretty-printed JSON (2-space indentation) -- Entry separator: Blank line between webhook executions - Rotation: Daily based on UTC date -- Concurrency: File locking for safe multi-process writes - -Each log entry contains: +**Log entry structure:** ```json { "hook_id": "github-delivery-id", "event_type": "pull_request", - "action": "opened", - "sender": "username", - "repository": "org/repo", - "pr": { - "number": 968, - "title": "Add new feature", - "author": "contributor" - }, - "api_user": "github-api-user", - "timing": { - "started_at": "2026-01-05T10:30:00.123Z", - "completed_at": "2026-01-05T10:30:07.835Z", - "duration_ms": 7712 - }, + "pr": {"number": 968, "title": "Add new feature"}, + "timing": {"started_at": "2026-01-05T10:30:00.123Z", "duration_ms": 7712}, "workflow_steps": { - "webhook_routing": { - "timestamp": "2026-01-05T10:30:00.200Z", - "status": "completed", - "duration_ms": 2547 - }, - "clone_repository": { - "timestamp": "2026-01-05T10:30:02.750Z", - "status": "completed", - "duration_ms": 4823, - "commit_sha": "abc123" - } + "clone_repository": {"status": "completed", "duration_ms": 4823} }, "token_spend": 4, - "initial_rate_limit": 5000, - "final_rate_limit": 4996, - "success": true, - "error": null, - "summary": "[SUCCESS] Webhook completed PR#968 [7s712ms, tokens:4] steps=[webhook_routing:completed(2s547ms), clone_repository:completed(4s823ms)]" + "success": true } ``` @@ -1055,15 +497,14 @@ Each log entry contains: # ✅ CORRECT: Use logger.exception for automatic traceback try: await some_operation() -except Exception: # Can be broad for webhook handlers +except Exception: logger.exception("Failed to perform operation") - # Handle gracefully or re-raise # ❌ WRONG: Don't use logger.error with exc_info=True except Exception as ex: - logger.error(f"Failed: {ex}", exc_info=True) # Use logger.exception instead + logger.error(f"Failed: {ex}", exc_info=True) -# ✅ BETTER: Catch specific exceptions when possible +# ✅ BETTER: Catch specific exceptions except GithubException as ex: logger.exception("GitHub API operation failed") except asyncio.CancelledError: @@ -1073,121 +514,31 @@ except asyncio.CancelledError: ## Critical Architectural Rules -### 🔴 MANDATORY: Non-Blocking Operations - -**ALL operations must be non-blocking in this async FastAPI application:** - -#### PyGithub (GitHub API) - ALWAYS Use asyncio.to_thread() - -```python -# ✅ CORRECT - Wrap ALL PyGithub method calls -await asyncio.to_thread(pull_request.create_issue_comment, "Comment") -await asyncio.to_thread(pull_request.add_to_labels, "label-name") -await asyncio.to_thread(repository.get_branch, "main") -await asyncio.to_thread(repository.get_pull, pr_number) - -# ✅ CORRECT - Wrap ALL PyGithub property accesses that may trigger API calls -is_draft = await asyncio.to_thread(lambda: pull_request.draft) -mergeable = await asyncio.to_thread(lambda: pull_request.mergeable) -committer = await asyncio.to_thread(lambda: commit.committer) - -# ✅ CORRECT - Wrap PaginatedList iteration -commits = await asyncio.to_thread(lambda: list(pull_request.get_commits())) -for commit in commits: - await process_commit(commit) - -# ✅ CORRECT - Concurrent non-blocking operations -tasks = [ - asyncio.to_thread(pr.create_issue_comment, "Comment"), - asyncio.to_thread(pr.add_to_labels, "verified"), - asyncio.to_thread(lambda: list(pr.get_commits())), - asyncio.to_thread(lambda: pr.draft), - asyncio.to_thread(lambda: pr.mergeable), -] -results = await asyncio.gather(*tasks, return_exceptions=True) - -# ❌ WRONG - NEVER call PyGithub methods directly -pull_request.create_issue_comment("Comment") # BLOCKS EVENT LOOP! -repository.get_pull(123) # BLOCKS EVENT LOOP! -pr.get_commits() # BLOCKS EVENT LOOP! - -# ❌ WRONG - NEVER access PyGithub properties directly -is_draft = pull_request.draft # BLOCKS EVENT LOOP! -mergeable = pull_request.mergeable # BLOCKS EVENT LOOP! -committer = commit.committer # BLOCKS EVENT LOOP! - -# ❌ WRONG - NEVER iterate PaginatedList directly -for commit in pull_request.get_commits(): # BLOCKS EVENT LOOP! - process_commit(commit) -``` - -#### Decision Tree: Is This Operation Blocking? - -**Before accessing ANY PyGithub object, ask yourself:** - -1. **Is this a PyGithub object?** → YES, it may block -2. **Am I calling a method (`.get_*()`, `.create_*()`, `.edit()`, etc.)?** → **DEFINITELY BLOCKS** - wrap in `asyncio.to_thread()` -3. **Am I accessing a property (`.draft`, `.permissions`, `.committer`, `.mergeable`)?** → **MAY BLOCK** if property fetches data - wrap in `asyncio.to_thread(lambda: obj.property)` -4. **Am I iterating over a PaginatedList?** → **BLOCKS** during iteration - wrap in `asyncio.to_thread(lambda: list(...))` -5. **Am I checking object attributes from webhook payload?** → Usually safe (already in memory) - e.g., `.number`, `.title` if from webhook -6. **Am I unsure?** → **ALWAYS wrap in `asyncio.to_thread()`** - it's always safe! - -**Rule of Thumb: If it's a PyGithub object and you're not 100% certain it's safe, wrap it in `asyncio.to_thread()`** - -**Why this is critical:** - -- **PyGithub is synchronous** - every method/property access can block for 100ms-2 seconds -- **Blocking = frozen server** - no other webhooks can be processed -- **asyncio.to_thread() is mandatory** - runs blocking code in thread pool -- **Enables concurrency** - multiple webhooks processed simultaneously -- **This is not optional** - required for correct FastAPI async operation - -**Impact of blocking calls:** - -- Single blocking API call freezes entire server -- Other incoming webhooks must wait -- Server appears unresponsive -- Rate limits are hit faster due to sequential processing -- User experience degrades (slow webhook processing) - ### Import Organization - -**MANDATORY:** All imports must be at the top of files - -- No imports in the middle of functions or try/except blocks -- Exceptions: TYPE_CHECKING imports can be conditional +**MANDATORY:** All imports at top of files +- No imports in functions or try/except blocks +- Exception: TYPE_CHECKING imports can be conditional - Prek hooks enforce this ### Type Hints - -**MANDATORY:** All functions must have complete type hints (mypy strict mode) - +**MANDATORY:** Complete type hints (mypy strict mode) ```python # ✅ CORRECT -async def process_pr( - self, - pull_request: PullRequest, - reviewers: list[str] -) -> None: - ... +async def process_pr(self, pull_request: PullRequest, reviewers: list[str]) -> None: ... # ❌ WRONG -async def process_pr(self, pull_request, reviewers): # Missing type hints - ... +async def process_pr(self, pull_request, reviewers): ... ``` ### Test Coverage - **MANDATORY:** 90% code coverage required - -- Use `uv run --group tests pytest --cov=webhook_server` to check -- New code without tests will fail CI -- Tests must be in `webhook_server/tests/` +- Check: `uv run --group tests pytest --cov=webhook_server` +- New code without tests fails CI +- Tests in `webhook_server/tests/` ## Testing Patterns ### Test File Organization - ```bash webhook_server/tests/ ├── test_*.py # Unit and integration tests @@ -1197,31 +548,22 @@ webhook_server/tests/ ``` ### Mock Testing Pattern - ```python from unittest.mock import AsyncMock, Mock -# For async operations mock_api = AsyncMock() mock_api.get_pull_request.return_value = mock_pr_data -# For REST operations wrapped in to_thread with patch("asyncio.to_thread", side_effect=mock_to_thread): result = await unified_api.get_pr_for_check_runs(owner, repo, number) ``` ### Test Token Pattern - -Use centralized test tokens to avoid security warnings: - ```python -# At module level TEST_GITHUB_TOKEN = "ghp_test1234..." # pragma: allowlist secret -# In fixtures @pytest.fixture def mock_github_api(): - """Create a mock GitHub API.""" mock = Mock() mock.get_rate_limit.return_value = Mock(rate=Mock(remaining=5000)) return mock @@ -1230,16 +572,13 @@ def mock_github_api(): ## Security Considerations ### Log Viewer Security - -⚠️ **CRITICAL:** Log viewer endpoints (`/logs/*`) are unauthenticated by design - +⚠️ **CRITICAL:** Log viewer endpoints (`/logs/*`) are unauthenticated - Deploy only on trusted networks (VPN, internal network) - Never expose to public internet - Use reverse proxy with authentication for external access - Logs contain sensitive data: tokens, webhook payloads, user information ### Token Handling - - Store tokens in environment variables or secret management systems - Use multiple tokens for rate limit distribution - Never commit tokens to repository @@ -1248,7 +587,6 @@ def mock_github_api(): ## Common Development Tasks ### Adding a New Handler - 1. Create handler file in `webhook_server/libs/handlers/` 2. Implement `__init__(self, github_webhook, ...)` and `process_event(event_data)` 3. Use `self.github_webhook.unified_api` for GitHub operations @@ -1256,7 +594,6 @@ def mock_github_api(): 5. Update `app.py` to instantiate handler ### Updating Configuration Schema - 1. Edit `webhook_server/config/schema.yaml` 2. Run `uv run pytest webhook_server/tests/test_config_schema.py -v` 3. Update examples in `examples/config.yaml` From eae223f90915bde7d58c41b79320646737cd3d1f Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Mon, 5 Jan 2026 16:39:48 +0200 Subject: [PATCH 14/14] style(docs): fix markdown formatting violations in CLAUDE.md --- CLAUDE.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index de42f7d7a..37bfdd162 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -23,6 +23,7 @@ **CRITICAL: Eliminate unnecessary defensive programming overhead.** **Philosophy:** + - Server fails-fast on startup if critical dependencies are missing - Required parameters in `__init__()` are ALWAYS provided - Checking for None on required parameters is pure overhead @@ -34,27 +35,32 @@ ## WHEN Defensive Checks Are ACCEPTABLE 1. **Destructors (`__del__`)** - Can be called during failed initialization + ```python def __del__(self): if hasattr(self, "logger"): self.logger.debug("Cleanup") ``` 2. **Optional Parameters** - Parameter explicitly allows None + ```python def get_data(self, owner: str | None = None): ... ``` 3. **Lazy Initialization** - Attribute explicitly starts as None + ```python self.client: SomeClient | None = None ``` 4. **Platform Constants** - Constant may not exist on all platforms + ```python if hasattr(os, "O_NOFOLLOW"): flags |= os.O_NOFOLLOW ``` 5. **External Libraries We DON'T Control Version Of** - Library version truly unknown + - **NOTE:** Does NOT apply to dependencies in `pyproject.toml` - we control those versions --- @@ -62,6 +68,7 @@ ## WHEN Defensive Checks Are VIOLATIONS ### 1. Required Parameters in `__init__()` + ```python # ❌ WRONG - config is required, ALWAYS provided def some_method(self): @@ -73,7 +80,9 @@ def some_method(self): ``` ### 2. Known Library Versions + We control these versions in `pyproject.toml`: PyGithub >=2.4.0, gql >=3.5.0 + ```python # ❌ WRONG - PyGithub >=2.4.0 guaranteed if hasattr(self.rest_client, "close"): self.rest_client.close() @@ -83,7 +92,9 @@ self.rest_client.close() ``` ### 3. Architecture Guarantees + `repository_data` is ALWAYS set before handlers instantiate (fail-fast in `GithubWebhook.process()`) + ```python # ❌ WRONG - repository_data guaranteed by architecture if hasattr(self.github_webhook, "repository_data"): ... @@ -93,7 +104,9 @@ collaborators = self.github_webhook.repository_data["collaborators"] ``` ### 4. Webhook Payload Fields + GitHub webhook format is stable: `user.node_id`, `user.type`, `sender` always exist + ```python # ❌ WRONG if "node_id" in user_data: return user_data["node_id"] @@ -103,6 +116,7 @@ return user_data["node_id"] # KeyError = legitimate bug ``` ### 5. Type Discrimination + ```python # ❌ WRONG - Use isinstance for type checking if hasattr(pr, "some_attr"): ... @@ -116,6 +130,7 @@ pr_id = pr.node_id # Direct attribute access ## Fail-Fast Principle **NEVER return fake defaults to hide missing data:** + ```python # ❌ WRONG - Fake data hiding bugs return "", 0, False, None, UserWrapper(None), [], {} @@ -126,6 +141,7 @@ raise KeyError("Required field missing") ``` **Example:** + ```python # ❌ WRONG @property @@ -157,6 +173,7 @@ def user(self): ## Summary: Quick Reference ### ✅ ACCEPTABLE Defensive Checks + - Destructors (`__del__`) - Optional parameters (`param: Type | None = None`) - Lazy initialization (starts as None) @@ -164,6 +181,7 @@ def user(self): - External libraries we don't control ### ❌ VIOLATIONS (NO defensive checks) + - Required parameters in `__init__()` - Known library versions (PyGithub >=2.4.0) - Architecture guarantees (`repository_data`) @@ -171,6 +189,7 @@ def user(self): - Type discrimination (use `isinstance()`) ### Enforcement + - Code reviews catch violations - Type hints match reality - Prek hooks automate checks @@ -185,23 +204,27 @@ FastAPI-based GitHub webhook server that automates repository management and pul ### Core Architecture Components **Event-Driven Handler Architecture:** + - `webhook_server/libs/handlers/` contains specialized handlers - Handlers instantiated by main FastAPI app (`app.py`) - Pattern: `__init__(github_webhook, ...)` → `process_event(event_data)` **Configuration System:** + - `webhook_server/libs/config.py` manages YAML-based configuration with schema validation - Global config at `/home/podman/data/config.yaml` with per-repository overrides via `.github-webhook-server.yaml` - Schema validation in `webhook_server/config/schema.yaml` - Configuration reloaded per webhook event (no server restart needed) **GitHub API Integration:** + - `webhook_server/libs/github_api.py` provides core `GithubWebhook` class - Uses PyGithub (REST API v3) for all GitHub operations - **🔴 CRITICAL:** PyGithub is synchronous/blocking - **MUST** wrap with `asyncio.to_thread()` - Supports multiple GitHub tokens with automatic failover **Log Viewer System:** + - `webhook_server/web/log_viewer.py` contains `LogViewerController` - **Memory-optimized**: Streaming/chunked processing (90% memory reduction) - Real-time log streaming via WebSocket @@ -209,12 +232,14 @@ FastAPI-based GitHub webhook server that automates repository management and pul ## Development Commands ### Environment Setup + ```bash uv sync source .venv/bin/activate ``` ### Running the Server + ```bash # Development uv run entrypoint.py @@ -224,6 +249,7 @@ WEBHOOK_SERVER_DATA_DIR=/path/to/data uv run entrypoint.py ``` ### Testing + ```bash # Run all tests uv run --group tests pytest -n auto @@ -233,6 +259,7 @@ uv run --group tests pytest -n auto --cov=webhook_server ``` ### Code Quality + ```bash uv run ruff format uv run ruff check @@ -242,6 +269,7 @@ uv run ruff check && uv run ruff format && uv run mypy webhook_server/ ``` ### Configuration Validation + ```bash uv run webhook_server/tests/test_schema_validator.py config.yaml uv run pytest webhook_server/tests/test_config_schema.py -v @@ -269,15 +297,18 @@ class SomeHandler: #### What Blocks the Event Loop 1. **Method calls** - ALL trigger API calls: + - `.get_*()`, `.create_*()`, `.edit()`, `.update()`, `.add_to_*()`, `.remove_from_*()` 2. **Property accesses** - MANY trigger API calls: + - `.draft`, `.mergeable`, `.state`, `.committer`, `.author`, `.permissions`, `.labels`, `.assignees` - **ANY property not in webhook payload** 3. **PaginatedList iteration** - BLOCKS during iteration 4. **Safe operations** (don't block): + - Properties from webhook payload (`.number`, `.title`, `.body`) - Already-fetched cached data (rare) @@ -318,6 +349,7 @@ for commit in pull_request.get_commits(): ... # BLOCKS! #### Decision Tree Before accessing ANY PyGithub object: + 1. Is this a PyGithub object? → YES, it may block 2. Calling a method? → **DEFINITELY BLOCKS** - wrap in `asyncio.to_thread()` 3. Accessing a property? → **MAY BLOCK** - wrap in `asyncio.to_thread(lambda: obj.property)` @@ -326,12 +358,14 @@ Before accessing ANY PyGithub object: 6. **Unsure? ALWAYS wrap in `asyncio.to_thread()`** **Why this is critical:** + - PyGithub is synchronous - each operation blocks 100ms-2 seconds - Blocking = frozen server (no other webhooks processed) - `asyncio.to_thread()` runs code in thread pool, keeps event loop responsive - **NOT OPTIONAL** - required for correct async operation **Impact of blocking:** + - Single blocking call freezes entire server - Incoming webhooks must wait - Server appears unresponsive @@ -363,6 +397,7 @@ collaborators = self.github_webhook.repository_data['collaborators']['edges'] ``` **Key principles:** + - Fetch once per webhook, never per handler - Fail-fast: Exception propagates → webhook aborts - Type is `dict[str, Any]`, never `| None` (fail-fast guarantees) @@ -372,6 +407,7 @@ collaborators = self.github_webhook.repository_data['collaborators']['edges'] **Location:** `webhook_server/libs/github_api.py` lines 534-570 **Early exit conditions (no clone needed):** + 1. **Action != "completed"** - Skip `created` action 2. **Can-be-merged with non-success conclusion** - Primary optimization @@ -391,6 +427,7 @@ elif self.github_event == "check_run": ``` **Benefits:** + - 90-95% reduction in unnecessary cloning - Saves 5-30 seconds per skipped clone - Reduced resource usage @@ -430,6 +467,7 @@ logger.exception("Error with full traceback") # Preferred over logger.error(... JSON-based logging for webhook execution tracking with thread-safe context using ContextVar. **Context Creation (app.py):** + ```python from webhook_server.utils.context import create_context @@ -444,6 +482,7 @@ ctx = create_context( ``` **Step Tracking:** + ```python from webhook_server.utils.context import get_context @@ -459,6 +498,7 @@ except Exception as ex: ``` **Handler Usage:** + ```python class PullRequestHandler: async def process_event(self, event_data: dict) -> None: @@ -472,11 +512,13 @@ class PullRequestHandler: ``` **Log File Format:** + - Location: `{config.data_dir}/logs/webhooks_YYYY-MM-DD.json` - Format: Pretty-printed JSON (2-space indentation) - Rotation: Daily based on UTC date **Log entry structure:** + ```json { "hook_id": "github-delivery-id", @@ -515,13 +557,17 @@ except asyncio.CancelledError: ## Critical Architectural Rules ### Import Organization + **MANDATORY:** All imports at top of files + - No imports in functions or try/except blocks - Exception: TYPE_CHECKING imports can be conditional - Prek hooks enforce this ### Type Hints + **MANDATORY:** Complete type hints (mypy strict mode) + ```python # ✅ CORRECT async def process_pr(self, pull_request: PullRequest, reviewers: list[str]) -> None: ... @@ -531,7 +577,9 @@ async def process_pr(self, pull_request, reviewers): ... ``` ### Test Coverage + **MANDATORY:** 90% code coverage required + - Check: `uv run --group tests pytest --cov=webhook_server` - New code without tests fails CI - Tests in `webhook_server/tests/` @@ -539,6 +587,7 @@ async def process_pr(self, pull_request, reviewers): ... ## Testing Patterns ### Test File Organization + ```bash webhook_server/tests/ ├── test_*.py # Unit and integration tests @@ -548,6 +597,7 @@ webhook_server/tests/ ``` ### Mock Testing Pattern + ```python from unittest.mock import AsyncMock, Mock @@ -559,6 +609,7 @@ with patch("asyncio.to_thread", side_effect=mock_to_thread): ``` ### Test Token Pattern + ```python TEST_GITHUB_TOKEN = "ghp_test1234..." # pragma: allowlist secret @@ -572,13 +623,16 @@ def mock_github_api(): ## Security Considerations ### Log Viewer Security + ⚠️ **CRITICAL:** Log viewer endpoints (`/logs/*`) are unauthenticated + - Deploy only on trusted networks (VPN, internal network) - Never expose to public internet - Use reverse proxy with authentication for external access - Logs contain sensitive data: tokens, webhook payloads, user information ### Token Handling + - Store tokens in environment variables or secret management systems - Use multiple tokens for rate limit distribution - Never commit tokens to repository @@ -587,6 +641,7 @@ def mock_github_api(): ## Common Development Tasks ### Adding a New Handler + 1. Create handler file in `webhook_server/libs/handlers/` 2. Implement `__init__(self, github_webhook, ...)` and `process_event(event_data)` 3. Use `self.github_webhook.unified_api` for GitHub operations @@ -594,6 +649,7 @@ def mock_github_api(): 5. Update `app.py` to instantiate handler ### Updating Configuration Schema + 1. Edit `webhook_server/config/schema.yaml` 2. Run `uv run pytest webhook_server/tests/test_config_schema.py -v` 3. Update examples in `examples/config.yaml`