From e180859368d4f00d6974934604d81c6d350f77cb Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Thu, 30 Apr 2026 08:57:31 +0000 Subject: [PATCH] fix: token_spend calculation incorrect with concurrent webhooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CountingRequester was broken in two ways: 1. withLazy() bypass: PyGithub's Github.get_repo() calls requester.withLazy() which returns a NEW raw Requester, completely bypassing the CountingRequester wrapper. Only Github-level calls (get_rate_limit) were counted — all Repository-level API calls (labels, check runs, comments) were invisible. Fixed by overriding withLazy() to propagate the shared counter. 2. Shared counter between concurrent webhooks: if a Github instance were reused, multiple webhooks would share the same CountingRequester and corrupt each other's token_spend. Fixed by always creating a fresh CountingRequester per webhook. Also includes: - Single-token optimization: skip rate limit comparison when only one token is configured - WEBHOOK_SERVER_DEV_MODE env var for uvicorn auto-reload - Remove hasattr guard on _Github__requester (PyGithub >=2.4.0 guaranteed) Closes #970 --- docs/docker-deployment.md | 2 +- entrypoint.py | 19 ++- webhook_server/libs/github_api.py | 68 +++++---- webhook_server/tests/conftest.py | 5 +- .../tests/test_github_api_metrics.py | 132 ++++++++++++++---- webhook_server/tests/test_helpers.py | 39 ++++++ webhook_server/utils/context.py | 2 +- webhook_server/utils/helpers.py | 17 +++ 8 files changed, 225 insertions(+), 59 deletions(-) diff --git a/docs/docker-deployment.md b/docs/docker-deployment.md index 742e9a4c9..18fd5d340 100644 --- a/docs/docker-deployment.md +++ b/docs/docker-deployment.md @@ -186,7 +186,7 @@ For containerized deployments, put these runtime settings in `config.yaml`: - `verify-github-ips` - `verify-cloudflare-ips` -> **Warning:** The checked-in Compose example shows `MAX_WORKERS`, `WEBHOOK_SERVER_IP_BIND`, `WEBHOOK_SERVER_PORT`, `WEBHOOK_SECRET`, `VERIFY_GITHUB_IPS`, and `VERIFY_CLOUDFLARE_IPS` as environment variables, but the application code reads those values from `config.yaml` keys (`max-workers`, `ip-bind`, `port`, `webhook-secret`, `verify-github-ips`, and `verify-cloudflare-ips`). The environment variables consumed directly at runtime are `WEBHOOK_SERVER_DATA_DIR`, `ENABLE_LOG_SERVER`, and `ENABLE_MCP_SERVER`. The Podman cleanup script also reads `PUID`. `PGID` appears in the example, but the application code does not read it. +> **Warning:** The checked-in Compose example shows `MAX_WORKERS`, `WEBHOOK_SERVER_IP_BIND`, `WEBHOOK_SERVER_PORT`, `WEBHOOK_SECRET`, `VERIFY_GITHUB_IPS`, and `VERIFY_CLOUDFLARE_IPS` as environment variables, but the application code reads those values from `config.yaml` keys (`max-workers`, `ip-bind`, `port`, `webhook-secret`, `verify-github-ips`, and `verify-cloudflare-ips`). The environment variables consumed directly at runtime are `WEBHOOK_SERVER_DATA_DIR`, `ENABLE_LOG_SERVER`, `ENABLE_MCP_SERVER`, and `WEBHOOK_SERVER_DEV_MODE`. The Podman cleanup script also reads `PUID`. `PGID` appears in the example, but the application code does not read it. > **Note:** `ENABLE_LOG_SERVER` and `ENABLE_MCP_SERVER` are enabled only when they are set to the literal string `true`. diff --git a/entrypoint.py b/entrypoint.py index a44bdf8c2..598c2ac60 100644 --- a/entrypoint.py +++ b/entrypoint.py @@ -1,7 +1,9 @@ import asyncio +import os import subprocess import sys from pathlib import Path +from typing import Any import uvicorn @@ -14,6 +16,7 @@ _port = _root_config.get("port", 5000) _max_workers = _root_config.get("max-workers", 10) _webhook_secret = _root_config.get("webhook-secret") +_dev_mode = os.environ.get("WEBHOOK_SERVER_DEV_MODE", "").lower() in ("1", "true", "yes") def run_podman_cleanup() -> None: @@ -53,10 +56,12 @@ def run_podman_cleanup() -> None: # - Application logs use simple-logger with console=True for colored output in Docker logs # - Both logging systems work together: uvicorn handles HTTP request logs, # while simple-logger handles application-level logs with structured formatting - uvicorn.run( - "webhook_server.app:FASTAPI_APP", - host=_ip_bind, - port=int(_port), - workers=int(_max_workers), - reload=False, - ) + uvicorn_kwargs: dict[str, Any] = { + "host": _ip_bind, + "port": int(_port), + "reload": _dev_mode, + } + if not _dev_mode: + uvicorn_kwargs["workers"] = int(_max_workers) + + uvicorn.run("webhook_server.app:FASTAPI_APP", **uvicorn_kwargs) diff --git a/webhook_server/libs/github_api.py b/webhook_server/libs/github_api.py index 8b5faf7d2..3c3e9f411 100644 --- a/webhook_server/libs/github_api.py +++ b/webhook_server/libs/github_api.py @@ -61,21 +61,45 @@ class CountingRequester: """ Wrapper around PyGithub Requester to count API calls for a specific instance. Intercepts request* methods to increment a counter. + Also intercepts withLazy() so derived Requester instances share the same counter. """ - def __init__(self, requester: Any) -> None: + def __init__( + self, + requester: Any, + shared_count: list[int] | None = None, + shared_lock: threading.Lock | None = None, + ) -> None: self._requester = requester - self.count = 0 - self._thread_lock = threading.Lock() + self._shared_count: list[int] = shared_count if shared_count is not None else [0] + self._thread_lock = shared_lock or threading.Lock() + + @property + def count(self) -> int: + return self._shared_count[0] + + @count.setter + def count(self, value: int) -> None: + self._shared_count[0] = value + + def withLazy(self, lazy: Any) -> CountingRequester: + new_requester = self._requester.withLazy(lazy) + return CountingRequester( + new_requester, + shared_count=self._shared_count, + shared_lock=self._thread_lock, + ) def __getattr__(self, name: str) -> Any: attr = getattr(self._requester, name) + # PyGithub >=2.4.0 uses request* methods (requestJson, requestJsonAndCheck, etc.) + # for all REST API calls. This is tied to our pinned version — audit on PyGithub upgrades. if name.startswith("request") and callable(attr): def wrapper(*args: Any, **kwargs: Any) -> Any: # Increment counter with thread safety since PyGithub may run in threads. with self._thread_lock: - self.count += 1 + self._shared_count[0] += 1 return attr(*args, **kwargs) return wrapper @@ -107,7 +131,6 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. self.github_api: github.Github | None = None self.initial_rate_limit_remaining: int | None = None self.requester_wrapper: CountingRequester | None = None - self.initial_wrapper_count: int = 0 if not self.config.repository_data: raise RepositoryNotFoundInConfigError(f"Repository {self.repository_name} not found in config file") @@ -121,20 +144,16 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging. if github_api and self.token: self.github_api = github_api - # Wrap the requester to count API calls per this webhook instance - # This must be done BEFORE creating self.repository so it shares the wrapped requester - # PyGithub stores the requester in _Github__requester (name mangling) - if hasattr(self.github_api, "_Github__requester"): - requester = self.github_api._Github__requester - if isinstance(requester, CountingRequester): - # Already wrapped (shared Github instance), reuse existing wrapper - self.requester_wrapper = requester - else: - self.requester_wrapper = CountingRequester(requester) - self.github_api._Github__requester = self.requester_wrapper + # Wrap the requester to count API calls per this webhook instance. + # This must be done BEFORE creating self.repository so it shares the wrapped requester. + # PyGithub >=2.4.0 stores the requester in _Github__requester (name mangling). + requester = self.github_api._Github__requester + # Unwrap existing CountingRequester to get the real requester + if isinstance(requester, CountingRequester): + requester = requester._requester - # Capture initial count for per-webhook delta calculation - self.initial_wrapper_count = self.requester_wrapper.count + self.requester_wrapper = CountingRequester(requester) + self.github_api._Github__requester = self.requester_wrapper # Track initial rate limit for token spend calculation # Note: log_prefix not set yet, so we can't use it in error messages here @@ -195,14 +214,14 @@ async def _update_context_metrics(self) -> None: return if self.requester_wrapper: - self.ctx.token_spend = self.requester_wrapper.count - self.initial_wrapper_count + self.ctx.token_spend = self.requester_wrapper.count if self.initial_rate_limit_remaining is not None: self.ctx.initial_rate_limit = self.initial_rate_limit_remaining if self.requester_wrapper: - self.ctx.final_rate_limit = max( - 0, self.initial_rate_limit_remaining - (self.requester_wrapper.count - self.initial_wrapper_count) - ) + # Heuristic: assumes each REST call consumes one rate limit unit. + # Does not account for GitHub App client calls or endpoint-specific costs. + self.ctx.final_rate_limit = max(0, self.initial_rate_limit_remaining - self.requester_wrapper.count) # Update api_user self.ctx.api_user = self.api_user @@ -223,9 +242,8 @@ async def _get_token_metrics(self) -> str: # Use the wrapper count if available (thread-safe per request) # We skip checking global rate limit to avoid inflating the API call count with an extra call if self.requester_wrapper: - token_spend = self.requester_wrapper.count - self.initial_wrapper_count - # If rate limit was already low, or if the delta calculation leads to negative - # values due to race conditions (unlikely with correct locking), clamp to 0 + token_spend = self.requester_wrapper.count + # Clamp to 0 in case rate limit reset occurred between initial check and now remaining = max(0, self.initial_rate_limit_remaining - token_spend) return ( diff --git a/webhook_server/tests/conftest.py b/webhook_server/tests/conftest.py index 355fb1838..7f3d580bf 100644 --- a/webhook_server/tests/conftest.py +++ b/webhook_server/tests/conftest.py @@ -119,7 +119,10 @@ def github_webhook(mocker, request): mocker.patch(f"{base_import_path}.get_repository_github_app_api", return_value=True) mocker.patch("github.AuthenticatedUser", return_value=True) - mocker.patch(f"{base_import_path}.get_api_with_highest_rate_limit", return_value=("API", "TOKEN", "USER")) + mock_github_api = mocker.Mock() + mock_github_api._Github__requester = mocker.Mock() + mock_github_api.get_rate_limit.return_value.rate.remaining = 5000 + mocker.patch(f"{base_import_path}.get_api_with_highest_rate_limit", return_value=(mock_github_api, "TOKEN", "USER")) mocker.patch(f"{base_import_path}.get_github_repo_api", return_value=Repository()) mocker.patch(f"{base_import_path}.GithubWebhook.add_api_users_to_auto_verified_and_merged_users", return_value=None) diff --git a/webhook_server/tests/test_github_api_metrics.py b/webhook_server/tests/test_github_api_metrics.py index 043d7bd1a..0c066e234 100644 --- a/webhook_server/tests/test_github_api_metrics.py +++ b/webhook_server/tests/test_github_api_metrics.py @@ -13,6 +13,7 @@ def test_counting_requester_init(self): wrapper = CountingRequester(requester) assert wrapper._requester == requester assert wrapper.count == 0 + assert wrapper._shared_count == [0] assert isinstance(wrapper._thread_lock, type(threading.Lock())) def test_counting_requester_increments(self): @@ -60,6 +61,31 @@ def make_requests(): assert wrapper.count == 1000 + def test_counting_requester_with_lazy_shares_count(self): + requester = Mock() + requester.requestJsonAndCheck = Mock(return_value="result") + + # withLazy returns a new mock requester + lazy_requester = Mock() + lazy_requester.requestJsonAndCheck = Mock(return_value="lazy_result") + requester.withLazy = Mock(return_value=lazy_requester) + + wrapper = CountingRequester(requester) + + # Call on original wrapper + wrapper.requestJsonAndCheck("arg1") + assert wrapper.count == 1 + + # Create lazy version (simulates what PyGithub does in get_repo) + lazy_wrapper = wrapper.withLazy(True) + + # Call on lazy wrapper + lazy_wrapper.requestJsonAndCheck("arg2") + + # Both should share the count + assert wrapper.count == 2 + assert lazy_wrapper.count == 2 + class TestGithubWebhookMetrics: @pytest.fixture @@ -110,14 +136,13 @@ def test_init_wraps_requester_new( # Verify wrapper was created and set assert isinstance(gh.requester_wrapper, CountingRequester) assert mock_github_api._Github__requester == gh.requester_wrapper - assert gh.initial_wrapper_count == 0 @patch("webhook_server.libs.github_api.Config") @patch("webhook_server.libs.github_api.get_api_with_highest_rate_limit") @patch("webhook_server.libs.github_api.get_github_repo_api") @patch("webhook_server.libs.github_api.get_repository_github_app_api") @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") - def test_init_reuses_wrapper( + def test_init_unwraps_existing_wrapper( self, mock_color, mock_get_app_api, @@ -132,8 +157,9 @@ def test_init_reuses_wrapper( mock_config.return_value.repository_local_data.return_value = {} mock_github_api = Mock() - # Already wrapped - existing_wrapper = CountingRequester(Mock()) + # Already wrapped by a previous webhook + inner_requester = Mock() + existing_wrapper = CountingRequester(inner_requester) existing_wrapper.count = 5 mock_github_api._Github__requester = existing_wrapper @@ -145,9 +171,64 @@ def test_init_reuses_wrapper( gh = GithubWebhook(minimal_hook_data, minimal_headers, logger) - # Verify wrapper was reused - assert gh.requester_wrapper == existing_wrapper - assert gh.initial_wrapper_count == 5 + # Verify a NEW wrapper was created (not the existing one) + assert isinstance(gh.requester_wrapper, CountingRequester) + assert gh.requester_wrapper is not existing_wrapper + # New wrapper starts at count 0 + assert gh.requester_wrapper.count == 0 + # The new wrapper wraps the inner requester, not the old wrapper + assert gh.requester_wrapper._requester is inner_requester + + @patch("webhook_server.libs.github_api.Config") + @patch("webhook_server.libs.github_api.get_api_with_highest_rate_limit") + @patch("webhook_server.libs.github_api.get_github_repo_api") + @patch("webhook_server.libs.github_api.get_repository_github_app_api") + @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") + def test_concurrent_webhooks_have_independent_counts( + self, + mock_color, + mock_get_app_api, + mock_get_repo_api, + mock_get_api, + mock_config, + minimal_hook_data, + minimal_headers, + logger, + ): + """Regression test for #970: two concurrent webhooks must not share counts.""" + mock_config.return_value.repository = True + mock_config.return_value.repository_local_data.return_value = {} + + # Both webhooks use the same Github instance (same token) + mock_github_api = Mock() + mock_requester = Mock() + mock_requester.requestJsonAndCheck = Mock(return_value="result") + # withLazy returns a new mock that also has request methods + lazy_requester = Mock() + lazy_requester.requestJsonAndCheck = Mock(return_value="lazy_result") + mock_requester.withLazy = Mock(return_value=lazy_requester) + mock_github_api._Github__requester = mock_requester + mock_github_api.get_rate_limit.return_value.rate.remaining = 5000 + + mock_get_api.return_value = (mock_github_api, "token", "apiuser") + mock_get_repo_api.return_value = Mock() + mock_get_app_api.return_value = Mock() + mock_color.return_value = "test-repo" + + gh1 = GithubWebhook(minimal_hook_data, minimal_headers, logger) + gh2 = GithubWebhook(minimal_hook_data, minimal_headers, logger) + + # Each webhook has its own wrapper + assert gh1.requester_wrapper is not gh2.requester_wrapper + + # Simulate API calls through each webhook's wrapper + gh1.requester_wrapper.requestJsonAndCheck("a") + gh1.requester_wrapper.requestJsonAndCheck("b") + gh2.requester_wrapper.requestJsonAndCheck("c") + + # Counts must be independent + assert gh1.requester_wrapper.count == 2 + assert gh2.requester_wrapper.count == 1 @patch("webhook_server.libs.github_api.Config") @patch("webhook_server.libs.github_api.get_api_with_highest_rate_limit") @@ -195,7 +276,7 @@ async def test_get_token_metrics_with_wrapper( @patch("webhook_server.libs.github_api.get_repository_github_app_api") @patch("webhook_server.utils.helpers.get_repository_color_for_log_prefix") @pytest.mark.asyncio - async def test_get_token_metrics_with_reused_wrapper( + async def test_get_token_metrics_per_webhook_count( self, mock_color, mock_get_app_api, @@ -212,10 +293,8 @@ async def test_get_token_metrics_with_reused_wrapper( mock_github_api = Mock() mock_github_api.get_rate_limit.return_value.rate.remaining = 4995 - # Reused wrapper started at 5 - existing_wrapper = CountingRequester(Mock()) - existing_wrapper.count = 5 - mock_github_api._Github__requester = existing_wrapper + mock_requester = Mock() + mock_github_api._Github__requester = mock_requester mock_get_api.return_value = (mock_github_api, "token", "apiuser") mock_get_repo_api.return_value = Mock() @@ -223,15 +302,15 @@ async def test_get_token_metrics_with_reused_wrapper( gh = GithubWebhook(minimal_hook_data, minimal_headers, logger) - # Initial wrapper count should be 5 - assert gh.initial_wrapper_count == 5 + # New wrapper always starts at 0 + assert gh.requester_wrapper.count == 0 - # Simulate usage (+3 calls) - gh.requester_wrapper.count = 8 + # Simulate 3 API calls + gh.requester_wrapper.count = 3 metrics = await gh._get_token_metrics() - # Should show 3 calls (8 - 5) + # Should show 3 calls (count is used directly) assert "3 API calls" in metrics assert "initial: 4995" in metrics assert "remaining: 4992" in metrics @@ -259,8 +338,8 @@ async def test_get_token_metrics_fallback_reset( mock_github_api = Mock() # Initial rate limit mock_github_api.get_rate_limit.return_value.rate.remaining = 100 - # No wrapper (simulate failure to wrap) - del mock_github_api._Github__requester + mock_requester = Mock() + mock_github_api._Github__requester = mock_requester mock_get_api.return_value = (mock_github_api, "token", "apiuser") mock_get_repo_api.return_value = Mock() @@ -268,7 +347,7 @@ async def test_get_token_metrics_fallback_reset( gh = GithubWebhook(minimal_hook_data, minimal_headers, logger) - # Manually unset wrapper to test fallback + # Manually unset wrapper to test the fallback path in _get_token_metrics gh.requester_wrapper = None # Mock reset happened (final > initial) @@ -280,8 +359,13 @@ async def test_get_token_metrics_fallback_reset( assert "initial: 100" in metrics assert "final: 5000" in metrics - def test_del_safe_missing_attr(self, minimal_hook_data, minimal_headers, logger): - # Create instance but don't init fully to avoid setting attributes + def test_del_cleanup_without_clone_dir(self): + """Test that __del__ handles missing clone_repo_dir gracefully. + + __del__ uses getattr with a default because it can be called during failed + initialization when clone_repo_dir was never set. + """ gh = GithubWebhook.__new__(GithubWebhook) - # Should not raise AttributeError - gh.__del__() + # Call destructor directly to make failures deterministic — + # del gh would silently swallow any exceptions in __del__ + GithubWebhook.__del__(gh) diff --git a/webhook_server/tests/test_helpers.py b/webhook_server/tests/test_helpers.py index a187a85a3..b4d2a3bf9 100644 --- a/webhook_server/tests/test_helpers.py +++ b/webhook_server/tests/test_helpers.py @@ -156,6 +156,45 @@ def test_get_api_with_highest_rate_limit_invalid_tokens( assert token == "valid_token" assert user == "user2" + @patch("webhook_server.utils.helpers.get_apis_and_tokes_from_config") + @patch("webhook_server.utils.helpers.log_rate_limit") + def test_get_api_with_highest_rate_limit_single_token(self, mock_log_rate_limit: Mock, mock_get_apis: Mock) -> None: + """Test single-token short-circuit skips comparison loop.""" + mock_api = Mock() + mock_api.rate_limiting = [100, 5000] + mock_api.get_user.return_value.login = "user1" + mock_rate_limit = Mock() + mock_rate_limit.rate.remaining = 4500 + mock_rate_limit.rate.reset = Mock() + mock_rate_limit.rate.limit = 5000 + mock_api.get_rate_limit.return_value = mock_rate_limit + + mock_get_apis.return_value = [(mock_api, "token1")] + + with patch.dict(os.environ, {"WEBHOOK_SERVER_DATA_DIR": "webhook_server/tests/manifests"}): + config = Config(repository="test-repo") + api, token, user = get_api_with_highest_rate_limit(config=config, repository_name="test-repo") + + assert api == mock_api + assert token == "token1" + assert user == "user1" + # Should call get_rate_limit for logging, but not for comparison + mock_api.get_rate_limit.assert_called_once() + mock_log_rate_limit.assert_called_once() + + @patch("webhook_server.utils.helpers.get_apis_and_tokes_from_config") + def test_get_api_with_highest_rate_limit_single_token_invalid(self, mock_get_apis: Mock) -> None: + """Test single-token path rejects invalid token (rate limit 60).""" + mock_api = Mock() + mock_api.rate_limiting = [30, 60] # Invalid token indicator + + mock_get_apis.return_value = [(mock_api, "invalid_token")] + + with patch.dict(os.environ, {"WEBHOOK_SERVER_DATA_DIR": "webhook_server/tests/manifests"}): + config = Config(repository="test-repo") + with pytest.raises(NoApiTokenError, match="rate limit 60"): + get_api_with_highest_rate_limit(config=config, repository_name="test-repo") + def test_get_logger_with_params_log_file_path(self, tmp_path, monkeypatch): """Test get_logger_with_params with log_file that is not an absolute path.""" # Patch Config.get_value to return a log file name diff --git a/webhook_server/utils/context.py b/webhook_server/utils/context.py index 395a32e61..436938032 100644 --- a/webhook_server/utils/context.py +++ b/webhook_server/utils/context.py @@ -105,7 +105,7 @@ class WebhookContext: started_at: Webhook processing start time (UTC) completed_at: Webhook processing completion time (UTC) workflow_steps: Dict of workflow steps keyed by step name - token_spend: GitHub API tokens consumed (rate_limit_before - rate_limit_after) + token_spend: GitHub API calls counted by this webhook's requester wrapper initial_rate_limit: GitHub API rate limit at start final_rate_limit: GitHub API rate limit at end success: Overall execution success status diff --git a/webhook_server/utils/helpers.py b/webhook_server/utils/helpers.py index 92ee01073..dc881e9a7 100644 --- a/webhook_server/utils/helpers.py +++ b/webhook_server/utils/helpers.py @@ -492,6 +492,23 @@ def get_api_with_highest_rate_limit(config: Config, repository_name: str = "") - apis_and_tokens = get_apis_and_tokes_from_config(config=config) logger.debug(f"Checking {len(apis_and_tokens)} API(s) for highest rate limit") + # Short-circuit: single token doesn't need rate limit comparison + if len(apis_and_tokens) == 1: + _api, _token = apis_and_tokens[0] + if _api.rate_limiting[-1] == 60: + raise NoApiTokenError("Single configured token has rate limit 60 (indicates invalid token)") + + try: + _api_user = _api.get_user().login + except GithubException as ex: + raise NoApiTokenError(f"Single configured token is invalid: {ex}") from ex + + _rate_limit = _api.get_rate_limit() + log_rate_limit(rate_limit=_rate_limit, api_user=_api_user) + + logger.info(f"API user {_api_user} selected (single token configured)") + return _api, _token, _api_user + for _api, _token in apis_and_tokens: if _api.rate_limiting[-1] == 60: logger.warning("API has rate limit set to 60 which indicates an invalid token, skipping")