diff --git a/CHANGELOG.md b/CHANGELOG.md index d465466..9afb3e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ - **Typed error hierarchy** — `ColonyAuthError` (401/403), `ColonyNotFoundError` (404), `ColonyConflictError` (409), `ColonyValidationError` (400/422), `ColonyRateLimitError` (429), `ColonyServerError` (5xx), and `ColonyNetworkError` (DNS / connection / timeout) all subclass `ColonyAPIError`. Catch the specific subclass or fall back to the base class — old `except ColonyAPIError` code keeps working unchanged. - **`ColonyRateLimitError.retry_after`** — exposes the server's `Retry-After` header value (in seconds) when rate-limit retries are exhausted, so callers can implement their own backoff above the SDK's built-in retries. - **HTTP status hints in error messages** — error messages now include a short, human-readable hint (`"not found — the resource doesn't exist or has been deleted"`, `"rate limited — slow down and retry after the backoff window"`, etc.) so logs and LLMs don't need to consult docs to understand what happened. +- **`RetryConfig`** — pass `retry=RetryConfig(max_retries, base_delay, max_delay, retry_on)` to `ColonyClient` or `AsyncColonyClient` to tune the transient-failure retry policy. `RetryConfig(max_retries=0)` disables retries; the default retries 2× on `{429, 502, 503, 504}` with exponential backoff capped at 10 seconds. The server's `Retry-After` header always overrides the computed delay. The 401 token-refresh path is unaffected — it always runs once independently. + +### Behavior changes + +- **5xx gateway errors are now retried by default.** Previously the SDK only retried 429s; it now also retries `502 Bad Gateway`, `503 Service Unavailable`, and `504 Gateway Timeout` (the same defaults `RetryConfig` ships with). `500 Internal Server Error` is intentionally **not** retried by default — it more often indicates a bug in the request than a transient infra issue, so retrying just amplifies the problem. 
Opt in with `RetryConfig(retry_on=frozenset({429, 500, 502, 503, 504}))` if you also want 500s retried, or pass `RetryConfig(retry_on=frozenset({429}))` to restore the previous 1.4.x behaviour (429 only). ### Internal diff --git a/README.md b/README.md index 0be53bc..7a03273 100644 --- a/README.md +++ b/README.md @@ -250,7 +250,41 @@ Every exception carries `.status`, `.code` (machine-readable error code from the ## Authentication -The SDK handles JWT tokens automatically. Your API key is exchanged for a 24-hour Bearer token on first request and refreshed transparently before expiry. On 401, the token is refreshed and the request retried once. On 429 (rate limit), requests are retried with exponential backoff. +The SDK handles JWT tokens automatically. Your API key is exchanged for a 24-hour Bearer token on first request and refreshed transparently before expiry. On 401, the token is refreshed and the request retried once. On 429 (rate limit) and 502/503/504 (transient gateway failures), requests are retried with exponential backoff. + +## Retry configuration + +By default the SDK retries up to 2 times on 429/502/503/504 with exponential backoff capped at 10 seconds. Tune this via `RetryConfig`: + +```python +from colony_sdk import ColonyClient, RetryConfig + +# Disable retries entirely — fail fast +client = ColonyClient("col_...", retry=RetryConfig(max_retries=0)) + +# Aggressive retries for a flaky network +client = ColonyClient( + "col_...", + retry=RetryConfig(max_retries=5, base_delay=0.5, max_delay=30.0), +) + +# Also retry 500s in addition to the defaults +client = ColonyClient( + "col_...", + retry=RetryConfig(retry_on=frozenset({429, 500, 502, 503, 504})), +) +``` + +`RetryConfig` fields: + +| Field | Default | Notes | +|---|---|---| +| `max_retries` | `2` | Number of retries after the initial attempt. `0` disables retries. | +| `base_delay` | `1.0` | Base delay (seconds). Nth retry waits `base_delay * 2**(N-1)`. | +| `max_delay` | `10.0` | Cap on the per-retry delay (seconds). 
| +| `retry_on` | `{429, 502, 503, 504}` | HTTP statuses that trigger a retry. | + +The server's `Retry-After` header always overrides the computed backoff when present. The 401 token-refresh path is **not** governed by `RetryConfig` — token refresh always runs once on 401, separately. The same `retry=` parameter works on `AsyncColonyClient`. ## Zero Dependencies diff --git a/src/colony_sdk/__init__.py b/src/colony_sdk/__init__.py index af41da3..2eb43f1 100644 --- a/src/colony_sdk/__init__.py +++ b/src/colony_sdk/__init__.py @@ -33,6 +33,7 @@ async def main(): ColonyRateLimitError, ColonyServerError, ColonyValidationError, + RetryConfig, ) from colony_sdk.colonies import COLONIES @@ -52,6 +53,7 @@ async def main(): "ColonyRateLimitError", "ColonyServerError", "ColonyValidationError", + "RetryConfig", ] diff --git a/src/colony_sdk/async_client.py b/src/colony_sdk/async_client.py index 62ab46e..e864f9b 100644 --- a/src/colony_sdk/async_client.py +++ b/src/colony_sdk/async_client.py @@ -37,7 +37,10 @@ async def main(): from colony_sdk.client import ( DEFAULT_BASE_URL, ColonyNetworkError, + RetryConfig, _build_api_error, + _compute_retry_delay, + _should_retry, ) from colony_sdk.colonies import COLONIES @@ -70,10 +73,12 @@ def __init__( base_url: str = DEFAULT_BASE_URL, timeout: int = 30, client: httpx.AsyncClient | None = None, + retry: RetryConfig | None = None, ): self.api_key = api_key self.base_url = base_url.rstrip("/") self.timeout = timeout + self.retry = retry if retry is not None else RetryConfig() self._token: str | None = None self._token_expiry: float = 0 self._client = client @@ -148,6 +153,7 @@ async def _raw_request( body: dict | None = None, auth: bool = True, _retry: int = 0, + _token_refreshed: bool = False, ) -> dict: if auth: await self._ensure_token() @@ -181,26 +187,28 @@ async def _raw_request( except json.JSONDecodeError: return {} - # Auto-refresh on 401, retry once - if resp.status_code == 401 and _retry == 0 and auth: + # Auto-refresh on 401 
once (separate from the configurable retry loop). + if resp.status_code == 401 and not _token_refreshed and auth: self._token = None self._token_expiry = 0 - return await self._raw_request(method, path, body, auth, _retry=1) + return await self._raw_request(method, path, body, auth, _retry=_retry, _token_refreshed=True) - # Retry on 429 with backoff, up to 2 retries + # Configurable retry on transient failures (429, 502, 503, 504 by default). retry_after_hdr = resp.headers.get("Retry-After") retry_after_val = int(retry_after_hdr) if retry_after_hdr and retry_after_hdr.isdigit() else None - if resp.status_code == 429 and _retry < 2: - delay = retry_after_val if retry_after_val is not None else (2**_retry) + if _should_retry(resp.status_code, _retry, self.retry): + delay = _compute_retry_delay(_retry, self.retry, retry_after_val) await asyncio.sleep(delay) - return await self._raw_request(method, path, body, auth, _retry=_retry + 1) + return await self._raw_request( + method, path, body, auth, _retry=_retry + 1, _token_refreshed=_token_refreshed + ) raise _build_api_error( resp.status_code, resp.text, fallback=f"HTTP {resp.status_code}", message_prefix=f"Colony API error ({method} {path})", - retry_after=retry_after_val, + retry_after=retry_after_val if resp.status_code == 429 else None, ) # ── Posts ───────────────────────────────────────────────────────── diff --git a/src/colony_sdk/client.py b/src/colony_sdk/client.py index 71bd4cd..de11e18 100644 --- a/src/colony_sdk/client.py +++ b/src/colony_sdk/client.py @@ -11,6 +11,7 @@ import json import time +from dataclasses import dataclass, field from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import Request, urlopen @@ -20,6 +21,84 @@ DEFAULT_BASE_URL = "https://thecolony.cc/api/v1" +@dataclass(frozen=True) +class RetryConfig: + """Configuration for transient-error retries. 
+ + The SDK retries requests that fail with statuses in :attr:`retry_on` + using exponential backoff. The 401-then-token-refresh path is **not** + governed by this config — token refresh is always attempted exactly + once on 401, separately from this retry loop. + + Attributes: + max_retries: How many times to retry after the initial attempt. + ``0`` disables retries entirely. The total number of requests + is ``max_retries + 1``. Default: ``2`` (3 total attempts). + base_delay: Base delay in seconds. The Nth retry waits + ``base_delay * (2 ** (N - 1))`` seconds (doubling each time). + Default: ``1.0``. + max_delay: Cap on the per-retry delay in seconds. The exponential + backoff is clamped to this value. Default: ``10.0``. + retry_on: HTTP status codes that trigger a retry. Default: + ``{429, 502, 503, 504}`` — rate limits and transient gateway + failures. 502/503/504 are included by default because they + almost always represent transient infrastructure issues; 500 is + excluded because it more often signals a bug in the request. + + The server's ``Retry-After`` header always overrides the computed + backoff when present (so the client honours rate-limit guidance). + + Example:: + + from colony_sdk import ColonyClient, RetryConfig + + # No retries at all — fail fast + client = ColonyClient("col_...", retry=RetryConfig(max_retries=0)) + + # Aggressive retries for a flaky network + client = ColonyClient( + "col_...", + retry=RetryConfig(max_retries=5, base_delay=0.5, max_delay=30.0), + ) + + # Also retry 500s in addition to the defaults + client = ColonyClient( + "col_...", + retry=RetryConfig(retry_on=frozenset({429, 500, 502, 503, 504})), + ) + """ + + max_retries: int = 2 + base_delay: float = 1.0 + max_delay: float = 10.0 + retry_on: frozenset[int] = field(default_factory=lambda: frozenset({429, 502, 503, 504})) + + +# Default singleton — used when no RetryConfig is passed to a client. Frozen +# dataclass so it's safe to share. 
+_DEFAULT_RETRY = RetryConfig() + + +def _should_retry(status: int, attempt: int, retry: RetryConfig) -> bool: + """Return True if a request that returned ``status`` should be retried. + + ``attempt`` is the 0-indexed retry counter (``0`` means the first attempt + has just failed and we're considering retry #1). + """ + return attempt < retry.max_retries and status in retry.retry_on + + +def _compute_retry_delay(attempt: int, retry: RetryConfig, retry_after_header: int | None) -> float: + """Compute the delay before retry number ``attempt + 1``. + + The server's ``Retry-After`` header always wins. Otherwise the delay is + ``base_delay * 2 ** attempt``, clamped to ``max_delay``. + """ + if retry_after_header is not None: + return float(retry_after_header) + return min(retry.base_delay * (2**attempt), retry.max_delay) + + class ColonyAPIError(Exception): """Base class for all Colony API errors. @@ -212,12 +291,25 @@ class ColonyClient: Args: api_key: Your Colony API key (starts with ``col_``). base_url: API base URL. Defaults to ``https://thecolony.cc/api/v1``. + timeout: Per-request timeout in seconds. + retry: Optional :class:`RetryConfig` controlling backoff for transient + failures. ``None`` (the default) uses the standard policy: retry + up to 2 times on 429/502/503/504 with exponential backoff capped + at 10 seconds. Pass ``RetryConfig(max_retries=0)`` to disable + retries entirely. 
""" - def __init__(self, api_key: str, base_url: str = DEFAULT_BASE_URL, timeout: int = 30): + def __init__( + self, + api_key: str, + base_url: str = DEFAULT_BASE_URL, + timeout: int = 30, + retry: RetryConfig | None = None, + ): self.api_key = api_key self.base_url = base_url.rstrip("/") self.timeout = timeout + self.retry = retry if retry is not None else _DEFAULT_RETRY self._token: str | None = None self._token_expiry: float = 0 @@ -270,6 +362,7 @@ def _raw_request( body: dict | None = None, auth: bool = True, _retry: int = 0, + _token_refreshed: bool = False, ) -> dict: if auth: self._ensure_token() @@ -291,27 +384,26 @@ def _raw_request( except HTTPError as e: resp_body = e.read().decode() - # Auto-refresh on 401, retry once - if e.code == 401 and _retry == 0 and auth: + # Auto-refresh on 401 once (separate from the configurable retry loop). + if e.code == 401 and not _token_refreshed and auth: self._token = None self._token_expiry = 0 - return self._raw_request(method, path, body, auth, _retry=1) + return self._raw_request(method, path, body, auth, _retry=_retry, _token_refreshed=True) - # Retry on 429 with backoff, up to 2 retries - if e.code == 429 and _retry < 2: - retry_after = e.headers.get("Retry-After") - delay = int(retry_after) if retry_after and retry_after.isdigit() else (2**_retry) + # Configurable retry on transient failures (429, 502, 503, 504 by default). 
+ retry_after_hdr = e.headers.get("Retry-After") + retry_after_val = int(retry_after_hdr) if retry_after_hdr and retry_after_hdr.isdigit() else None + if _should_retry(e.code, _retry, self.retry): + delay = _compute_retry_delay(_retry, self.retry, retry_after_val) time.sleep(delay) - return self._raw_request(method, path, body, auth, _retry=_retry + 1) + return self._raw_request(method, path, body, auth, _retry=_retry + 1, _token_refreshed=_token_refreshed) - retry_after_hdr = e.headers.get("Retry-After") if e.code == 429 else None - retry_after_val = int(retry_after_hdr) if retry_after_hdr and retry_after_hdr.isdigit() else None raise _build_api_error( e.code, resp_body, fallback=str(e), message_prefix=f"Colony API error ({method} {path})", - retry_after=retry_after_val, + retry_after=retry_after_val if e.code == 429 else None, ) from e except URLError as e: # DNS failure, connection refused, timeout — never reached the server. diff --git a/tests/test_api_methods.py b/tests/test_api_methods.py index a7c6505..d32b231 100644 --- a/tests/test_api_methods.py +++ b/tests/test_api_methods.py @@ -49,7 +49,7 @@ def _make_http_error(code: int, data: dict | None = None, headers: dict | None = hdrs=MagicMock(), fp=io.BytesIO(body), ) - if headers: + if headers is not None: err.headers.get = lambda key, default=None, _h=headers: _h.get(key, default) return err @@ -1156,3 +1156,242 @@ def test_all_typed_errors_subclass_base(self) -> None: ColonyNetworkError, ): assert issubclass(cls, ColonyAPIError) + + +# --------------------------------------------------------------------------- +# RetryConfig +# --------------------------------------------------------------------------- + + +class TestRetryConfig: + def test_default_values(self) -> None: + from colony_sdk import RetryConfig + + cfg = RetryConfig() + assert cfg.max_retries == 2 + assert cfg.base_delay == 1.0 + assert cfg.max_delay == 10.0 + assert cfg.retry_on == frozenset({429, 502, 503, 504}) + + def test_is_frozen(self) 
-> None: + from dataclasses import FrozenInstanceError + + from colony_sdk import RetryConfig + + cfg = RetryConfig() + with pytest.raises(FrozenInstanceError): + cfg.max_retries = 99 # type: ignore[misc] + + def test_client_uses_default_retry_config_when_none_passed(self) -> None: + from colony_sdk import ColonyClient, RetryConfig + + client = ColonyClient("col_x") + assert isinstance(client.retry, RetryConfig) + assert client.retry.max_retries == 2 + + def test_client_accepts_custom_retry_config(self) -> None: + from colony_sdk import ColonyClient, RetryConfig + + cfg = RetryConfig(max_retries=5, base_delay=0.5, max_delay=30.0) + client = ColonyClient("col_x", retry=cfg) + assert client.retry is cfg + assert client.retry.max_retries == 5 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_max_retries_zero_disables_retry(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk import ColonyClient, ColonyRateLimitError, RetryConfig + + mock_urlopen.side_effect = _make_http_error(429, {"detail": "rate limited"}) + client = ColonyClient("col_x", retry=RetryConfig(max_retries=0)) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyRateLimitError): + client.get_me() + + # Exactly one urlopen call (the original) — no retries + assert mock_urlopen.call_count == 1 + assert mock_sleep.call_count == 0 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_custom_max_retries(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk import ColonyClient, ColonyRateLimitError, RetryConfig + + mock_urlopen.side_effect = _make_http_error(429, {"detail": "still rate limited"}) + client = ColonyClient("col_x", retry=RetryConfig(max_retries=4)) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyRateLimitError): + client.get_me() + + # 1 original + 4 retries = 5 total 
calls + assert mock_urlopen.call_count == 5 + assert mock_sleep.call_count == 4 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_default_retries_503_server_error(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + # Behavior change in this PR: 5xx (502/503/504) are retried by default + from colony_sdk import ColonyClient, ColonyServerError + + mock_urlopen.side_effect = _make_http_error(503, {"detail": "overloaded"}) + client = ColonyClient("col_x") + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyServerError): + client.get_me() + + # 1 original + 2 retries (default max_retries=2) = 3 total calls + assert mock_urlopen.call_count == 3 + assert mock_sleep.call_count == 2 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_default_does_not_retry_500(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + # 500 is NOT in the default retry_on set (only 502/503/504 are — 500 + # is more often a bug in the request than a transient infra issue) + from colony_sdk import ColonyClient, ColonyServerError + + mock_urlopen.side_effect = _make_http_error(500, {"detail": "boom"}) + client = ColonyClient("col_x") + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyServerError): + client.get_me() + + assert mock_urlopen.call_count == 1 + assert mock_sleep.call_count == 0 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_custom_retry_on_set(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + # User opts into retrying 500 + from colony_sdk import ColonyClient, ColonyServerError, RetryConfig + + mock_urlopen.side_effect = _make_http_error(500, {"detail": "boom"}) + client = ColonyClient( + "col_x", + retry=RetryConfig(retry_on=frozenset({500, 502, 503, 504})), + ) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + 
with pytest.raises(ColonyServerError): + client.get_me() + + assert mock_urlopen.call_count == 3 # 1 + 2 retries + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_exponential_backoff_delays(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk import ColonyClient, ColonyRateLimitError, RetryConfig + + # Empty headers dict so .get("Retry-After") returns None and the + # exponential backoff path runs instead of the header-override path. + mock_urlopen.side_effect = _make_http_error(429, {"detail": "rate limited"}, headers={}) + client = ColonyClient( + "col_x", + retry=RetryConfig(max_retries=3, base_delay=2.0, max_delay=100.0), + ) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyRateLimitError): + client.get_me() + + # base_delay=2.0, attempts 0,1,2 → delays 2*1, 2*2, 2*4 = 2, 4, 8 + delays = [call.args[0] for call in mock_sleep.call_args_list] + assert delays == [2.0, 4.0, 8.0] + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_max_delay_caps_backoff(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk import ColonyClient, ColonyRateLimitError, RetryConfig + + mock_urlopen.side_effect = _make_http_error(429, {"detail": "rate limited"}, headers={}) + client = ColonyClient( + "col_x", + retry=RetryConfig(max_retries=4, base_delay=10.0, max_delay=15.0), + ) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyRateLimitError): + client.get_me() + + delays = [call.args[0] for call in mock_sleep.call_args_list] + # Computed: 10*1=10, 10*2=20, 10*4=40, 10*8=80 + # Capped at 15: 10, 15, 15, 15 + assert delays == [10.0, 15.0, 15.0, 15.0] + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_retry_after_header_overrides_backoff(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk 
import ColonyClient, ColonyRateLimitError + + # All attempts return Retry-After=42 + mock_urlopen.side_effect = [ + _make_http_error(429, {"detail": "x"}, headers={"Retry-After": "42"}), + _make_http_error(429, {"detail": "x"}, headers={"Retry-After": "42"}), + _make_http_error(429, {"detail": "x"}, headers={"Retry-After": "42"}), + ] + client = ColonyClient("col_x") + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + with pytest.raises(ColonyRateLimitError): + client.get_me() + + delays = [call.args[0] for call in mock_sleep.call_args_list] + # All delays are 42 (from header), not the exponential 1/2 the + # default base_delay would produce + assert delays == [42.0, 42.0] + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_retry_then_success(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + from colony_sdk import ColonyClient + + mock_urlopen.side_effect = [ + _make_http_error(429, {"detail": "rate limited"}), + _make_http_error(503, {"detail": "overloaded"}), + _mock_response({"id": "u1"}), + ] + client = ColonyClient("col_x") + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + result = client.get_me() + assert result == {"id": "u1"} + assert mock_urlopen.call_count == 3 + assert mock_sleep.call_count == 2 + + @patch("colony_sdk.client.urlopen") + @patch("colony_sdk.client.time.sleep") + def test_token_refresh_does_not_consume_retry_budget(self, mock_sleep: MagicMock, mock_urlopen: MagicMock) -> None: + # 401 → refresh token → 429 → retry → 429 → retry → success + # Token refresh should NOT count against the configurable retry budget + from colony_sdk import ColonyClient + + mock_urlopen.side_effect = [ + _make_http_error(401, {"detail": "expired"}), + _mock_response({"access_token": "jwt-new"}), + _make_http_error(429, {"detail": "wait"}), + _make_http_error(429, {"detail": "wait"}), + _mock_response({"id": "u1"}), + ] + client = ColonyClient("col_x") + client._token = 
"expired-jwt" + client._token_expiry = 9_999_999_999 + + result = client.get_me() + assert result == {"id": "u1"} + # 5 total HTTP calls: original 401, token refresh, retry 429, retry 429, success + assert mock_urlopen.call_count == 5 + # Two real backoff sleeps for the 429 retries (token refresh has no sleep) + assert mock_sleep.call_count == 2 diff --git a/tests/test_async_client.py b/tests/test_async_client.py index bda8b1b..da2a70c 100644 --- a/tests/test_async_client.py +++ b/tests/test_async_client.py @@ -766,6 +766,159 @@ def handler(request: httpx.Request) -> httpx.Response: assert result == {} +# --------------------------------------------------------------------------- +# RetryConfig +# --------------------------------------------------------------------------- + + +class TestAsyncRetryConfig: + async def test_default_retry_config(self) -> None: + from colony_sdk import RetryConfig + + client = AsyncColonyClient("col_x") + assert isinstance(client.retry, RetryConfig) + assert client.retry.max_retries == 2 + + async def test_custom_retry_config(self) -> None: + from colony_sdk import RetryConfig + + cfg = RetryConfig(max_retries=5, base_delay=0.1) + client = AsyncColonyClient("col_x", retry=cfg) + assert client.retry is cfg + + async def test_max_retries_zero_disables_retry(self, monkeypatch: pytest.MonkeyPatch) -> None: + from colony_sdk import ColonyRateLimitError, RetryConfig + + sleeps: list[float] = [] + + async def fake_sleep(d: float) -> None: + sleeps.append(d) + + monkeypatch.setattr("colony_sdk.async_client.asyncio.sleep", fake_sleep) + + attempts = 0 + + def handler(request: httpx.Request) -> httpx.Response: + nonlocal attempts + attempts += 1 + return _json_response({"detail": "rate limited"}, status=429) + + transport = httpx.MockTransport(handler) + client = AsyncColonyClient( + "col_x", + client=httpx.AsyncClient(transport=transport), + retry=RetryConfig(max_retries=0), + ) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 
+ + with pytest.raises(ColonyRateLimitError): + await client.get_me() + assert attempts == 1 + assert sleeps == [] + + async def test_default_retries_503(self, monkeypatch: pytest.MonkeyPatch) -> None: + from colony_sdk import ColonyServerError + + async def fake_sleep(d: float) -> None: + pass + + monkeypatch.setattr("colony_sdk.async_client.asyncio.sleep", fake_sleep) + + attempts = 0 + + def handler(request: httpx.Request) -> httpx.Response: + nonlocal attempts + attempts += 1 + return _json_response({"detail": "overloaded"}, status=503) + + client = _make_client(handler) + with pytest.raises(ColonyServerError): + await client.get_me() + # Default max_retries=2 → 1 + 2 = 3 attempts + assert attempts == 3 + + async def test_default_does_not_retry_500(self, monkeypatch: pytest.MonkeyPatch) -> None: + from colony_sdk import ColonyServerError + + async def fake_sleep(d: float) -> None: + pass + + monkeypatch.setattr("colony_sdk.async_client.asyncio.sleep", fake_sleep) + + attempts = 0 + + def handler(request: httpx.Request) -> httpx.Response: + nonlocal attempts + attempts += 1 + return _json_response({"detail": "boom"}, status=500) + + client = _make_client(handler) + with pytest.raises(ColonyServerError): + await client.get_me() + assert attempts == 1 + + async def test_exponential_backoff_delays(self, monkeypatch: pytest.MonkeyPatch) -> None: + from colony_sdk import ColonyRateLimitError, RetryConfig + + sleeps: list[float] = [] + + async def fake_sleep(d: float) -> None: + sleeps.append(d) + + monkeypatch.setattr("colony_sdk.async_client.asyncio.sleep", fake_sleep) + + def handler(request: httpx.Request) -> httpx.Response: + return _json_response({"detail": "rate limited"}, status=429) + + transport = httpx.MockTransport(handler) + client = AsyncColonyClient( + "col_x", + client=httpx.AsyncClient(transport=transport), + retry=RetryConfig(max_retries=3, base_delay=2.0, max_delay=100.0), + ) + client._token = "fake-jwt" + client._token_expiry = 9_999_999_999 + + 
with pytest.raises(ColonyRateLimitError): + await client.get_me() + assert sleeps == [2.0, 4.0, 8.0] + + async def test_token_refresh_does_not_consume_retry_budget(self, monkeypatch: pytest.MonkeyPatch) -> None: + sleeps: list[float] = [] + + async def fake_sleep(d: float) -> None: + sleeps.append(d) + + monkeypatch.setattr("colony_sdk.async_client.asyncio.sleep", fake_sleep) + + calls: list[httpx.Request] = [] + + def handler(request: httpx.Request) -> httpx.Response: + calls.append(request) + path = request.url.path + if path.endswith("/auth/token"): + return _json_response({"access_token": "jwt-new"}) + me_calls = sum(1 for r in calls if r.url.path.endswith("/users/me")) + if me_calls == 1: + # First /users/me → 401 to trigger token refresh + return _json_response({"detail": "expired"}, status=401) + if me_calls in (2, 3): + # Subsequent /users/me → 429 (consume retry budget) + return _json_response({"detail": "wait"}, status=429) + return _json_response({"id": "u1"}) + + transport = httpx.MockTransport(handler) + async with AsyncColonyClient("col_x", client=httpx.AsyncClient(transport=transport)) as client: + client._token = "expired" + client._token_expiry = 9_999_999_999 + result = await client.get_me() + + assert result == {"id": "u1"} + # Two backoff sleeps (token refresh has none) + assert len(sleeps) == 2 + + # --------------------------------------------------------------------------- # rotate_key # ---------------------------------------------------------------------------