diff --git a/src/edge_proxy/health_check/__init__.py b/src/edge_proxy/health_check/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/edge_proxy/health_check/responses.py b/src/edge_proxy/health_check/responses.py
new file mode 100644
index 0000000..dd52e59
--- /dev/null
+++ b/src/edge_proxy/health_check/responses.py
@@ -0,0 +1,31 @@
+import typing
+from datetime import datetime
+from typing import Optional
+
+from fastapi.responses import ORJSONResponse
+from starlette.background import BackgroundTask
+
+
+class HealthCheckResponse(ORJSONResponse):
+    def __init__(
+        self,
+        status_code: int = 200,
+        status: str = "ok",
+        reason: Optional[str] = None,
+        last_successful_update: Optional[datetime] = None,
+        headers: typing.Mapping[str, str] | None = None,
+        media_type: str | None = None,
+        background: BackgroundTask | None = None,
+    ):
+        content = {
+            "status": status,
+            "reason": reason,
+            "last_successful_update": last_successful_update,
+        }
+        super().__init__(
+            status_code=status_code,
+            content=content,
+            headers=headers,
+            media_type=media_type,
+            background=background,
+        )
diff --git a/src/edge_proxy/server.py b/src/edge_proxy/server.py
index 3c26d4a..5dbd029 100644
--- a/src/edge_proxy/server.py
+++ b/src/edge_proxy/server.py
@@ -1,5 +1,4 @@
-from contextlib import suppress
-from datetime import datetime
+from datetime import datetime, timedelta
 
 import httpx
 import structlog
@@ -8,6 +7,7 @@
 from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import ORJSONResponse
 
+from edge_proxy.health_check.responses import HealthCheckResponse
 from fastapi_utils.tasks import repeat_every
 
 from edge_proxy.cache import LocalMemEnvironmentsCache
@@ -41,13 +41,31 @@ async def unknown_key_error(request, exc):
 @app.get("/health", response_class=ORJSONResponse, deprecated=True)
 @app.get("/proxy/health", response_class=ORJSONResponse)
 async def health_check():
-    with suppress(TypeError):
-        last_updated = datetime.now() - environment_service.last_updated_at
-        buffer = 30 * len(settings.environment_key_pairs)  # 30s per environment
-        if last_updated.total_seconds() <= settings.api_poll_frequency_seconds + buffer:
-            return ORJSONResponse(status_code=200, content={"status": "ok"})
+    last_updated_at = environment_service.last_updated_at
+    if not last_updated_at:
+        return HealthCheckResponse(
+            status_code=500,
+            status="error",
+            reason="environment document(s) not updated.",
+            last_successful_update=None,
+        )
 
-    return ORJSONResponse(status_code=500, content={"status": "error"})
+    if settings.health_check.count_stale_documents_as_failing:
+        buffer = settings.health_check.grace_period_seconds * len(
+            settings.environment_key_pairs
+        )
+        threshold = datetime.now() - timedelta(
+            seconds=settings.api_poll_frequency_seconds + buffer
+        )
+        if last_updated_at < threshold:
+            return HealthCheckResponse(
+                status_code=500,
+                status="error",
+                reason="environment document(s) stale.",
+                last_successful_update=last_updated_at,
+            )
+
+    return HealthCheckResponse(last_successful_update=last_updated_at)
 
 
 @app.get("/api/v1/flags/", response_class=ORJSONResponse)
diff --git a/src/edge_proxy/settings.py b/src/edge_proxy/settings.py
index c6c565d..d8e2c1f 100644
--- a/src/edge_proxy/settings.py
+++ b/src/edge_proxy/settings.py
@@ -100,6 +100,11 @@ class ServerSettings(BaseModel):
     reload: bool = False
 
 
+class HealthCheckSettings(BaseModel):
+    count_stale_documents_as_failing: bool = True
+    grace_period_seconds: int = 30
+
+
 class AppSettings(BaseModel):
     environment_key_pairs: list[EnvironmentKeyPair] = Field(
         default_factory=lambda: [
@@ -128,6 +133,7 @@ class AppSettings(BaseModel):
     allow_origins: list[str] = Field(default_factory=lambda: ["*"])
     logging: LoggingSettings = LoggingSettings()
     server: ServerSettings = ServerSettings()
+    health_check: HealthCheckSettings = HealthCheckSettings()
 
 
 class AppConfig(AppSettings, BaseSettings):
diff --git a/tests/test_server.py b/tests/test_server.py
index 410427c..48c06fa 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -6,6 +6,7 @@
 from fastapi.testclient import TestClient
 from pytest_mock import MockerFixture
 
+from edge_proxy.settings import AppSettings, HealthCheckSettings
 from tests.fixtures.response_data import environment_1
 
 if typing.TYPE_CHECKING:
@@ -30,18 +31,53 @@ def test_health_check_returns_500_if_cache_was_not_updated(
 ) -> None:
     response = client.get("/proxy/health")
     assert response.status_code == 500
-    assert response.json() == {"status": "error"}
+    assert response.json() == {
+        "status": "error",
+        "reason": "environment document(s) not updated.",
+        "last_successful_update": None,
+    }
 
 
 def test_health_check_returns_500_if_cache_is_stale(
     mocker: MockerFixture,
     client: TestClient,
 ) -> None:
+    last_updated_at = datetime.now() - timedelta(days=10)
     mocked_environment_service = mocker.patch("edge_proxy.server.environment_service")
-    mocked_environment_service.last_updated_at = datetime.now() - timedelta(days=10)
+    mocked_environment_service.last_updated_at = last_updated_at
     response = client.get("/proxy/health")
     assert response.status_code == 500
-    assert response.json() == {"status": "error"}
+    assert response.json() == {
+        "status": "error",
+        "reason": "environment document(s) stale.",
+        "last_successful_update": last_updated_at.isoformat(),
+    }
+
+
+def test_health_check_returns_200_if_cache_is_stale_and_health_check_configured_correctly(
+    mocker: MockerFixture,
+    client: TestClient,
+) -> None:
+    # Given
+    settings = AppSettings(
+        health_check=HealthCheckSettings(count_stale_documents_as_failing=False)
+    )
+    mocker.patch("edge_proxy.server.settings", settings)
+
+    last_updated_at = datetime.now() - timedelta(days=10)
+    mocked_environment_service = mocker.patch("edge_proxy.server.environment_service")
+    mocked_environment_service.last_updated_at = last_updated_at
+
+    # When
+    response = client.get("/proxy/health")
+
+    # Then
+    assert response.status_code == 200
+    assert response.json() == {
+        "status": "ok",
+        "reason": None,
+        "last_successful_update": last_updated_at.isoformat(),
+    }
 
 
 def test_get_flags(