diff --git a/src/art/local/backend.py b/src/art/local/backend.py index c9f79e3e8..77817c97a 100644 --- a/src/art/local/backend.py +++ b/src/art/local/backend.py @@ -321,6 +321,8 @@ async def _monitor_openai_server( max_tokens=1, timeout=5, ) + # get the completion response, exit the loop + break except Exception as e: # If the server is sleeping, a failed health check is okay if await self._services[model_name].vllm_engine_is_sleeping():