diff --git a/src/art/local/backend.py b/src/art/local/backend.py
index c9f79e3e8..77817c97a 100644
--- a/src/art/local/backend.py
+++ b/src/art/local/backend.py
@@ -321,6 +321,8 @@ async def _monitor_openai_server(
                             max_tokens=1,
                             timeout=5,
                         )
+                        # Completion request returned without raising; exit the loop
+                        break
                     except Exception as e:
                         # If the server is sleeping, a failed health check is okay
                         if await self._services[model_name].vllm_engine_is_sleeping():