From 54a3b4a5b5c72adea6cf27f3b126eaa41fa43aca Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Tue, 1 Apr 2025 14:50:13 -0700
Subject: [PATCH] feat: use cuda_graph by default for vllm

Signed-off-by: Parth Chadha
---
 nemo_reinforcer/models/generation/vllm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py
index edef3c69ae..e22110bf55 100644
--- a/nemo_reinforcer/models/generation/vllm.py
+++ b/nemo_reinforcer/models/generation/vllm.py
@@ -178,7 +178,8 @@ def __init__(
             gpu_memory_utilization=self.cfg["vllm_cfg"]["gpu_memory_utilization"],
             enable_prefix_caching=True,
             dtype="auto",
-            enforce_eager=True,
+            # Use CUDA graphs by default for performance; set enforce_eager=True to fall back to eager execution
+            enforce_eager=False,
             max_model_len=self.cfg["vllm_cfg"]["max_model_len"],
             trust_remote_code=True,
             worker_cls=UpdatableVllmInternalWorker,