diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index 1483245259..ada0bf2623 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -109,6 +109,8 @@ def configure_worker( init_kwargs["fraction_of_gpus"] = num_gpus env_vars["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" + # Skip vLLM's P2P check and rely on the driver to report peer-to-peer capability. + env_vars["VLLM_SKIP_P2P_CHECK"] = "1" return resources, env_vars, init_kwargs