From c4a04146b036da3cf0829163e1f8d6159f364f35 Mon Sep 17 00:00:00 2001 From: jubick1337 Date: Sat, 12 Jul 2025 12:59:36 -0700 Subject: [PATCH] fix: adjust temperature scaling logic based on engine version Signed-off-by: jubick1337 --- nemo_rl/models/policy/dtensor_policy_worker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index df8b4e734f..d3dda33cd9 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -563,7 +563,12 @@ def train( "generation" in self.cfg and self.cfg["generation"] is not None ): - logits.div_(self.cfg["generation"]["temperature"]) + # The V1 engine returns raw logits before temperature scaling. + # The V0 engine (when VLLM_USE_V1 is not '1') returns scaled logits. + # Therefore, we only divide if we are NOT using the V1 engine. + use_v1_engine = os.environ.get("VLLM_USE_V1") == "1" + if not use_v1_engine: + logits.div_(self.cfg["generation"]["temperature"]) if self.cp_size > 1: seq_index_dtensor = (