From 6ddd3ca146915190558c4cd98d94df6b1ee6b1c5 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 6 Jan 2026 08:17:04 +0000 Subject: [PATCH] fix: gemma3 27b must now have skip_tokenizer_init=False in vllm Signed-off-by: Terry Kong --- nemo_rl/models/generation/vllm/vllm_worker.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nemo_rl/models/generation/vllm/vllm_worker.py b/nemo_rl/models/generation/vllm/vllm_worker.py index 75e3334d4a..d2de455c22 100644 --- a/nemo_rl/models/generation/vllm/vllm_worker.py +++ b/nemo_rl/models/generation/vllm/vllm_worker.py @@ -388,6 +388,14 @@ def _patch_vllm_vit_flash_attn_backend(): ) # disable quantization vllm_kwargs["hf_overrides"]["quantization_config"] = {} + elif "Gemma3ForConditionalGeneration" in getattr( + hf_config, "architectures", [] + ): + if self.cfg["vllm_cfg"]["skip_tokenizer_init"]: + print( + "Gemma3ForConditionalGeneration models may crash when skip_tokenizer_init is True. NeMo-RL is forcing it to False for this architecture. See https://github.com/NVIDIA-NeMo/RL/issues/1681 for more details." + ) + self.cfg["vllm_cfg"]["skip_tokenizer_init"] = False llm_kwargs = dict( model=self.model_name,