diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 6860cf30d884..032100e7a6e3 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -1186,7 +1186,10 @@ def __init__(self, config: Llama4Config): self.multi_modal_projector = Llama4MultiModalProjector(config) self.language_model = Llama4ForCausalLM(config.text_config) self.vocab_size = config.text_config.vocab_size - self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 + if hasattr(self.config, "pad_token_id"): + self.pad_token_id = self.config.pad_token_id + else: + self.pad_token_id = self.config.text_config.pad_token_id or -1 self.post_init()