diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index dd19189332cf..2cc8dbbbe639 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3748,11 +3748,13 @@ def _fix_key(key):
                 else:
                     _loaded_keys = loaded_keys
                 not_initialized_submodules = set_initialized_submodules(model, _loaded_keys)
-                # if we're about to tie the output embeds to the input embeds we don't need to init them
+                # If we're about to tie the output embeds to the input embeds we don't need to init them
                 if hasattr(model.config, "tie_word_embeddings") and model.config.tie_word_embeddings:
                     output_embeddings = model.get_output_embeddings()
                     if output_embeddings is not None:
-                        output_embeddings._is_hf_initialized = True
+                        # Still need to initialize if there is a bias term since biases are not tied.
+                        if not hasattr(output_embeddings, "bias") or output_embeddings.bias is None:
+                            output_embeddings._is_hf_initialized = True
             else:
                 not_initialized_submodules = dict(model.named_modules())
             # This will only initialize submodules that are not marked as initialized by the line above.
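
To make the intent of the new check concrete, here is a minimal standalone sketch (not the transformers code path; the module names and sizes are illustrative) showing that tying only shares the weight matrix between the input embedding and the output projection, while any bias on the output projection stays a separate, untied parameter that still needs its own initialization:

```python
from torch import nn

# Illustrative sizes; any values work for the demonstration.
vocab_size, hidden_size = 10, 4

input_embeddings = nn.Embedding(vocab_size, hidden_size)          # weight: (vocab, hidden)
output_embeddings = nn.Linear(hidden_size, vocab_size, bias=True)  # weight: (vocab, hidden), plus bias

# Tie the weights by sharing the same Parameter object.
output_embeddings.weight = input_embeddings.weight

assert output_embeddings.weight is input_embeddings.weight  # weight is shared
assert output_embeddings.bias is not None                   # bias remains a free parameter

# Marking the whole output head as already initialized would silently skip the bias,
# so the fix only sets `_is_hf_initialized` when there is no bias to initialize.
needs_bias_init = hasattr(output_embeddings, "bias") and output_embeddings.bias is not None
print("bias still needs initialization:", needs_bias_init)
```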