diff --git a/src/transformers/models/t5gemma/configuration_t5gemma.py b/src/transformers/models/t5gemma/configuration_t5gemma.py
index 76ad99132056..f2820789afd6 100644
--- a/src/transformers/models/t5gemma/configuration_t5gemma.py
+++ b/src/transformers/models/t5gemma/configuration_t5gemma.py
@@ -299,6 +299,7 @@ def __init__(
         super().__init__(**kwargs)

         self.is_encoder_decoder = is_encoder_decoder
+        self.num_hidden_layers = kwargs.get("num_hidden_layers", decoder.num_hidden_layers)
         self.use_cache = kwargs.get("use_cache", decoder.use_cache)
         self.initializer_range = kwargs.get("initializer_range", decoder.initializer_range)
         self.dropout_rate = dropout_rate
diff --git a/src/transformers/models/t5gemma/modular_t5gemma.py b/src/transformers/models/t5gemma/modular_t5gemma.py
index d41c8796e5ee..10b746b76ef6 100644
--- a/src/transformers/models/t5gemma/modular_t5gemma.py
+++ b/src/transformers/models/t5gemma/modular_t5gemma.py
@@ -314,6 +314,7 @@ def __init__(
         super().__init__(**kwargs)

         self.is_encoder_decoder = is_encoder_decoder
+        self.num_hidden_layers = kwargs.get("num_hidden_layers", decoder.num_hidden_layers)
         self.use_cache = kwargs.get("use_cache", decoder.use_cache)
         self.initializer_range = kwargs.get("initializer_range", decoder.initializer_range)
         self.dropout_rate = dropout_rate
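
Both hunks add the same line: the composite T5Gemma config now exposes `num_hidden_layers`, mirroring the decoder sub-config unless the caller passes an explicit override through `**kwargs`. A minimal, self-contained sketch of that fallback pattern is below; the `DecoderConfig` stand-in is hypothetical, standing in for the `decoder` sub-config in the real constructor.

```python
# Hypothetical stand-in for the decoder sub-config referenced in the diff.
class DecoderConfig:
    num_hidden_layers = 12

decoder = DecoderConfig()

# No explicit override: the composite config inherits the decoder's value.
kwargs = {}
assert kwargs.get("num_hidden_layers", decoder.num_hidden_layers) == 12

# Explicit override via kwargs wins over the decoder default.
kwargs = {"num_hidden_layers": 24}
assert kwargs.get("num_hidden_layers", decoder.num_hidden_layers) == 24
```

This matches the pattern already used for `use_cache` and `initializer_range` on the surrounding lines, so downstream code that reads `config.num_hidden_layers` works on the composite config without reaching into `config.decoder`.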