426 changes: 172 additions & 254 deletions tests/generation/test_utils.py

Large diffs are not rendered by default.

29 changes: 7 additions & 22 deletions tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
@@ -283,28 +283,6 @@ def is_pipeline_test_to_skip(

return False

# overwrite from GenerationTesterMixin to solve problem
# with conflicting random seeds
def _get_input_ids_and_config(self, batch_size=2):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.attention_type = "original_full"

input_ids = inputs_dict.pop(self.input_name)
_ = inputs_dict.pop("attention_mask", None)
_ = inputs_dict.pop("decoder_input_ids", None)
_ = inputs_dict.pop("decoder_attention_mask", None)
attention_mask = torch.ones_like(input_ids, dtype=torch.long)

# cut to half length & take max batch_size 3
sequence_length = input_ids.shape[-1] // 2
input_ids = input_ids[:batch_size, :sequence_length]
attention_mask = attention_mask[:batch_size, :sequence_length]

if config.eos_token_id is not None and config.pad_token_id is None:
# hack to allow generate for models such as GPT2 as is done in `generate()`
config.pad_token_id = config.eos_token_id
return config, input_ids, attention_mask, inputs_dict

def setUp(self):
self.model_tester = BigBirdPegasusModelTester(self)
self.config_tester = ConfigTester(self, config_class=BigBirdPegasusConfig)
@@ -485,6 +463,13 @@ def test_for_change_to_full_attn(self):
def test_load_save_without_tied_weights(self):
pass

def test_generate_with_head_masking(self):
# overwritten to temporarily switch the attention type to `original_full`
original_self_attention_type = self.model_tester.attention_type
self.model_tester.attention_type = "original_full"
super().test_generate_with_head_masking()
self.model_tester.attention_type = original_self_attention_type


@require_torch
@require_sentencepiece
2 changes: 1 addition & 1 deletion tests/models/chameleon/test_modeling_chameleon.py
@@ -116,7 +116,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))
Review comment from @gante (Contributor Author), Sep 25, 2024:
common pattern: input_mask, which was then passed around as attention_mask, was a torch.float32 instead of a torch.long 👀
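A minimal sketch (standalone, not part of the diff) illustrating the dtype issue the comment points out; the shapes and token ids are hypothetical stand-ins for what a model tester would produce:

```python
import torch

# Hypothetical token ids, as a model tester would generate them: integer dtype (torch.int64).
input_ids = torch.randint(0, 100, (2, 7))

# Old pattern: torch.ones defaults to torch.float32, so the resulting mask is float.
old_mask = torch.tril(torch.ones(2, 7))

# New pattern: torch.ones_like inherits the integer dtype of input_ids.
new_mask = torch.tril(torch.ones_like(input_ids))

print(old_mask.dtype)  # torch.float32
print(new_mask.dtype)  # torch.int64
```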


sequence_labels = None
token_labels = None
2 changes: 1 addition & 1 deletion tests/models/cohere/test_modeling_cohere.py
@@ -95,7 +95,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
1 change: 0 additions & 1 deletion tests/models/dac/test_modeling_dac.py
@@ -123,7 +123,6 @@ class DacModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
test_headmasking = False
test_resize_embeddings = False
pipeline_model_mapping = {"feature-extraction": DacModel} if is_torch_available() else {}
input_name = "input_values"

def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
# model does not have attention and does not support returning hidden states
1 change: 0 additions & 1 deletion tests/models/encodec/test_modeling_encodec.py
@@ -141,7 +141,6 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
test_headmasking = False
test_resize_embeddings = False
pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {}
input_name = "input_values"

def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
# model does not have attention and does not support returning hidden states
2 changes: 1 addition & 1 deletion tests/models/gemma/test_modeling_gemma.py
@@ -119,7 +119,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
2 changes: 1 addition & 1 deletion tests/models/granite/test_modeling_granite.py
@@ -106,7 +106,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
2 changes: 1 addition & 1 deletion tests/models/granitemoe/test_modeling_granitemoe.py
@@ -105,7 +105,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
8 changes: 3 additions & 5 deletions tests/models/led/test_modeling_led.py
@@ -338,13 +338,11 @@ def test_global_attention(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.check_global_attention(*config_and_inputs)

def _get_input_ids_and_config(self, batch_size=2):
config, input_ids, attention_mask, inputs_dict = GenerationTesterMixin._get_input_ids_and_config(
self, batch_size=batch_size
)
def prepare_config_and_inputs_for_generate(self, *args, **kwargs):
config, inputs_dict = super().prepare_config_and_inputs_for_generate(*args, **kwargs)
# LED computes attention scores based on mask indices if `is_global`
inputs_dict.pop("global_attention_mask")
return config, input_ids, attention_mask, inputs_dict
return config, inputs_dict

# LEDForSequenceClassification does not support inputs_embeds
def test_inputs_embeds(self):
2 changes: 1 addition & 1 deletion tests/models/llama/test_modeling_llama.py
@@ -112,7 +112,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
1 change: 0 additions & 1 deletion tests/models/mimi/test_modeling_mimi.py
@@ -170,7 +170,6 @@ class MimiModelTest(ModelTesterMixin, unittest.TestCase):
test_headmasking = False
test_resize_embeddings = False
test_torchscript = False
input_name = "input_values"

def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
# model does support returning hidden states
2 changes: 1 addition & 1 deletion tests/models/mistral/test_modeling_mistral.py
@@ -112,7 +112,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids:
2 changes: 1 addition & 1 deletion tests/models/mixtral/test_modeling_mixtral.py
@@ -108,7 +108,7 @@ def prepare_config_and_inputs(self):

input_mask = None
if self.use_input_mask:
input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

token_type_ids = None
if self.use_token_type_ids: