From 49b3b38d99aa177e58b756663600569d054655e1 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Fri, 20 Sep 2024 19:23:10 +0000 Subject: [PATCH 01/15] tmp commit --- tests/generation/test_utils.py | 26 +++++++++++------ .../test_modeling_bigbird_pegasus.py | 29 +++++-------------- tests/models/led/test_modeling_led.py | 1 + .../models/reformer/test_modeling_reformer.py | 17 ++++------- .../test_modeling_seamless_m4t.py | 20 ------------- .../test_modeling_seamless_m4t_v2.py | 20 ------------- .../test_modeling_speech_to_text.py | 12 -------- 7 files changed, 31 insertions(+), 94 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 9754a4b7dcc6..b9d1f649763f 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -99,21 +99,29 @@ class GenerationTesterMixin: def _get_input_ids_and_config(self, batch_size=2): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - # TODO: @raushan or @gante, use `model.main_input_name` as the main input instead of relyinn on `input_ids` + # TODO: @raushan or @gante, use `model.main_input_name` as the main input instead of relying on `input_ids` input_ids = inputs_dict.pop(self.input_name)[:batch_size, :] - inputs_dict.pop("attention_mask", None) - # we don't want encoder-decoder models to start from filled decoder ids - inputs_dict.pop("decoder_input_ids", None) - inputs_dict.pop("decoder_attention_mask", None) - - # we'll set cache use in each test differently - inputs_dict.pop("use_cache", None) + # We don't want a few model inputs in our generalist model input dictionary. + input_keys_to_ignore = [ + # we don't want to mask attention heads + "head_mask", + "decoder_head_mask", + # we don't want encoder-decoder models to start from filled decoder ids + "decoder_input_ids", + "decoder_attention_mask", + # we'll set cache use in each test differently + "use_cache", + # we manually set attention_mask [TODO @joao: we probably want to reuse it when it exists] + "attention_mask", + # model-specific inputs to ignore + # "global_attention_mask", # LED computes attention scores based on mask indices if `is_global` + ] inputs_dict = { k: v[:batch_size, ...] 
for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) + if k not in input_keys_to_ignore and isinstance(v, torch.Tensor) } if config.eos_token_id is not None and config.pad_token_id is None: # hack to allow generate for models such as GPT2 as is done in `generate()` diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index 0f28fc2d67b5..eae9ee9fbf58 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -283,28 +283,6 @@ def is_pipeline_test_to_skip( return False - # overwrite from GenerationTesterMixin to solve problem - # with conflicting random seeds - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.attention_type = "original_full" - - input_ids = inputs_dict.pop(self.input_name) - _ = inputs_dict.pop("attention_mask", None) - _ = inputs_dict.pop("decoder_input_ids", None) - _ = inputs_dict.pop("decoder_attention_mask", None) - attention_mask = torch.ones_like(input_ids, dtype=torch.long) - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - attention_mask = attention_mask[:batch_size, :sequence_length] - - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - config.pad_token_id = config.eos_token_id - return config, input_ids, attention_mask, inputs_dict - def setUp(self): self.model_tester = BigBirdPegasusModelTester(self) self.config_tester = ConfigTester(self, config_class=BigBirdPegasusConfig) @@ -485,6 +463,13 @@ def test_for_change_to_full_attn(self): def test_load_save_without_tied_weights(self): pass + def test_generate_with_head_masking(self): + # overwritten to temporarily switch the attention type to `original_full` + original_self_attention_type = self.model_tester.attention_type + self.model_tester.attention_type = "original_full" + super().test_generate_with_head_masking() + self.model_tester.attention_type = original_self_attention_type + @require_torch @require_sentencepiece diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index a4d81ab2e1c6..a81bd5c39bea 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -344,6 +344,7 @@ def _get_input_ids_and_config(self, batch_size=2): ) # LED computes attention scores based on mask indices if `is_global` inputs_dict.pop("global_attention_mask") + # breakpoint() return config, input_ids, attention_mask, inputs_dict # LEDForSequenceClassification does not support inputs_embeds diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 11c2e821975d..7680ab3169f3 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -684,20 +684,15 @@ def _check_hidden_states_for_generate( def test_left_padding_compatibility(self): pass - def _get_input_ids_and_config(self, batch_size=2): + def _get_input_ids_and_config(self, *args, **kwargs): # override because overwise we hit max possible seq length for model (4*8=32) # decreasing the seq_length in tester causes errors for "training_tests", those need exactly max seq length # NOTE: seq_length has to be 
multiple of 4, otherwise it fails for other tests - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict.pop(self.input_name) - _ = inputs_dict.pop("attention_mask", None) - _ = inputs_dict.pop("decoder_input_ids", None) - _ = inputs_dict.pop("decoder_attention_mask", None) - input_ids = input_ids[:batch_size, :16] - attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :16] - config.eos_token_id = None - config.forced_eos_token_id = None - return config, input_ids, attention_mask, inputs_dict + original_sequence_length = self.model_tester.seq_length + self.model_tester.seq_length = 16 + test_inputs = super()._get_input_ids_and_config(*args, **kwargs) + self.model_tester.seq_length = original_sequence_length + return test_inputs @require_torch diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index 79f705785541..5f158e4802f1 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -379,26 +379,6 @@ def test_model_from_pretrained(self): model = SeamlessM4TModel.from_pretrained(model_name) self.assertIsNotNone(model) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict[self.input_name] - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - - # generate max 3 tokens - max_length = input_ids.shape[-1] + 3 - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] - config.pad_token_id = config.eos_token_id[0] - - attention_mask = torch.ones(input_ids.shape[:2], dtype=torch.long)[:batch_size, :sequence_length] - - return config, input_ids.float(), attention_mask, max_length - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index 1d11cbb247ca..67c6e4bb0809 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -395,26 +395,6 @@ def test_model_from_pretrained(self): model = SeamlessM4Tv2Model.from_pretrained(model_name) self.assertIsNotNone(model) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict[self.input_name] - - # cut to half length & take max batch_size 3 - sequence_length = input_ids.shape[-1] // 2 - input_ids = input_ids[:batch_size, :sequence_length] - - # generate max 3 tokens - max_length = input_ids.shape[-1] + 3 - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] - config.pad_token_id = config.eos_token_id[0] - - attention_mask = torch.ones(input_ids.shape[:2], dtype=torch.long)[:batch_size, :sequence_length] - - return config, input_ids.float(), attention_mask, max_length - def test_initialization(self): 
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index cef2a6781775..16c3a155c709 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -284,18 +284,6 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest input_name = "input_features" - def _get_input_ids_and_config(self, batch_size=2): - config, input_ids, attention_mask, inputs_dict = GenerationTesterMixin._get_input_ids_and_config(self) - - # `input_ids` is actually `input_features` which is a 3D tensor. - # We must overwrite the mask to make it 2D since the original `_get_input_ids_and_config` creates an - # attention mask of the same shape as `input_ids`. - if len(attention_mask.shape) > 2: - sequence_length = input_ids.shape[1] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long, device=attention_mask.device) - - return config, input_ids, attention_mask, inputs_dict - def setUp(self): self.model_tester = Speech2TextModelTester(self) self.config_tester = ConfigTester(self, config_class=Speech2TextConfig) From 047e6e09fb9551a73a77c7b8a267e59e72e9c737 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Mon, 23 Sep 2024 13:16:31 +0000 Subject: [PATCH 02/15] tmp commit --- tests/generation/test_utils.py | 213 +++++++----------- tests/models/led/test_modeling_led.py | 9 +- .../models/reformer/test_modeling_reformer.py | 4 +- 3 files changed, 92 insertions(+), 134 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index b9d1f649763f..c3294f75b3e9 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -97,49 +97,36 @@ class GenerationTesterMixin: input_name = "input_ids" max_new_tokens = 3 - def _get_input_ids_and_config(self, batch_size=2): + def prepare_config_and_inputs_for_generate(self, batch_size=2): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - # TODO: @raushan or @gante, use `model.main_input_name` as the main input instead of relying on `input_ids` - input_ids = inputs_dict.pop(self.input_name)[:batch_size, :] - # We don't want a few model inputs in our generalist model input dictionary. + # We don't want a few model inputs in our model input dictionary for generation tests input_keys_to_ignore = [ # we don't want to mask attention heads "head_mask", "decoder_head_mask", + "cross_attn_head_mask", # we don't want encoder-decoder models to start from filled decoder ids "decoder_input_ids", "decoder_attention_mask", # we'll set cache use in each test differently "use_cache", - # we manually set attention_mask [TODO @joao: we probably want to reuse it when it exists] - "attention_mask", - # model-specific inputs to ignore - # "global_attention_mask", # LED computes attention scores based on mask indices if `is_global` ] - - inputs_dict = { + filtered_inputs_dict = { k: v[:batch_size, ...] 
for k, v in inputs_dict.items() - if k not in input_keys_to_ignore and isinstance(v, torch.Tensor) + if k not in input_keys_to_ignore } - if config.eos_token_id is not None and config.pad_token_id is None: - # hack to allow generate for models such as GPT2 as is done in `generate()` - if isinstance(config.eos_token_id, int): - config.eos_token_id = [config.eos_token_id] - config.pad_token_id = config.eos_token_id[0] - if self.has_attentions: - attention_mask = torch.ones_like(input_ids, dtype=torch.long) - else: - attention_mask = None - - # It is important set set the eos_token_id to None to ensure that no sequences - # shorter than `max_length` can be generated + # It is important set `eos_token_id` to `None` to avoid early stopping (would break for length-based checks) + if config.eos_token_id is not None and config.pad_token_id is None: + config.pad_token_id = ( + config.eos_token_id if isinstance(config.eos_token_id, int) else config.eos_token_id[0] + ) config.eos_token_id = None config.forced_eos_token_id = None - return config, input_ids, attention_mask, inputs_dict + return config, filtered_inputs_dict def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = { @@ -198,39 +185,6 @@ def _get_constrained_beam_kwargs(self, num_return_sequences=1): } return beam_kwargs - def _greedy_generate( - self, - model, - input_ids, - attention_mask, - inputs_dict, - output_scores=False, - output_logits=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - use_cache=True, - ): - logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=False, - num_beams=1, - max_new_tokens=self.max_new_tokens, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - output_scores=output_scores, - output_logits=output_logits, - return_dict_in_generate=return_dict_in_generate, - use_cache=use_cache, - **logits_processor_kwargs, - **model_kwargs, - **inputs_dict, - ) - - return output_generate - def _sample_generate( self, model, @@ -448,81 +402,88 @@ def _contrastive_generate( @pytest.mark.generate def test_greedy_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, input_ids=input_ids, attention_mask=attention_mask, inputs_dict=inputs_dict - ) - - if model.config.is_encoder_decoder: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) - else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) - @pytest.mark.generate - def test_greedy_generate_dict_outputs(self): - for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() - - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - output_scores=True, - output_logits=True, - output_hidden_states=True, - output_attentions=self.has_attentions, - return_dict_in_generate=True, - use_cache=False, - ) - - if 
model.config.is_encoder_decoder: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - # Retrocompatibility check - self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput) - else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - # Retrocompatibility check - self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput) - - self._check_outputs(output_generate, input_ids, model.config) - - @pytest.mark.generate - def test_greedy_generate_dict_outputs_use_cache(self): - for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() - - if not hasattr(config, "use_cache"): - self.skipTest(reason="This model doesn't support caching") - if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]): - self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes") - - config.is_decoder = True - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - output_scores=True, - output_logits=True, - output_hidden_states=True, - output_attentions=self.has_attentions, - return_dict_in_generate=True, - use_cache=True, - ) + logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) + generate_kwargs = { + "do_sample": False, + "num_beams": 1, + "max_new_tokens": self.max_new_tokens, + "use_cache": True + } + output_generate = model.generate(**inputs_dict, **generate_kwargs, **logits_processor_kwargs) if model.config.is_encoder_decoder: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) - - self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + input_length = inputs_dict[self.input_name].shape[-1] + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_length) + + # @pytest.mark.generate + # def test_greedy_generate_dict_outputs(self): + # for model_class in self.all_generative_model_classes: + # config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + + # model = model_class(config).to(torch_device).eval() + # output_generate = self._greedy_generate( + # model=model, + # input_ids=input_ids, + # attention_mask=attention_mask, + # inputs_dict=inputs_dict, + # output_scores=True, + # output_logits=True, + # output_hidden_states=True, + # output_attentions=self.has_attentions, + # return_dict_in_generate=True, + # use_cache=False, + # ) + + # if model.config.is_encoder_decoder: + # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) + # self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) + # # Retrocompatibility check + # self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput) + # else: + # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + # self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) + # # Retrocompatibility check + # self.assertIsInstance(output_generate, 
GreedySearchDecoderOnlyOutput) + + # self._check_outputs(output_generate, input_ids, model.config) + + # @pytest.mark.generate + # def test_greedy_generate_dict_outputs_use_cache(self): + # for model_class in self.all_generative_model_classes: + # config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + + # if not hasattr(config, "use_cache"): + # self.skipTest(reason="This model doesn't support caching") + # if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]): + # self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes") + + # config.is_decoder = True + # model = model_class(config).to(torch_device).eval() + # output_generate = self._greedy_generate( + # model=model, + # input_ids=input_ids, + # attention_mask=attention_mask, + # inputs_dict=inputs_dict, + # output_scores=True, + # output_logits=True, + # output_hidden_states=True, + # output_attentions=self.has_attentions, + # return_dict_in_generate=True, + # use_cache=True, + # ) + + # if model.config.is_encoder_decoder: + # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) + # else: + # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + + # self._check_outputs(output_generate, input_ids, model.config, use_cache=True) @pytest.mark.generate def test_sample_generate(self): diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index a81bd5c39bea..f1eb2b3929b6 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -338,14 +338,11 @@ def test_global_attention(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_global_attention(*config_and_inputs) - def _get_input_ids_and_config(self, batch_size=2): - config, input_ids, attention_mask, inputs_dict = GenerationTesterMixin._get_input_ids_and_config( - self, batch_size=batch_size - ) + def prepare_config_and_inputs_for_generate(self, *args, **kwargs): + config, inputs_dict = super().prepare_config_and_inputs_for_generate(*args, **kwargs) # LED computes attention scores based on mask indices if `is_global` inputs_dict.pop("global_attention_mask") - # breakpoint() - return config, input_ids, attention_mask, inputs_dict + return config, inputs_dict # LEDForSequenceClassification does not support inputs_embeds def test_inputs_embeds(self): diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 7680ab3169f3..d837742e9ccd 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -684,13 +684,13 @@ def _check_hidden_states_for_generate( def test_left_padding_compatibility(self): pass - def _get_input_ids_and_config(self, *args, **kwargs): + def prepare_config_and_inputs_for_generate(self, *args, **kwargs): # override because overwise we hit max possible seq length for model (4*8=32) # decreasing the seq_length in tester causes errors for "training_tests", those need exactly max seq length # NOTE: seq_length has to be multiple of 4, otherwise it fails for other tests original_sequence_length = self.model_tester.seq_length self.model_tester.seq_length = 16 - test_inputs = super()._get_input_ids_and_config(*args, **kwargs) + test_inputs = super().prepare_config_and_inputs_for_generate(*args, **kwargs) self.model_tester.seq_length = original_sequence_length return test_inputs From 
db119a0b17c27a7a238898123e4f53a89fbd46b3 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 24 Sep 2024 18:59:06 +0000 Subject: [PATCH 03/15] tmp commit --- tests/generation/test_utils.py | 212 +++++++++--------- .../test_modeling_speech_to_text.py | 46 +--- tests/models/whisper/test_modeling_whisper.py | 42 ---- 3 files changed, 109 insertions(+), 191 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index c3294f75b3e9..28b33057ded2 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -113,7 +113,7 @@ def prepare_config_and_inputs_for_generate(self, batch_size=2): "use_cache", ] filtered_inputs_dict = { - k: v[:batch_size, ...] + k: v[:batch_size, ...] if isinstance(v, torch.Tensor) else v for k, v in inputs_dict.items() if k not in input_keys_to_ignore } @@ -185,11 +185,37 @@ def _get_constrained_beam_kwargs(self, num_return_sequences=1): } return beam_kwargs + def _greedy_generate( + self, + model, + inputs_dict, + output_scores=False, + output_logits=False, + output_attentions=False, + output_hidden_states=False, + return_dict_in_generate=False, + use_cache=True, + ): + logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) + output_generate = model.generate( + do_sample=False, + num_beams=1, + max_new_tokens=self.max_new_tokens, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_scores=output_scores, + output_logits=output_logits, + return_dict_in_generate=return_dict_in_generate, + use_cache=use_cache, + **logits_processor_kwargs, + **inputs_dict, + ) + + return output_generate + def _sample_generate( self, model, - input_ids, - attention_mask, inputs_dict, num_return_sequences, output_scores=False, @@ -201,9 +227,7 @@ def _sample_generate( ): torch.manual_seed(0) logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=True, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=True, num_beams=1, max_new_tokens=self.max_new_tokens, @@ -215,7 +239,6 @@ def _sample_generate( return_dict_in_generate=return_dict_in_generate, use_cache=use_cache, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -224,8 +247,6 @@ def _sample_generate( def _beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -236,9 +257,7 @@ def _beam_search_generate( use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -249,7 +268,6 @@ def _beam_search_generate( use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -258,8 +276,6 @@ def _beam_search_generate( def _beam_sample_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -271,9 +287,7 @@ def _beam_sample_generate( ): torch.manual_seed(0) logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=True, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=True, max_new_tokens=self.max_new_tokens, 
output_scores=output_scores, @@ -284,7 +298,6 @@ def _beam_sample_generate( use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -293,8 +306,6 @@ def _beam_sample_generate( def _group_beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, beam_kwargs, output_scores=False, @@ -305,9 +316,7 @@ def _group_beam_search_generate( use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -318,7 +327,6 @@ def _group_beam_search_generate( use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -327,8 +335,6 @@ def _group_beam_search_generate( def _constrained_beam_search_generate( self, model, - input_ids, - attention_mask, inputs_dict, constraints, beam_kwargs, @@ -340,9 +346,7 @@ def _constrained_beam_search_generate( use_cache=True, ): logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, max_new_tokens=self.max_new_tokens, output_scores=output_scores, @@ -354,7 +358,6 @@ def _constrained_beam_search_generate( use_cache=use_cache, **beam_kwargs, **logits_processor_kwargs, - **model_kwargs, **inputs_dict, ) @@ -363,8 +366,6 @@ def _constrained_beam_search_generate( def _contrastive_generate( self, model, - input_ids, - attention_mask, inputs_dict, output_scores=False, output_logits=False, @@ -379,9 +380,7 @@ def _contrastive_generate( } logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} output_generate = model.generate( - input_ids, do_sample=False, num_beams=1, max_new_tokens=self.max_new_tokens, @@ -392,7 +391,6 @@ def _contrastive_generate( return_dict_in_generate=return_dict_in_generate, use_cache=use_cache, **logits_processor_kwargs, - **model_kwargs, **contrastive_search_kwargs, **inputs_dict, ) @@ -400,90 +398,81 @@ def _contrastive_generate( return output_generate @pytest.mark.generate - def test_greedy_generate(self): + def test_greedy_generate_foo(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() - - logits_processor_kwargs = self._get_logits_processor_kwargs(do_sample=False, config=model.config) - generate_kwargs = { - "do_sample": False, - "num_beams": 1, - "max_new_tokens": self.max_new_tokens, - "use_cache": True - } - output_generate = model.generate(**inputs_dict, **generate_kwargs, **logits_processor_kwargs) + output_generate = self._greedy_generate(model=model, inputs_dict=inputs_dict) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - input_length = inputs_dict[self.input_name].shape[-1] - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_length) - - # @pytest.mark.generate - # def test_greedy_generate_dict_outputs(self): - # for model_class in self.all_generative_model_classes: - # config, input_ids, 
attention_mask, inputs_dict = self._get_input_ids_and_config() - - # model = model_class(config).to(torch_device).eval() - # output_generate = self._greedy_generate( - # model=model, - # input_ids=input_ids, - # attention_mask=attention_mask, - # inputs_dict=inputs_dict, - # output_scores=True, - # output_logits=True, - # output_hidden_states=True, - # output_attentions=self.has_attentions, - # return_dict_in_generate=True, - # use_cache=False, - # ) - - # if model.config.is_encoder_decoder: - # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) - # self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - # # Retrocompatibility check - # self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput) - # else: - # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) - # self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - # # Retrocompatibility check - # self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput) - - # self._check_outputs(output_generate, input_ids, model.config) - - # @pytest.mark.generate - # def test_greedy_generate_dict_outputs_use_cache(self): - # for model_class in self.all_generative_model_classes: - # config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() - - # if not hasattr(config, "use_cache"): - # self.skipTest(reason="This model doesn't support caching") - # if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]): - # self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes") - - # config.is_decoder = True - # model = model_class(config).to(torch_device).eval() - # output_generate = self._greedy_generate( - # model=model, - # input_ids=input_ids, - # attention_mask=attention_mask, - # inputs_dict=inputs_dict, - # output_scores=True, - # output_logits=True, - # output_hidden_states=True, - # output_attentions=self.has_attentions, - # return_dict_in_generate=True, - # use_cache=True, - # ) - - # if model.config.is_encoder_decoder: - # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) - # else: - # self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) - - # self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) + + @pytest.mark.generate + def test_greedy_generate_dict_outputs_foo(self): # <----------------------------- fix me + for model_class in self.all_generative_model_classes: + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] + + model = model_class(config).to(torch_device).eval() + output_generate = self._greedy_generate( + model=model, + inputs_dict=inputs_dict, + output_scores=True, + output_logits=True, + output_hidden_states=True, + output_attentions=self.has_attentions, + return_dict_in_generate=True, + use_cache=False, + ) + + if model.config.is_encoder_decoder: + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) + self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) + # Retrocompatibility check + self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput) + else: + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) + self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) + # Retrocompatibility 
check + self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput) + + self._check_outputs(output_generate, main_input, model.config) + + @pytest.mark.generate + def test_greedy_generate_dict_outputs_use_cache(self): + for model_class in self.all_generative_model_classes: + config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + + if not hasattr(config, "use_cache"): + self.skipTest(reason="This model doesn't support caching") + if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]): + self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes") + + config.is_decoder = True + model = model_class(config).to(torch_device).eval() + output_generate = self._greedy_generate( + model=model, + input_ids=input_ids, + attention_mask=attention_mask, + inputs_dict=inputs_dict, + output_scores=True, + output_logits=True, + output_hidden_states=True, + output_attentions=self.has_attentions, + return_dict_in_generate=True, + use_cache=True, + ) + + if model.config.is_encoder_decoder: + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) + else: + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + + self._check_outputs(output_generate, input_ids, model.config, use_cache=True) @pytest.mark.generate def test_sample_generate(self): @@ -2057,8 +2046,9 @@ def test_inherits_generation_mixin(self): for model_class in self.all_generative_model_classes: self.assertTrue("GenerationMixin" in str(model_class.__bases__)) - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, seq_length = input_ids.shape + def _check_outputs(self, output, main_input, config, use_cache=False, num_return_sequences=1): + batch_size = main_input.shape[0] + seq_length = main_input.shape[-1] config = config.text_config if hasattr(config, "text_config") else config num_sequences_in_output = batch_size * num_return_sequences @@ -2066,6 +2056,10 @@ def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_ output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length ) + # in some models we subsample the sequence length in inner layers + if hasattr(self.model_tester, "get_subsampled_output_lengths"): + seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) + # scores self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 16c3a155c709..76c224f9b498 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -620,46 +620,12 @@ def test_resize_embeddings_untied(self): def test_generate_without_input_ids(self): pass - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, seq_length = input_ids.shape[:2] - subsampled_seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) - num_sequences_in_output = batch_size * num_return_sequences - gen_len = ( - output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length - ) - - # scores - self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) - - # Attentions - # encoder - self._check_encoder_attention_for_generate( 
- output.encoder_attentions, batch_size, config, subsampled_seq_length - ) - # decoder - self._check_attentions_for_generate( - num_sequences_in_output, - output.decoder_attentions, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - - # Hidden States - # encoder - self._check_encoder_hidden_states_for_generate( - output.encoder_hidden_states, batch_size, config, subsampled_seq_length - ) - - # decoder - self._check_hidden_states_for_generate( - num_sequences_in_output, - output.decoder_hidden_states, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, + def _check_outputs(self, output, main_input, config, use_cache=False, num_return_sequences=1): + # In this model, the index of `batch_size` and `sequence_length`` in `main_input` is different: they are the + # first two dimensions of the tensor. + main_input = main_input[:, :, 0] + super()._check_outputs( + output, main_input, config, use_cache=use_cache, num_return_sequences=num_return_sequences ) def _create_and_check_torchscript(self, config, inputs_dict): diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index b4e71ca72e56..bbec3f3e607d 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -868,48 +868,6 @@ def test_resize_embeddings_untied(self): def test_generate_without_input_ids(self): pass - def _check_outputs(self, output, input_ids, config, use_cache=False, num_return_sequences=1): - batch_size, mel, seq_length = input_ids.shape - subsampled_seq_length = self.model_tester.get_subsampled_output_lengths(seq_length) - num_sequences_in_output = batch_size * num_return_sequences - gen_len = ( - output.sequences.shape[-1] - 1 if config.is_encoder_decoder else output.sequences.shape[-1] - seq_length - ) - - # scores - self._check_scores(num_sequences_in_output, output.scores, length=gen_len, config=config) - - # Attentions - # encoder - self._check_encoder_attention_for_generate( - output.encoder_attentions, batch_size, config, subsampled_seq_length - ) - # decoder - self._check_attentions_for_generate( - num_sequences_in_output, - output.decoder_attentions, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - - # Hidden States - # encoder - self._check_encoder_hidden_states_for_generate( - output.encoder_hidden_states, batch_size, config, subsampled_seq_length - ) - - # decoder - self._check_hidden_states_for_generate( - num_sequences_in_output, - output.decoder_hidden_states, - min_length=1, - max_length=output.sequences.shape[-1], - config=config, - use_cache=use_cache, - ) - @require_flash_attn @require_torch_gpu @pytest.mark.flash_attn_test From 7b5e2aaf39b5e61a54e1228434811e2aefa0e15a Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 08:44:46 +0000 Subject: [PATCH 04/15] fix greedy tests --- tests/generation/test_utils.py | 13 +- .../models/musicgen/test_modeling_musicgen.py | 119 +++--------------- .../test_modeling_musicgen_melody.py | 117 +++-------------- 3 files changed, 34 insertions(+), 215 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 28b33057ded2..172956a6b668 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -398,7 +398,7 @@ def _contrastive_generate( return output_generate @pytest.mark.generate - def test_greedy_generate_foo(self): + def 
test_greedy_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() main_input = inputs_dict[self.input_name] @@ -412,7 +412,7 @@ def test_greedy_generate_foo(self): self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate - def test_greedy_generate_dict_outputs_foo(self): # <----------------------------- fix me + def test_greedy_generate_dict_outputs(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() main_input = inputs_dict[self.input_name] @@ -445,7 +445,8 @@ def test_greedy_generate_dict_outputs_foo(self): # <--------------------------- @pytest.mark.generate def test_greedy_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] if not hasattr(config, "use_cache"): self.skipTest(reason="This model doesn't support caching") @@ -456,8 +457,6 @@ def test_greedy_generate_dict_outputs_use_cache(self): model = model_class(config).to(torch_device).eval() output_generate = self._greedy_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, output_scores=True, output_logits=True, @@ -470,9 +469,9 @@ def test_greedy_generate_dict_outputs_use_cache(self): if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) - self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + self._check_outputs(output_generate, main_input, model.config, use_cache=True) @pytest.mark.generate def test_sample_generate(self): diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index a385a18b91c5..e481f1580209 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -61,7 +61,6 @@ set_seed, ) from transformers.generation import ( - GenerateDecoderOnlyOutput, GenerateEncoderDecoderOutput, ) @@ -124,6 +123,7 @@ def __init__( pad_token_id=99, bos_token_id=99, num_codebooks=4, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -141,6 +141,7 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id self.num_codebooks = num_codebooks + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size * self.num_codebooks, self.seq_length], self.vocab_size) @@ -166,6 +167,7 @@ def get_config(self): bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) return config @@ -305,24 +307,10 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): return logits_processor_kwargs def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() - config.audio_channels = 2 - model = model_class(config).to(torch_device).eval() - output_generate = 
self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - inputs_dict={}, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @require_flash_attn @require_torch_gpu @@ -998,6 +986,7 @@ def __init__( num_codebooks=4, num_filters=4, codebook_size=128, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -1017,6 +1006,7 @@ def __init__( self.num_codebooks = num_codebooks self.num_filters = num_filters self.codebook_size = codebook_size + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) @@ -1052,6 +1042,7 @@ def get_config(self): bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) config = MusicgenConfig.from_sub_models_config(text_encoder_config, audio_encoder_config, decoder_config) return config @@ -1426,34 +1417,6 @@ def _get_input_ids_and_config(self, batch_size=2): return config, input_ids, attention_mask - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are - # different modalities -> different shapes) - def _greedy_generate( - self, - model, - input_ids, - attention_mask, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=False, - num_beams=1, - max_new_tokens=self.max_new_tokens, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - output_scores=output_scores, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are # different modalities -> different shapes) def _sample_generate( @@ -1489,46 +1452,6 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_greedy_generate_dict_outputs(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - self.assertNotIn(config.pad_token_id, output_generate) - - def test_greedy_generate_dict_outputs_use_cache(self): - for model_class in self.greedy_sample_model_classes: - # enable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - - config.use_cache = True - config.is_decoder = True - 
model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - def test_sample_generate(self): for model_class in self.greedy_sample_model_classes: config, input_ids, attention_mask = self._get_input_ids_and_config() @@ -1595,24 +1518,10 @@ def test_generate_fp16(self): ) def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.audio_channels = 2 - - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @unittest.skip( reason="MusicgenModel is actually not the base of MusicgenForCausalLM as the latter is a composit model" diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index e8584e238d3c..cb8bef35b894 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -124,6 +124,7 @@ def __init__( bos_token_id=99, num_codebooks=4, conditional_seq_length=4, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -143,6 +144,7 @@ def __init__( self.num_codebooks = num_codebooks self.conditional_seq_length = conditional_seq_length self.encoder_seq_length = conditional_seq_length + seq_length + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size * self.num_codebooks, self.seq_length], self.vocab_size) @@ -168,6 +170,7 @@ def get_config(self): bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) return config @@ -308,23 +311,10 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): return logits_processor_kwargs def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask, _ = self._get_input_ids_and_config() - config.audio_channels = 2 - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - inputs_dict={}, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + 
self.model_tester.audio_channels = original_audio_channels @require_flash_attn @require_torch_gpu @@ -996,6 +986,7 @@ def __init__( codebook_size=128, conditional_seq_length=3, chroma_length=24, + audio_channels=1, ): self.parent = parent self.batch_size = batch_size @@ -1018,6 +1009,7 @@ def __init__( self.conditional_seq_length = conditional_seq_length self.chroma_length = chroma_length self.encoder_seq_length = conditional_seq_length + seq_length + self.audio_channels = audio_channels def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.conditional_seq_length], self.vocab_size) @@ -1053,6 +1045,7 @@ def get_config(self): bos_token_id=self.bos_token_id, num_codebooks=self.num_codebooks, tie_word_embeddings=False, + audio_channels=self.audio_channels, ) config = MusicgenMelodyConfig.from_sub_models_config( text_encoder_config, audio_encoder_config, decoder_config, chroma_length=self.chroma_length @@ -1410,34 +1403,6 @@ def _get_input_ids_and_config(self, batch_size=2): return config, input_ids, attention_mask - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are - # different modalities -> different shapes) - def _greedy_generate( - self, - model, - input_ids, - attention_mask, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=False, - num_beams=1, - max_new_tokens=self.max_new_tokens, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - output_scores=output_scores, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are # different modalities -> different shapes) def _sample_generate( @@ -1473,46 +1438,6 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_greedy_generate_dict_outputs(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) - - def test_greedy_generate_dict_outputs_use_cache(self): - for model_class in self.greedy_sample_model_classes: - # enable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - - config.use_cache = True - config.is_decoder = True - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - def test_sample_generate(self): for 
model_class in self.greedy_sample_model_classes: config, input_ids, attention_mask = self._get_input_ids_and_config() @@ -1579,24 +1504,10 @@ def test_generate_fp16(self): ) def test_greedy_generate_stereo_outputs(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.audio_channels = 2 - - model = model_class(config).to(torch_device).eval() - output_generate = self._greedy_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - - self.assertNotIn(config.pad_token_id, output_generate) + original_audio_channels = self.model_tester.audio_channels + self.model_tester.audio_channels = 2 + super().test_greedy_generate_dict_outputs() + self.model_tester.audio_channels = original_audio_channels @unittest.skip( reason="MusicgenMelodyModel is actually not the base of MusicgenMelodyForCausalLM as the latter is a composit model" From 9c613f4343c1c640f149d576af4ed460202f5c5b Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 12:33:00 +0000 Subject: [PATCH 05/15] up to beam search --- tests/generation/test_utils.py | 70 +++++--------- .../models/musicgen/test_modeling_musicgen.py | 96 +------------------ .../test_modeling_musicgen_melody.py | 96 +------------------ tests/models/t5/test_modeling_tf_t5.py | 4 +- 4 files changed, 30 insertions(+), 236 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 172956a6b668..820d0ee19459 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -463,7 +463,7 @@ def test_greedy_generate_dict_outputs_use_cache(self): output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: @@ -476,32 +476,26 @@ def test_greedy_generate_dict_outputs_use_cache(self): @pytest.mark.generate def test_sample_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() - output_generate = self._sample_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - num_return_sequences=1, - ) + output_generate = self._sample_generate(model=model, inputs_dict=inputs_dict, num_return_sequences=1) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() output_generate = self._sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, 
inputs_dict=inputs_dict, num_return_sequences=2, output_scores=True, @@ -518,45 +512,39 @@ def test_sample_generate_dict_output(self): # Retrocompatibility check self.assertIsInstance(output_generate, SampleEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, SampleDecoderOnlyOutput) - self._check_outputs(output_generate, input_ids, model.config, num_return_sequences=2) + self._check_outputs(output_generate, main_input, model.config, num_return_sequences=2) @pytest.mark.generate def test_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() - output_generate = self._beam_search_generate( - model=model, - input_ids=input_ids, - attention_mask=attention_mask, - inputs_dict=inputs_dict, - beam_kwargs=beam_kwargs, - ) + output_generate = self._beam_search_generate(model=model, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = self._beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -572,20 +560,20 @@ def test_beam_search_generate_dict_output(self): # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate def test_beam_search_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: - # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] if not hasattr(config, "use_cache"): self.skipTest(reason="This model doesn't support caching") @@ -599,8 
+587,6 @@ def test_beam_search_generate_dict_outputs_use_cache(self): model = model_class(config).to(torch_device).eval() output_generate = self._beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -608,16 +594,16 @@ def test_beam_search_generate_dict_outputs_use_cache(self): output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self._check_outputs( - output_generate, input_ids, model.config, use_cache=True, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, use_cache=True, num_return_sequences=beam_kwargs["num_beams"] ) @require_accelerate @@ -1007,7 +993,7 @@ def test_contrastive_generate_low_memory(self): if any(model_name in model_class.__name__.lower() for model_name in ["gptbigcode"]): self.skipTest(reason="TODO: fix me") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate() # NOTE: contrastive search only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -1019,23 +1005,19 @@ def test_contrastive_generate_low_memory(self): model = model_class(config).to(torch_device).eval() low_output = model.generate( - input_ids, top_k=4, penalty_alpha=0.6, low_memory=True, max_new_tokens=self.max_new_tokens, - attention_mask=attention_mask, **inputs_dict, use_cache=True, ) high_output = model.generate( - input_ids, top_k=4, penalty_alpha=0.6, low_memory=False, max_new_tokens=self.max_new_tokens, - attention_mask=attention_mask, **inputs_dict, use_cache=True, ) @@ -1061,7 +1043,7 @@ def test_beam_search_low_memory(self): ] ): self.skipTest(reason="May fix in the future: need model-specific fixes") - config, input_ids, _, _ = self._get_input_ids_and_config(batch_size=2) + config, inputs_dict = self.prepare_config_and_inputs_for_generate() # batch_size=1 is ok, but batch_size>1 will cause non-identical output config.use_cache = True @@ -1071,7 +1053,7 @@ def test_beam_search_low_memory(self): model = model_class(config).to(torch_device).eval() low_output = model.generate( - input_ids, + **inputs_dict, max_new_tokens=8, num_beams=5, early_stopping=True, @@ -1080,7 +1062,7 @@ def test_beam_search_low_memory(self): ) high_output = model.generate( - input_ids, + **inputs_dict, max_new_tokens=8, num_beams=5, early_stopping=True, diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index e481f1580209..859a4afaa7fa 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -284,24 +284,6 @@ def test_tie_model_weights(self): def test_tied_weights_keys(self): pass - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - _ = inputs_dict.pop("attention_mask", None) - inputs_dict = { - k: v[:batch_size, ...] 
- for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) - } - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[: batch_size * config.num_codebooks, :] - - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - return config, input_ids, attention_mask, inputs_dict - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs @@ -1406,88 +1388,12 @@ def test_model_get_set_embeddings(self): lm_heads = model.get_output_embeddings() self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear)) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[:batch_size, :] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - - return config, input_ids, attention_mask - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are - # different modalities -> different shapes) - def _sample_generate( - self, - model, - input_ids, - attention_mask, - num_return_sequences, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - torch.manual_seed(0) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=True, - num_beams=1, - max_new_tokens=self.max_new_tokens, - num_return_sequences=num_return_sequences, - output_scores=output_scores, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_sample_generate(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - model = model_class(config).to(torch_device).eval() - - # check `generate()` and `sample()` are equal - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=1, - ) - self.assertIsInstance(output_generate, torch.Tensor) - - def test_sample_generate_dict_output(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=3, - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput) - def test_generate_without_input_ids(self): - config, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() # if no bos token id => cannot generate from None if config.bos_token_id is None: diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py 
b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index cb8bef35b894..e509062e57ee 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -288,24 +288,6 @@ def test_tie_model_weights(self): def test_tied_weights_keys(self): pass - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - _ = inputs_dict.pop("attention_mask", None) - inputs_dict = { - k: v[:batch_size, ...] - for k, v in inputs_dict.items() - if "head_mask" not in k and isinstance(v, torch.Tensor) - } - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[: batch_size * config.num_codebooks, :] - - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - return config, input_ids, attention_mask, inputs_dict - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs @@ -1392,88 +1374,12 @@ def test_model_get_set_embeddings(self): lm_heads = model.get_output_embeddings() self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear)) - def _get_input_ids_and_config(self, batch_size=2): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - input_ids = inputs_dict["input_ids"] - - # take max batch_size - sequence_length = input_ids.shape[-1] - input_ids = input_ids[:batch_size, :] - attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long) - - return config, input_ids, attention_mask - - # override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are - # different modalities -> different shapes) - def _sample_generate( - self, - model, - input_ids, - attention_mask, - num_return_sequences, - output_scores=False, - output_attentions=False, - output_hidden_states=False, - return_dict_in_generate=False, - ): - torch.manual_seed(0) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_generate = model.generate( - input_ids, - do_sample=True, - num_beams=1, - max_new_tokens=self.max_new_tokens, - num_return_sequences=num_return_sequences, - output_scores=output_scores, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict_in_generate=return_dict_in_generate, - remove_invalid_values=True, - **model_kwargs, - ) - - return output_generate - def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_sample_generate(self): - for model_class in self.greedy_sample_model_classes: - config, input_ids, attention_mask = self._get_input_ids_and_config() - model = model_class(config).to(torch_device).eval() - - # check `generate()` and `sample()` are equal - output_generate = self._sample_generate( - model=model, - input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=1, - ) - self.assertIsInstance(output_generate, torch.Tensor) - - def test_sample_generate_dict_output(self): - for model_class in self.greedy_sample_model_classes: - # disable cache - config, input_ids, attention_mask = self._get_input_ids_and_config() - config.use_cache = False - model = model_class(config).to(torch_device).eval() - - output_generate = self._sample_generate( - model=model, - 
input_ids=input_ids.to(torch_device), - attention_mask=attention_mask.to(torch_device), - num_return_sequences=3, - output_scores=True, - output_hidden_states=True, - output_attentions=True, - return_dict_in_generate=True, - ) - - self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput) - def test_generate_without_input_ids(self): - config, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() # if no bos token id => cannot generate from None if config.bos_token_id is None: diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index d7b6fd84d5fd..037f1b1e2188 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -470,7 +470,7 @@ def test_greedy_xla_generate_simple(self): self.assertListEqual(expected_output_string, output_strings_xla) @slow - def test_greedy_generate(self): + def test_t5_greedy_generate(self): model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small") tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small") @@ -520,7 +520,7 @@ def test_sample_xla_generate_simple(self): self.assertListEqual(expected_output_string_xla, output_strings_xla) @slow - def test_sample_generate(self): + def test_t5_sample_generate(self): model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small") tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small") From d274cdab3fdfa9c153f77816faebf0de5d9edcd4 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 12:57:58 +0000 Subject: [PATCH 06/15] let's see what breaks --- tests/generation/test_utils.py | 206 ++++++++---------- .../models/musicgen/test_modeling_musicgen.py | 3 - .../test_modeling_musicgen_melody.py | 3 - 3 files changed, 90 insertions(+), 122 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 820d0ee19459..1385cc3e4400 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -603,7 +603,11 @@ def test_beam_search_generate_dict_outputs_use_cache(self): self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self._check_outputs( - output_generate, main_input, model.config, use_cache=True, num_return_sequences=beam_kwargs["num_beams"] + output_generate, + main_input, + model.config, + use_cache=True, + num_return_sequences=beam_kwargs["num_beams"], ) @require_accelerate @@ -617,7 +621,7 @@ def test_model_parallel_beam_search(self): if model_class._no_split_modules is None: continue - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() model = model_class(config).eval() with tempfile.TemporaryDirectory() as tmp_dir: @@ -625,8 +629,6 @@ def test_model_parallel_beam_search(self): new_model = model_class.from_pretrained(tmp_dir, device_map="auto") new_model.generate( - input_ids, - attention_mask=attention_mask, max_new_tokens=self.max_new_tokens, num_beams=2, **inputs_dict, @@ -635,14 +637,13 @@ def test_model_parallel_beam_search(self): @pytest.mark.generate def test_beam_sample_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = 
self._beam_sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, ) @@ -650,7 +651,7 @@ def test_beam_sample_generate(self): if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) # for VLMs inputs embeds won't match input ids unless images are encoded and merged with ids properly # no quick fix available, since obtaining image embeddings step is very model-specific @@ -664,12 +665,11 @@ def test_beam_sample_generate(self): "inputs_embeds" in prepare_inputs_for_generation_args and "cache_positions" in prepare_inputs_for_generation_args ): - input_embeds = model.get_input_embeddings()(input_ids) + input_embeds = model.get_input_embeddings()(inputs_dict["input_ids"]) beam_kwargs.update({"inputs_embeds": input_embeds}) output_generate2 = self._beam_sample_generate( model=model, input_ids=None, - attention_mask=attention_mask, inputs_dict={}, beam_kwargs=beam_kwargs, ) @@ -679,15 +679,14 @@ def test_beam_sample_generate(self): @pytest.mark.generate def test_beam_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() output_generate = self._beam_sample_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -704,18 +703,18 @@ def test_beam_sample_generate_dict_output(self): # Retrocompatibility check self.assertIsInstance(output_generate, BeamSampleEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSampleDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate def test_generate_without_input_ids(self): - config, _, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() # if no bos token id => cannot generate from None if config.bos_token_id is None: @@ -737,49 +736,45 @@ def test_generate_without_input_ids(self): @pytest.mark.generate def test_group_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() # check `generate()` and `group_beam_search()` are equal beam_kwargs = self._get_diverse_beam_kwargs() output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, 
beam_kwargs=beam_kwargs, ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) # check `group_beam_search` for higher than 1 `num_return_sequences` num_return_sequences = 2 beam_kwargs = self._get_diverse_beam_kwargs(num_return_sequences=num_return_sequences) output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_group_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_diverse_beam_kwargs() output_generate = self._group_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, beam_kwargs=beam_kwargs, output_scores=True, @@ -795,21 +790,22 @@ def test_group_beam_search_generate_dict_output(self): # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) - # TODO: @gante + # TODO: @gante check why it is flaky @is_flaky() @pytest.mark.generate def test_constrained_beam_search_generate(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() @@ -825,8 +821,6 @@ def test_constrained_beam_search_generate(self): beam_kwargs = self._get_constrained_beam_kwargs() output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -835,7 +829,7 @@ def test_constrained_beam_search_generate(self): if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) for generation_output in output_generate: self._check_sequence_inside_sequence(force_tokens, 
generation_output) @@ -851,8 +845,6 @@ def test_constrained_beam_search_generate(self): output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -861,7 +853,7 @@ def test_constrained_beam_search_generate(self): if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) for generation_output in output_generate: self._check_sequence_inside_sequence(force_tokens, generation_output) @@ -869,7 +861,8 @@ def test_constrained_beam_search_generate(self): @pytest.mark.generate def test_constrained_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] model = model_class(config).to(torch_device).eval() @@ -884,8 +877,6 @@ def test_constrained_beam_search_generate_dict_output(self): beam_kwargs = self._get_constrained_beam_kwargs() output_generate = self._constrained_beam_search_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, constraints=constraints, beam_kwargs=beam_kwargs, @@ -903,13 +894,13 @@ def test_constrained_beam_search_generate_dict_output(self): # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput) # Retrocompatibility check self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput) self._check_outputs( - output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"] + output_generate, main_input, model.config, num_return_sequences=beam_kwargs["num_beams"] ) @pytest.mark.generate @@ -922,7 +913,8 @@ def test_contrastive_generate(self): if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]): self.skipTest(reason="Won't fix: old model with different cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] # NOTE: contrastive search only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -933,15 +925,13 @@ def test_contrastive_generate(self): model = model_class(config).to(torch_device).eval() output_generate = self._contrastive_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, use_cache=True, ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + main_input.shape[-1]) @pytest.mark.generate def test_contrastive_generate_dict_outputs_use_cache(self): @@ -953,7 +943,8 @@ def test_contrastive_generate_dict_outputs_use_cache(self): if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]): self.skipTest(reason="Won't fix: old model with different cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] # NOTE: contrastive search only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -963,8 +954,6 @@ def test_contrastive_generate_dict_outputs_use_cache(self): model = model_class(config).to(torch_device).eval() output_generate = self._contrastive_generate( model=model, - input_ids=input_ids, - attention_mask=attention_mask, inputs_dict=inputs_dict, output_scores=True, output_logits=True, @@ -977,9 +966,9 @@ def test_contrastive_generate_dict_outputs_use_cache(self): if model.config.is_encoder_decoder: self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + 1) else: - self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1]) + self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + main_input.shape[-1]) - self._check_outputs(output_generate, input_ids, model.config, use_cache=True) + self._check_outputs(output_generate, main_input, model.config, use_cache=True) @pytest.mark.generate def test_contrastive_generate_low_memory(self): @@ -1043,6 +1032,7 @@ def test_beam_search_low_memory(self): ] ): self.skipTest(reason="May fix in the future: need model-specific fixes") + config, inputs_dict = self.prepare_config_and_inputs_for_generate() # batch_size=1 is ok, but batch_size>1 will cause non-identical output @@ -1107,7 +1097,8 @@ def test_assisted_decoding_matches_greedy_search(self, assistant_type): self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[self.input_name] # NOTE: assisted generation only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -1133,9 +1124,7 @@ def test_assisted_decoding_matches_greedy_search(self, assistant_type): "return_dict_in_generate": True, "use_cache": True, } - output_greedy = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_greedy = model.generate(**generation_kwargs, **inputs_dict) # test with the same assistant model or randomly init one # in the first case all candidate tokens are accepted, in the second none is accepted @@ -1147,15 +1136,13 @@ def test_assisted_decoding_matches_greedy_search(self, assistant_type): assistant_model.generation_config.num_assistant_tokens = 2 # see b) assistant_model.generation_config.num_assistant_tokens_schedule = "constant" # see b) generation_kwargs.update({"assistant_model": assistant_model}) - output_assisted = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_assisted = model.generate(**generation_kwargs, **inputs_dict) # The two outputs must match and their shape must be as expected self.assertListEqual(output_greedy.sequences.tolist(), output_assisted.sequences.tolist()) for output in (output_greedy, output_assisted): - self._check_outputs(output, input_ids, model.config, use_cache=True) + self._check_outputs(output, main_input, model.config, use_cache=True) @is_flaky() @pytest.mark.generate @@ -1184,7 +1171,8 @@ def test_prompt_lookup_decoding_matches_greedy_search(self): self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[self.input_name] # NOTE: assisted generation only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -1211,20 +1199,16 @@ def test_prompt_lookup_decoding_matches_greedy_search(self): "use_cache": True, } - output_greedy = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_greedy = model.generate(**generation_kwargs, **inputs_dict) generation_kwargs.update({"prompt_lookup_num_tokens": 2}) # see b) - output_prompt_lookup = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_prompt_lookup = model.generate(**generation_kwargs, **inputs_dict) # The two outputs must match and their shape must be as expected self.assertListEqual(output_greedy.sequences.tolist(), output_prompt_lookup.sequences.tolist()) for output in (output_greedy, output_prompt_lookup): - self._check_outputs(output, input_ids, model.config, use_cache=True) + self._check_outputs(output, main_input, model.config, use_cache=True) @pytest.mark.generate def test_dola_decoding_sample(self): @@ -1240,7 +1224,8 @@ def test_dola_decoding_sample(self): self.skipTest("DoLa is not supported for models that don't return layerwise hidden states") # enable cache if the model is not openai-gpt, xlnet, cpm, or xlm - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] # Encoder-decoder models are not supported if config.is_encoder_decoder: @@ -1267,9 +1252,8 @@ def test_dola_decoding_sample(self): "use_cache": hasattr(config, "use_cache"), # Some models don't support the cache } generation_kwargs.update({"dola_layers": "low"}) - model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {} - output_dola = model.generate(input_ids, **model_kwargs, **generation_kwargs, **inputs_dict) - self._check_outputs(output_dola, input_ids, model.config, use_cache=hasattr(config, "use_cache")) + output_dola = model.generate(**generation_kwargs, **inputs_dict) + self._check_outputs(output_dola, main_input, model.config, use_cache=hasattr(config, "use_cache")) @pytest.mark.generate def test_assisted_decoding_sample(self): @@ -1297,7 +1281,8 @@ def test_assisted_decoding_sample(self): self.skipTest(reason="May fix in the future: need model-specific fixes") # enable cache - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) + main_input = inputs_dict[self.input_name] # NOTE: assisted generation only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -1327,11 +1312,9 @@ def test_assisted_decoding_sample(self): "return_dict_in_generate": True, "use_cache": True, } - output_assisted = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + output_assisted = model.generate(**generation_kwargs, **inputs_dict) - self._check_outputs(output_assisted, input_ids, config, use_cache=True) + self._check_outputs(output_assisted, main_input, config, use_cache=True) @pytest.mark.generate def test_prompt_lookup_decoding_stops_at_eos(self): @@ -1367,7 +1350,8 @@ def test_generate_with_head_masking(self): """Test designed for encoder-decoder models to ensure the attention head masking is used.""" attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"] for model_class in self.all_generative_model_classes: - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + # We want to test only encoder-decoder models if not config.is_encoder_decoder: continue @@ -1390,8 +1374,6 @@ def test_generate_with_head_masking(self): for attn_name, (name, mask) in zip(attention_names, head_masking.items()): out = model.generate( - input_ids, - attention_mask=attention_mask, num_beams=1, output_attentions=self.has_attentions, return_dict_in_generate=True, @@ -1420,7 +1402,7 @@ def test_left_padding_compatibility(self): # - The model must be a decoder-only architecture (encoder-based architectures use right-padding) decoder_only_classes = [] for model_class in self.all_generative_model_classes: - config, _, _, _ = self._get_input_ids_and_config() + config, _ = self.prepare_config_and_inputs_for_generate() if config.is_encoder_decoder: continue else: @@ -1453,7 +1435,10 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature): return model_kwargs for model_class in decoder_only_classes: - config, input_ids, attention_mask, _ = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + input_ids = inputs_dict["input_ids"] + attention_mask = inputs_dict["attention_mask"] + model = model_class(config).to(torch_device).eval() signature = inspect.signature(model.forward).parameters.keys() @@ -1556,7 +1541,8 @@ def test_generate_from_inputs_embeds_decoder_only(self): # When supported, tests that the decoder model can generate from `inputs_embeds` instead of `input_ids` # if fails, you should probably update the `prepare_inputs_for_generation` function for model_class in self.all_generative_model_classes: - config, input_ids, _, _ = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + input_ids = inputs_dict["input_ids"] # Ignore: # a) eos (to always output 20 tokens) and pad (so we don't try to infer the attn mask from the input_ids, @@ -1627,7 +1613,10 @@ def test_generate_from_inputs_embeds_with_static_cache(self): if not model_class._supports_static_cache: self.skipTest(reason="This model does not support the static cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + input_ids = inputs_dict["input_ids"] + attention_mask = inputs_dict["attention_mask"] + if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") @@ -1765,7 +1754,7 @@ def test_new_cache_format(self, num_beams, do_sample): if not 
model_class._supports_cache_class: self.skipTest(reason="This model does not support the new cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() model = model_class(config).to(torch_device).eval() generation_kwargs = { @@ -1780,9 +1769,7 @@ def test_new_cache_format(self, num_beams, do_sample): # Sets seed before calling `generate` for the case with do_sample=True seed = torch.randint(0, 1000000, (1,)).item() set_seed(seed) - legacy_results = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + legacy_results = model.generate(**generation_kwargs, **inputs_dict) set_seed(seed) if config.is_encoder_decoder: cache_cls = EncoderDecoderCache @@ -1790,13 +1777,7 @@ def test_new_cache_format(self, num_beams, do_sample): else: cache_cls = DynamicCache past_key_values = cache_cls() - new_results = model.generate( - input_ids, - attention_mask=attention_mask, - past_key_values=past_key_values, - **generation_kwargs, - **inputs_dict, - ) + new_results = model.generate(past_key_values=past_key_values, **generation_kwargs, **inputs_dict) # The two sets of generated sequences must match, despite the cache format between forward passes being # different @@ -1839,12 +1820,15 @@ def test_generate_with_static_cache(self): if not model_class._supports_static_cache: self.skipTest(reason="This model does not support the static cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() + main_input = inputs_dict[self.input_name] + if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") config.is_decoder = True - batch_size, seq_length = input_ids.shape + batch_size = main_input.shape[0] + seq_length = main_input.shape[-1] max_new_tokens = 20 model = model_class(config).to(torch_device).eval() @@ -1867,7 +1851,7 @@ def test_generate_with_static_cache(self): else config.num_key_value_heads ) num_hidden_layers = config.num_hidden_layers - results = model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict) + results = model.generate(**generation_kwargs, **inputs_dict) cache_shape = (batch_size, num_key_value_heads, max_cache_len, head_dim) self.assertTrue(isinstance(results.past_key_values, StaticCache)) @@ -1881,7 +1865,7 @@ def test_generate_with_quant_cache(self): if not model_class._supports_quantized_cache: self.skipTest(reason="This model does not support the quantized cache format") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() config.is_decoder = True model = model_class(config).to(torch_device).eval() @@ -1894,19 +1878,17 @@ def test_generate_with_quant_cache(self): "use_cache": True, } - results = model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict) + results = model.generate(**generation_kwargs, **inputs_dict) self.assertTrue(isinstance(results.past_key_values, QuantoQuantizedCache)) # passing past key values of different type should raise Error with self.assertRaises(ValueError): - model.generate( - input_ids, attention_mask=attention_mask, past_key_valyes=DynamicCache(), **generation_kwargs - ) + model.generate(past_key_valyes=DynamicCache(), **generation_kwargs, **inputs_dict) # setting 
incorrect cache_config args should raise an Error, i.e. nbits=60 does not make sense generation_kwargs["cache_config"] = {"nbits": 60, "q_group_size": 8, "residual_length": 128} with self.assertRaises(ValueError): - model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs) + model.generate(**generation_kwargs, **inputs_dict) @pytest.mark.generate @require_torch_gpu @@ -1961,7 +1943,7 @@ def test_generate_methods_with_num_logits_to_keep(self): if "num_logits_to_keep" not in set(inspect.signature(model_class.forward).parameters.keys()): self.skipTest(reason="This model does not support `num_logits_to_keep` argument.") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config() + config, inputs_dict = self.prepare_config_and_inputs_for_generate() config.use_cache = True config.is_decoder = True @@ -1975,13 +1957,9 @@ def test_generate_methods_with_num_logits_to_keep(self): } # Setting num_logits_to_keep at 0 keeps all logits (old behavior) - with_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict, num_logits_to_keep=0 - ) + with_all_logits = model.generate(**generation_kwargs, **inputs_dict, num_logits_to_keep=0) # By default, num_logits_to_keep is automatically set to 1 if not provided (new behavior) - without_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **inputs_dict, **generation_kwargs - ) + without_all_logits = model.generate(**inputs_dict, **generation_kwargs) self.assertEqual(with_all_logits.tolist(), without_all_logits.tolist()) @pytest.mark.generate @@ -1993,7 +1971,7 @@ def test_assisted_decoding_with_num_logits_to_keep(self): if model_class._is_stateful: self.skipTest(reason="Stateful models don't support assisted generation") - config, input_ids, attention_mask, inputs_dict = self._get_input_ids_and_config(batch_size=1) + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) config.use_cache = True config.is_decoder = True @@ -2009,13 +1987,9 @@ def test_assisted_decoding_with_num_logits_to_keep(self): } # Setting num_logits_to_keep at 0 keeps all logits (old behavior) - with_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **generation_kwargs, **inputs_dict, num_logits_to_keep=0 - ) + with_all_logits = model.generate(**generation_kwargs, **inputs_dict, num_logits_to_keep=0) # By default, num_logits_to_keep is automatically set to 1 if not provided (new behavior) - without_all_logits = model.generate( - input_ids, attention_mask=attention_mask, **inputs_dict, **generation_kwargs - ) + without_all_logits = model.generate(**inputs_dict, **generation_kwargs) self.assertEqual(with_all_logits.tolist(), without_all_logits.tolist()) @pytest.mark.generate diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 859a4afaa7fa..f10734e53283 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -60,9 +60,6 @@ MusicgenModel, set_seed, ) - from transformers.generation import ( - GenerateEncoderDecoderOutput, - ) def _config_zero_init(config): diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index e509062e57ee..64a0f1956cad 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -61,9 +61,6 @@ MusicgenMelodyModel, 
set_seed, ) - from transformers.generation import ( - GenerateDecoderOnlyOutput, - ) if is_torchaudio_available(): from transformers import MusicgenMelodyProcessor From d5ecc5d853583e96c2b4c176be1a1a7673726a97 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 13:12:09 +0000 Subject: [PATCH 07/15] delete a few more overwrites --- tests/models/musicgen/test_modeling_musicgen.py | 16 ---------------- .../test_modeling_musicgen_melody.py | 16 ---------------- 2 files changed, 32 deletions(-) diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index f10734e53283..cc30238c8df9 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -1389,22 +1389,6 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_generate_without_input_ids(self): - config, _ = self.prepare_config_and_inputs_for_generate() - - # if no bos token id => cannot generate from None - if config.bos_token_id is None: - self.skipTest(reason="bos_token_id is None") - - for model_class in self.greedy_sample_model_classes: - model = model_class(config).to(torch_device) - model.eval() - - output_ids_generate = model.generate( - do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True - ) - self.assertIsNotNone(output_ids_generate) - @require_torch_fp16 @require_torch_accelerator # not all operations are supported in fp16 on CPU def test_generate_fp16(self): diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index 64a0f1956cad..35af9fe0768d 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -1375,22 +1375,6 @@ def _get_logits_processor_kwargs(self, do_sample=False, config=None): logits_processor_kwargs = {} return logits_processor_kwargs - def test_generate_without_input_ids(self): - config, _ = self.prepare_config_and_inputs_for_generate() - - # if no bos token id => cannot generate from None - if config.bos_token_id is None: - self.skipTest(reason="bos_token_id is None") - - for model_class in self.greedy_sample_model_classes: - model = model_class(config).to(torch_device) - model.eval() - - output_ids_generate = model.generate( - do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True - ) - self.assertIsNotNone(output_ids_generate) - @require_torch_fp16 @require_torch_accelerator # not all operations are supported in fp16 on CPU def test_generate_fp16(self): From a63ef8c620e90f3edefe2266a046d0a5d444baf2 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 13:38:41 +0000 Subject: [PATCH 08/15] fix contrastive search test --- tests/generation/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 1385cc3e4400..131b096b652b 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -982,7 +982,7 @@ def test_contrastive_generate_low_memory(self): if any(model_name in model_class.__name__.lower() for model_name in ["gptbigcode"]): self.skipTest(reason="TODO: fix me") - config, inputs_dict = self.prepare_config_and_inputs_for_generate() + config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) # NOTE: contrastive search only works with cache
on at the moment. if not hasattr(config, "use_cache"): From d8f8bd53007ec2b4cfdd66890cdd324b72a48a18 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 13:41:29 +0000 Subject: [PATCH 09/15] fix left-padding test --- tests/generation/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 131b096b652b..9032664e37bb 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1437,7 +1437,7 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature): for model_class in decoder_only_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() input_ids = inputs_dict["input_ids"] - attention_mask = inputs_dict["attention_mask"] + attention_mask = inputs_dict.get("attention_mask") or torch.ones_like(input_ids) model = model_class(config).to(torch_device).eval() signature = inspect.signature(model.forward).parameters.keys() From d67e936c6d25ea42d1d9af56bbe6f72c7f47e9bc Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 13:56:27 +0000 Subject: [PATCH 10/15] fix a few more --- tests/generation/test_utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 9032664e37bb..6b14a90eab60 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1437,7 +1437,9 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature): for model_class in decoder_only_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() input_ids = inputs_dict["input_ids"] - attention_mask = inputs_dict.get("attention_mask") or torch.ones_like(input_ids) + attention_mask = inputs_dict.get("attention_mask") + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) model = model_class(config).to(torch_device).eval() signature = inspect.signature(model.forward).parameters.keys() @@ -1542,7 +1544,6 @@ def test_generate_from_inputs_embeds_decoder_only(self): # if fails, you should probably update the `prepare_inputs_for_generation` function for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - input_ids = inputs_dict["input_ids"] # Ignore: # a) eos (to always output 20 tokens) and pad (so we don't try to infer the attn mask from the input_ids, @@ -1563,6 +1564,8 @@ def test_generate_from_inputs_embeds_decoder_only(self): if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): continue + input_ids = inputs_dict.pop("input_ids") + # Traditional way of generating text outputs_from_ids = model.generate( input_ids, max_new_tokens=5, return_dict_in_generate=True, output_scores=True @@ -1614,8 +1617,6 @@ def test_generate_from_inputs_embeds_with_static_cache(self): self.skipTest(reason="This model does not support the static cache format") config, inputs_dict = self.prepare_config_and_inputs_for_generate() - input_ids = inputs_dict["input_ids"] - attention_mask = inputs_dict["attention_mask"] if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") @@ -1624,9 +1625,11 @@ def test_generate_from_inputs_embeds_with_static_cache(self): if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): self.skipTest(reason="This model does not support `inputs_embeds` in generation") + input_ids = 
inputs_dict.pop("input_ids") + model.config.use_cache = True model.config.is_decoder = True - batch_size, seq_length = input_ids.shape + batch_size = input_ids.shape[0] max_cache_len = 30 # here we force to not stop at eos and go until max-length @@ -1651,9 +1654,7 @@ def test_generate_from_inputs_embeds_with_static_cache(self): num_hidden_layers = text_config.num_hidden_layers inputs_embeds = model.get_input_embeddings()(input_ids) - outputs = model.generate( - inputs_embeds=inputs_embeds, attention_mask=attention_mask, **generation_kwargs, **inputs_dict - ) + outputs = model.generate(inputs_embeds=inputs_embeds, **generation_kwargs, **inputs_dict) # we should get `max_length` in shape, not `max_length - embeds_length` cache_shape = (batch_size, num_key_value_heads, max_cache_len, head_dim) From 2a5c0ce7e2e9e5f808f4e84249f76b4304d6f959 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 14:20:40 +0000 Subject: [PATCH 11/15] attn masks must be torch.long --- tests/models/chameleon/test_modeling_chameleon.py | 2 +- tests/models/cohere/test_modeling_cohere.py | 2 +- tests/models/gemma/test_modeling_gemma.py | 2 +- tests/models/granite/test_modeling_granite.py | 2 +- tests/models/granitemoe/test_modeling_granitemoe.py | 2 +- tests/models/llama/test_modeling_llama.py | 2 +- tests/models/mistral/test_modeling_mistral.py | 2 +- tests/models/mixtral/test_modeling_mixtral.py | 2 +- tests/models/olmo/test_modeling_olmo.py | 2 +- tests/models/olmoe/test_modeling_olmoe.py | 2 +- tests/models/persimmon/test_modeling_persimmon.py | 2 +- tests/models/phi3/test_modeling_phi3.py | 2 +- tests/models/qwen2/test_modeling_qwen2.py | 2 +- tests/models/qwen2_moe/test_modeling_qwen2_moe.py | 2 +- tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py | 2 +- tests/models/stablelm/test_modeling_stablelm.py | 2 +- tests/models/starcoder2/test_modeling_starcoder2.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/models/chameleon/test_modeling_chameleon.py b/tests/models/chameleon/test_modeling_chameleon.py index 16e0a548e6dc..703929dc836d 100644 --- a/tests/models/chameleon/test_modeling_chameleon.py +++ b/tests/models/chameleon/test_modeling_chameleon.py @@ -116,7 +116,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) sequence_labels = None token_labels = None diff --git a/tests/models/cohere/test_modeling_cohere.py b/tests/models/cohere/test_modeling_cohere.py index d80bc5c24cf9..7d12dd3d873b 100644 --- a/tests/models/cohere/test_modeling_cohere.py +++ b/tests/models/cohere/test_modeling_cohere.py @@ -95,7 +95,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index a02541d58544..6422133d75eb 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -119,7 +119,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = 
torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/granite/test_modeling_granite.py b/tests/models/granite/test_modeling_granite.py index 0f4d7640a1bb..9b25698f6401 100644 --- a/tests/models/granite/test_modeling_granite.py +++ b/tests/models/granite/test_modeling_granite.py @@ -106,7 +106,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/granitemoe/test_modeling_granitemoe.py b/tests/models/granitemoe/test_modeling_granitemoe.py index 158259ed5fb4..d5d0cee6daa1 100644 --- a/tests/models/granitemoe/test_modeling_granitemoe.py +++ b/tests/models/granitemoe/test_modeling_granitemoe.py @@ -105,7 +105,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index a21665c822f2..6b273bce7a1f 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -112,7 +112,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 0730f8ba4441..88140b1a20f8 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -112,7 +112,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index db9641e3dcb2..836d38e904cb 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -108,7 +108,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py index b74d0fdf03b8..43e0b7afb49f 100644 --- a/tests/models/olmo/test_modeling_olmo.py +++ b/tests/models/olmo/test_modeling_olmo.py @@ -101,7 +101,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/olmoe/test_modeling_olmoe.py b/tests/models/olmoe/test_modeling_olmoe.py index 
1ce231e03731..9c3af5723ee1 100644 --- a/tests/models/olmoe/test_modeling_olmoe.py +++ b/tests/models/olmoe/test_modeling_olmoe.py @@ -111,7 +111,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 0d267fb86910..600c5b8a2f73 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -110,7 +110,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py index ce0a71878877..1186717a78cc 100644 --- a/tests/models/phi3/test_modeling_phi3.py +++ b/tests/models/phi3/test_modeling_phi3.py @@ -151,7 +151,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index 4d6c432f2042..95bf2cce6d3a 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -116,7 +116,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py index 0425172a6fba..e8eb915a328a 100644 --- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py +++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py @@ -134,7 +134,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py index 23dace68cf21..d2f658f56bd8 100644 --- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py +++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py @@ -103,7 +103,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index 36cad89bcfdf..c88fda6fb84e 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py 
@@ -113,7 +113,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/models/starcoder2/test_modeling_starcoder2.py b/tests/models/starcoder2/test_modeling_starcoder2.py index c1c7d45d4f18..7ab7faa90ea0 100644 --- a/tests/models/starcoder2/test_modeling_starcoder2.py +++ b/tests/models/starcoder2/test_modeling_starcoder2.py @@ -107,7 +107,7 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device) + input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device)) token_type_ids = None if self.use_token_type_ids: From 985c8af35925c741df5a39b8cdacb11f6b848001 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 25 Sep 2024 14:34:13 +0000 Subject: [PATCH 12/15] comments --- tests/generation/test_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 6b14a90eab60..5a639d1575d4 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -111,6 +111,7 @@ def prepare_config_and_inputs_for_generate(self, batch_size=2): "decoder_attention_mask", # we'll set cache use in each test differently "use_cache", + # model-specific exceptions should overload/overwrite this function ] filtered_inputs_dict = { k: v[:batch_size, ...] if isinstance(v, torch.Tensor) else v @@ -926,7 +927,7 @@ def test_contrastive_generate(self): output_generate = self._contrastive_generate( model=model, inputs_dict=inputs_dict, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + 1) @@ -960,7 +961,7 @@ def test_contrastive_generate_dict_outputs_use_cache(self): output_hidden_states=True, output_attentions=self.has_attentions, return_dict_in_generate=True, - use_cache=True, + use_cache=True, # Enable cache ) if model.config.is_encoder_decoder: From 79d96f908e3621637605a1935d28c234618ec18a Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 3 Oct 2024 12:23:54 +0000 Subject: [PATCH 13/15] PR comments --- tests/generation/test_utils.py | 59 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 8214cb2a3860..08093af767cc 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -120,12 +120,15 @@ def prepare_config_and_inputs_for_generate(self, batch_size=2): } # It is important set `eos_token_id` to `None` to avoid early stopping (would break for length-based checks) - if config.eos_token_id is not None and config.pad_token_id is None: - config.pad_token_id = ( - config.eos_token_id if isinstance(config.eos_token_id, int) else config.eos_token_id[0] + text_gen_config = config.get_text_config() + if text_gen_config.eos_token_id is not None and text_gen_config.pad_token_id is None: + text_gen_config.pad_token_id = ( + text_gen_config.eos_token_id + if isinstance(text_gen_config.eos_token_id, int) + else text_gen_config.eos_token_id[0] ) - config.eos_token_id = None - config.forced_eos_token_id = None + text_gen_config.eos_token_id = None + text_gen_config.forced_eos_token_id = None return config, 
filtered_inputs_dict @@ -402,7 +405,7 @@ def _contrastive_generate( def test_greedy_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._greedy_generate(model=model, inputs_dict=inputs_dict) @@ -416,7 +419,7 @@ def test_greedy_generate(self): def test_greedy_generate_dict_outputs(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._greedy_generate( @@ -447,7 +450,7 @@ def test_greedy_generate_dict_outputs(self): def test_greedy_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] if not hasattr(config, "use_cache"): self.skipTest(reason=f"{model_class.__name__} doesn't support caching") @@ -478,7 +481,7 @@ def test_greedy_generate_dict_outputs_use_cache(self): def test_sample_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._sample_generate(model=model, inputs_dict=inputs_dict, num_return_sequences=1) @@ -492,7 +495,7 @@ def test_sample_generate(self): def test_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() output_generate = self._sample_generate( @@ -524,7 +527,7 @@ def test_sample_generate_dict_output(self): def test_beam_search_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() @@ -540,7 +543,7 @@ def test_beam_search_generate(self): def test_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() @@ -574,7 +577,7 @@ def test_beam_search_generate_dict_output(self): def test_beam_search_generate_dict_outputs_use_cache(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] if not hasattr(config, "use_cache"): self.skipTest(reason=f"{model_class.__name__} doesn't support caching") @@ -639,7 +642,7 @@ def test_model_parallel_beam_search(self): def 
test_beam_sample_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() @@ -681,7 +684,7 @@ def test_beam_sample_generate(self): def test_beam_sample_generate_dict_output(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_beam_kwargs() @@ -738,7 +741,7 @@ def test_generate_without_input_ids(self): def test_group_beam_search_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() # check `generate()` and `group_beam_search()` are equal @@ -770,7 +773,7 @@ def test_group_beam_search_generate(self): def test_group_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() beam_kwargs = self._get_diverse_beam_kwargs() @@ -806,7 +809,7 @@ def test_group_beam_search_generate_dict_output(self): def test_constrained_beam_search_generate(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() @@ -863,7 +866,7 @@ def test_constrained_beam_search_generate(self): def test_constrained_beam_search_generate_dict_output(self): for model_class in self.all_generative_model_classes: config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] model = model_class(config).to(torch_device).eval() @@ -915,7 +918,7 @@ def test_contrastive_generate(self): self.skipTest(reason="Won't fix: old model with different cache format") config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # NOTE: contrastive search only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -945,7 +948,7 @@ def test_contrastive_generate_dict_outputs_use_cache(self): self.skipTest(reason="Won't fix: old model with different cache format") config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # NOTE: contrastive search only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -1100,7 +1103,7 @@ def test_assisted_decoding_matches_greedy_search(self, assistant_type): # enable cache config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -1174,7 +1177,7 @@ def test_prompt_lookup_decoding_matches_greedy_search(self): # enable cache config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. if not hasattr(config, "use_cache"): @@ -1227,7 +1230,7 @@ def test_dola_decoding_sample(self): # enable cache if the model is not openai-gpt, xlnet, cpm, or xlm config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # Encoder-decoder models are not supported if config.is_encoder_decoder: @@ -1251,11 +1254,11 @@ def test_dola_decoding_sample(self): "output_hidden_states": True, "output_attentions": self.has_attentions, "return_dict_in_generate": True, - "use_cache": hasattr(config, "use_cache"), # Some models don't support the cache + "use_cache": getattr(config, "use_cache"), # Some models don't support the cache } generation_kwargs.update({"dola_layers": "low"}) output_dola = model.generate(**generation_kwargs, **inputs_dict) - self._check_outputs(output_dola, main_input, model.config, use_cache=hasattr(config, "use_cache")) + self._check_outputs(output_dola, main_input, model.config, use_cache=getattr(config, "use_cache")) @pytest.mark.generate def test_assisted_decoding_sample(self): @@ -1284,7 +1287,7 @@ def test_assisted_decoding_sample(self): # enable cache config, inputs_dict = self.prepare_config_and_inputs_for_generate(batch_size=1) - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] # NOTE: assisted generation only works with cache on at the moment. 
if not hasattr(config, "use_cache"): @@ -1829,7 +1832,7 @@ def test_generate_with_static_cache(self): self.skipTest(reason="This model does not support the static cache format") config, inputs_dict = self.prepare_config_and_inputs_for_generate() - main_input = inputs_dict[self.input_name] + main_input = inputs_dict[model_class.main_input_name] if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") From 43fef274c25260151be5ffbb2db1ea186ec860b3 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 3 Oct 2024 12:36:30 +0000 Subject: [PATCH 14/15] nit --- tests/generation/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 08093af767cc..2fb0f8eeaf26 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -120,7 +120,7 @@ def prepare_config_and_inputs_for_generate(self, batch_size=2): } # It is important set `eos_token_id` to `None` to avoid early stopping (would break for length-based checks) - text_gen_config = config.get_text_config() + text_gen_config = config.get_text_config(decoder=True) if text_gen_config.eos_token_id is not None and text_gen_config.pad_token_id is None: text_gen_config.pad_token_id = ( text_gen_config.eos_token_id From 0e6a6edcbf2e08f3382d9db9cb79efdc4593c238 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 3 Oct 2024 12:49:47 +0000 Subject: [PATCH 15/15] rm input_name; nits --- tests/generation/test_utils.py | 7 +++---- tests/models/dac/test_modeling_dac.py | 1 - tests/models/encodec/test_modeling_encodec.py | 1 - tests/models/mimi/test_modeling_mimi.py | 1 - .../models/seamless_m4t/test_modeling_seamless_m4t.py | 2 -- .../seamless_m4t_v2/test_modeling_seamless_m4t_v2.py | 2 -- .../speech_to_text/test_modeling_speech_to_text.py | 2 -- tests/models/speecht5/test_modeling_speecht5.py | 10 ---------- tests/models/univnet/test_modeling_univnet.py | 2 -- tests/models/vits/test_modeling_vits.py | 2 -- tests/models/whisper/test_modeling_whisper.py | 4 ---- 11 files changed, 3 insertions(+), 31 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 2fb0f8eeaf26..caa4f6c2cedd 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -94,7 +94,6 @@ class GenerationTesterMixin: model_tester = None all_generative_model_classes = () - input_name = "input_ids" max_new_tokens = 3 def prepare_config_and_inputs_for_generate(self, batch_size=2): @@ -1254,11 +1253,11 @@ def test_dola_decoding_sample(self): "output_hidden_states": True, "output_attentions": self.has_attentions, "return_dict_in_generate": True, - "use_cache": getattr(config, "use_cache"), # Some models don't support the cache + "use_cache": getattr(config, "use_cache", False), # Some models don't support the cache + "dola_layers": "low", } - generation_kwargs.update({"dola_layers": "low"}) output_dola = model.generate(**generation_kwargs, **inputs_dict) - self._check_outputs(output_dola, main_input, model.config, use_cache=getattr(config, "use_cache")) + self._check_outputs(output_dola, main_input, model.config, use_cache=getattr(config, "use_cache", False)) @pytest.mark.generate def test_assisted_decoding_sample(self): diff --git a/tests/models/dac/test_modeling_dac.py b/tests/models/dac/test_modeling_dac.py index ffe7f31b79a5..e3b729d2f101 100644 --- a/tests/models/dac/test_modeling_dac.py +++ b/tests/models/dac/test_modeling_dac.py @@ -123,7 +123,6 @@ class 
DacModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): test_headmasking = False test_resize_embeddings = False pipeline_model_mapping = {"feature-extraction": DacModel} if is_torch_available() else {} - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does not have attention and does not support returning hidden states diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index cff297be8e00..2aac4dba82e8 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -141,7 +141,6 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) test_headmasking = False test_resize_embeddings = False pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {} - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does not have attention and does not support returning hidden states diff --git a/tests/models/mimi/test_modeling_mimi.py b/tests/models/mimi/test_modeling_mimi.py index dd0f77421be7..ab6184ce2bbe 100644 --- a/tests/models/mimi/test_modeling_mimi.py +++ b/tests/models/mimi/test_modeling_mimi.py @@ -170,7 +170,6 @@ class MimiModelTest(ModelTesterMixin, unittest.TestCase): test_headmasking = False test_resize_embeddings = False test_torchscript = False - input_name = "input_values" def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): # model does support returning hidden states diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index 5f158e4802f1..cb09d44421f4 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -360,8 +360,6 @@ class SeamlessM4TModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase): ) all_generative_model_classes = (SeamlessM4TForSpeechToText,) if is_torch_available() else () - input_name = "input_features" - def setUp(self): self.model_tester = SeamlessM4TModelTester(self, input_modality="speech") self.config_tester = ConfigTester(self, config_class=SeamlessM4TConfig) diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index 67c6e4bb0809..451fff0b35fb 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -376,8 +376,6 @@ class SeamlessM4Tv2ModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase) ) all_generative_model_classes = (SeamlessM4Tv2ForSpeechToText,) if is_torch_available() else () - input_name = "input_features" - def setUp(self): self.model_tester = SeamlessM4Tv2ModelTester(self, input_modality="speech") self.config_tester = ConfigTester(self, config_class=SeamlessM4Tv2Config) diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 76c224f9b498..50446d4628af 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -282,8 +282,6 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest test_pruning = False test_missing_keys = False - input_name = "input_features" - def setUp(self): self.model_tester = 
Speech2TextModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Speech2TextConfig)
diff --git a/tests/models/speecht5/test_modeling_speecht5.py b/tests/models/speecht5/test_modeling_speecht5.py
index e13cf8dd56c3..97abf1a2cf2c 100644
--- a/tests/models/speecht5/test_modeling_speecht5.py
+++ b/tests/models/speecht5/test_modeling_speecht5.py
@@ -177,8 +177,6 @@ class SpeechT5ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     test_headmasking = False
     test_resize_embeddings = False
 
-    input_name = "input_values"
-
     def setUp(self):
         self.model_tester = SpeechT5ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37)
@@ -375,8 +373,6 @@ class SpeechT5ForSpeechToTextTest(ModelTesterMixin, unittest.TestCase):
     test_pruning = False
     test_headmasking = False
 
-    input_name = "input_values"
-
     def setUp(self):
         self.model_tester = SpeechT5ForSpeechToTextTester(self)
         self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37)
@@ -895,8 +891,6 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
     test_pruning = False
     test_headmasking = False
 
-    input_name = "input_ids"
-
     def setUp(self):
         self.model_tester = SpeechT5ForTextToSpeechTester(self)
         self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37)
@@ -1441,8 +1435,6 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase):
     test_headmasking = False
     test_resize_embeddings = False
 
-    input_name = "input_values"
-
     def setUp(self):
         self.model_tester = SpeechT5ForSpeechToSpeechTester(self)
         self.config_tester = ConfigTester(self, config_class=SpeechT5Config, hidden_size=37)
@@ -1854,8 +1846,6 @@ class SpeechT5HifiGanTest(ModelTesterMixin, unittest.TestCase):
     is_encoder_decoder = False
     has_attentions = False
 
-    input_name = "spectrogram"
-
     def setUp(self):
         self.model_tester = SpeechT5HifiGanTester(self)
         self.config_tester = ConfigTester(self, config_class=SpeechT5HifiGanConfig)
diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py
index f26a423a1a2f..84d28c645874 100644
--- a/tests/models/univnet/test_modeling_univnet.py
+++ b/tests/models/univnet/test_modeling_univnet.py
@@ -118,8 +118,6 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
     is_encoder_decoder = False
     has_attentions = False
 
-    input_name = "input_features"
-
     def setUp(self):
         self.model_tester = UnivNetModelTester(self)
         self.config_tester = ConfigTester(
diff --git a/tests/models/vits/test_modeling_vits.py b/tests/models/vits/test_modeling_vits.py
index 99ba51e35f66..366194090953 100644
--- a/tests/models/vits/test_modeling_vits.py
+++ b/tests/models/vits/test_modeling_vits.py
@@ -167,8 +167,6 @@ class VitsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     test_torchscript = False
     has_attentions = False
 
-    input_name = "input_ids"
-
     def setUp(self):
         self.model_tester = VitsModelTester(self)
         self.config_tester = ConfigTester(self, config_class=VitsConfig, hidden_size=37)
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index bbec3f3e607d..c719fcf989da 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -395,8 +395,6 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
     # `0.5` is for `test_disk_offload` (which also works for `test_model_parallelism`)
     model_split_percents = [0.5, 0.8, 0.9]
 
-    input_name = "input_features"
-
     # TODO: Fix the failed tests
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
@@ -3469,8 +3467,6 @@ class WhisperEncoderModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.
     test_pruning = False
     test_missing_keys = False
 
-    input_name = "input_features"
-
     def setUp(self):
         self.model_tester = WhisperEncoderModelTester(self)
         self.config_tester = ConfigTester(self, config_class=WhisperConfig)