From 1aced1a8a63438e9e2e15aa186fe5b48137edf1e Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 12:52:31 +0100 Subject: [PATCH 1/9] fix --- tests/generation/test_utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 76ab793e3a36..92a03128cb9c 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1927,6 +1927,22 @@ def test_generate_with_static_cache(self): config, inputs_dict = self.prepare_config_and_inputs_for_generate() main_input = inputs_dict[model_class.main_input_name] + config.rms_norm_eps = 1.0 + config.layer_norm_eps = 1.0 + config.norm_eps = 1.0 + config.norm_epsilon = 1.0 + config.layer_norm_epsilon = 1.0 + + # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance. + # (We don't need the original epsilon values to check eager/sdpa matches) + for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]: + if hasattr(config, attr): + getattr(config, attr).rms_norm_eps = 1.0 + getattr(config, attr).layer_norm_eps = 1.0 + getattr(config, attr).norm_eps = 1.0 + getattr(config, attr).norm_epsilon = 1.0 + getattr(config, attr).layer_norm_epsilon = 1.0 + if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") @@ -1937,6 +1953,13 @@ def test_generate_with_static_cache(self): for dtype in (torch.float32, torch.float16): model = model_class(config).to(torch_device).to(dtype).eval() + + # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) + for x in model.modules(): + from torch import nn + if isinstance(x, (nn.LayerNorm, nn.GroupNorm)) or type(x).__name__ == "GemmaRMSNorm": + x.eps = 1.0 + generation_kwargs = { "max_new_tokens": max_new_tokens, "return_dict_in_generate": True, # Required to return `past_key_values` From 1d4545b3a9ac19aa3ed3cb5752abe45e09874b37 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 13:30:04 +0100 Subject: [PATCH 2/9] fix --- src/transformers/testing_utils.py | 37 +++++++++++++++++++++++++++++ tests/generation/test_utils.py | 28 +++++----------------- tests/test_modeling_common.py | 39 ++++++------------------------- 3 files changed, 50 insertions(+), 54 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 30f7b5a68fb2..0b6e176efc6f 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -14,6 +14,7 @@ import collections import contextlib +import copy import doctest import functools import gc @@ -1387,6 +1388,42 @@ def assert_screenout(out, what): assert match_str != -1, f"expecting to find {what} in output: f{out_pr}" +def set_model_tester_for_less_flaky_test(test_case): + if hasattr(test_case.model_tester, "num_hidden_layers"): + test_case.model_tester.num_hidden_layers = 1 + if hasattr(test_case.model_tester, "vision_config") and "num_hidden_layers" in test_case.model_tester.vision_config: + test_case.model_tester.vision_config = copy.deepcopy(test_case.model_tester.vision_config) + test_case.model_tester.vision_config["num_hidden_layers"] = 1 + if hasattr(test_case.model_tester, "text_config") and "num_hidden_layers" in test_case.model_tester.text_config: + test_case.model_tester.text_config = copy.deepcopy(test_case.model_tester.text_config) + test_case.model_tester.text_config["num_hidden_layers"] = 1 + + +def set_config_for_less_flaky_test(config): + config.rms_norm_eps = 1.0 + config.layer_norm_eps = 1.0 + config.norm_eps = 1.0 + config.norm_epsilon = 1.0 + config.layer_norm_epsilon = 1.0 + + # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance. + # (We don't need the original epsilon values to check eager/sdpa matches) + for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]: + if hasattr(config, attr): + getattr(config, attr).rms_norm_eps = 1.0 + getattr(config, attr).layer_norm_eps = 1.0 + getattr(config, attr).norm_eps = 1.0 + getattr(config, attr).norm_epsilon = 1.0 + getattr(config, attr).layer_norm_epsilon = 1.0 + + +def set_model_for_less_flaky_test(model): + # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) + for module in model.modules(): + if type(module).__name__ in ["GemmaRMSNorm", "LayerNorm", "GroupNorm"]: + module.eps = 1.0 + + class CaptureStd: """ Context manager to capture: diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 92a03128cb9c..13f527f88d5a 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -37,6 +37,9 @@ require_torch_multi_accelerator, require_torch_multi_gpu, require_torch_sdpa, + set_config_for_less_flaky_test, + set_model_for_less_flaky_test, + set_model_tester_for_less_flaky_test, slow, torch_device, ) @@ -1920,29 +1923,15 @@ def test_generate_with_static_cache(self): Tests that generating with static cache give almost same results as with dynamic cache, and the output cache has the expected shapes """ + set_model_tester_for_less_flaky_test(self) for model_class in self.all_generative_model_classes: if not model_class._supports_static_cache: self.skipTest(reason="This model does not support the static cache format") config, inputs_dict = self.prepare_config_and_inputs_for_generate() + set_config_for_less_flaky_test(config) main_input = inputs_dict[model_class.main_input_name] - config.rms_norm_eps = 1.0 - config.layer_norm_eps = 1.0 - config.norm_eps = 1.0 - config.norm_epsilon = 1.0 - config.layer_norm_epsilon = 1.0 - - # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance. - # (We don't need the original epsilon values to check eager/sdpa matches) - for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]: - if hasattr(config, attr): - getattr(config, attr).rms_norm_eps = 1.0 - getattr(config, attr).layer_norm_eps = 1.0 - getattr(config, attr).norm_eps = 1.0 - getattr(config, attr).norm_epsilon = 1.0 - getattr(config, attr).layer_norm_epsilon = 1.0 - if config.is_encoder_decoder: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") @@ -1953,12 +1942,7 @@ def test_generate_with_static_cache(self): for dtype in (torch.float32, torch.float16): model = model_class(config).to(torch_device).to(dtype).eval() - - # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) - for x in model.modules(): - from torch import nn - if isinstance(x, (nn.LayerNorm, nn.GroupNorm)) or type(x).__name__ == "GemmaRMSNorm": - x.eps = 1.0 + set_model_for_less_flaky_test(model) generation_kwargs = { "max_new_tokens": max_new_tokens, diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 99d0a8058c67..2c6e4198eb39 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -89,6 +89,9 @@ require_torch_multi_accelerator, require_torch_multi_gpu, require_torch_sdpa, + set_config_for_less_flaky_test, + set_model_for_less_flaky_test, + set_model_tester_for_less_flaky_test, slow, torch_device, ) @@ -4006,34 +4009,11 @@ def test_eager_matches_sdpa_inference(self, torch_dtype: str): def get_mean_reldiff(failcase, x, ref, atol, rtol): return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" - if hasattr(self.model_tester, "num_hidden_layers"): - self.model_tester.num_hidden_layers = 1 - if hasattr(self.model_tester, "vision_config") and "num_hidden_layers" in self.model_tester.vision_config: - self.model_tester.vision_config = copy.deepcopy(self.model_tester.vision_config) - self.model_tester.vision_config["num_hidden_layers"] = 1 - if hasattr(self.model_tester, "text_config") and "num_hidden_layers" in self.model_tester.text_config: - self.model_tester.text_config = copy.deepcopy(self.model_tester.text_config) - self.model_tester.text_config["num_hidden_layers"] = 1 + set_model_tester_for_less_flaky_test(self) for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - config.rms_norm_eps = 1.0 - config.layer_norm_eps = 1.0 - config.norm_eps = 1.0 - config.norm_epsilon = 1.0 - config.layer_norm_epsilon = 1.0 - - # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance. - # (We don't need the original epsilon values to check eager/sdpa matches) - for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]: - if hasattr(config, attr): - getattr(config, attr).rms_norm_eps = 1.0 - getattr(config, attr).layer_norm_eps = 1.0 - getattr(config, attr).norm_eps = 1.0 - getattr(config, attr).norm_epsilon = 1.0 - getattr(config, attr).layer_norm_epsilon = 1.0 - + set_config_for_less_flaky_test(config) model = model_class(config) # FIXME: we deactivate boolean mask for models using "use_mask_token" in their constructors. # These models support masking only in the case `use_mask_token=True`. Otherwise they cannot consume an input mask. @@ -4054,13 +4034,8 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol): ) model_eager = model_eager.eval().to(torch_device, dtype=torch_dtype) - # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) - for x in model_eager.modules(): - if isinstance(x, (nn.LayerNorm, nn.GroupNorm)): - x.eps = 1.0 - for x in model_sdpa.modules(): - if isinstance(x, (nn.LayerNorm, nn.GroupNorm)): - x.eps = 1.0 + set_model_for_less_flaky_test(model_eager) + set_model_for_less_flaky_test(model_sdpa) # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 16 times the model, # but it would be nicer to have an efficient way to use parameterized.expand From 343201b38cd9c963a059315c3bcedf3ea3f2e5bc Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 13:47:27 +0100 Subject: [PATCH 3/9] fix --- src/transformers/testing_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 0b6e176efc6f..a4955ad8e9a5 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -1391,7 +1391,10 @@ def assert_screenout(out, what): def set_model_tester_for_less_flaky_test(test_case): if hasattr(test_case.model_tester, "num_hidden_layers"): test_case.model_tester.num_hidden_layers = 1 - if hasattr(test_case.model_tester, "vision_config") and "num_hidden_layers" in test_case.model_tester.vision_config: + if ( + hasattr(test_case.model_tester, "vision_config") + and "num_hidden_layers" in test_case.model_tester.vision_config + ): test_case.model_tester.vision_config = copy.deepcopy(test_case.model_tester.vision_config) test_case.model_tester.vision_config["num_hidden_layers"] = 1 if hasattr(test_case.model_tester, "text_config") and "num_hidden_layers" in test_case.model_tester.text_config: From ad72abb31335f218c16f764b6dbfa3073397ef1d Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 15:50:00 +0100 Subject: [PATCH 4/9] fix --- .../seamless_m4t_v2/test_modeling_seamless_m4t_v2.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index 451fff0b35fb..01f513f97843 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -835,7 +835,12 @@ def test_generation_languages(self): def test_speech_generation(self): config, input_speech, input_text = self.prepare_speech_and_text_input() + from transformers.testing_utils import set_config_for_less_flaky_test, set_model_for_less_flaky_test + set_config_for_less_flaky_test(config) + model = SeamlessM4Tv2Model(config=config) + set_model_for_less_flaky_test(model) + self.update_generation(model) model.save_pretrained(self.tmpdirname) model.to(torch_device) @@ -847,6 +852,8 @@ def test_speech_generation(self): state_dict = model.state_dict() text_model = SeamlessM4Tv2ForTextToSpeech.from_pretrained(self.tmpdirname) + set_model_for_less_flaky_test(text_model) + self.update_generation(text_model) text_model.to(torch_device) text_model.eval() @@ -854,6 +861,8 @@ def test_speech_generation(self): output_text = self.factory_generation_speech_test(model, input_text) speech_model = SeamlessM4Tv2ForSpeechToSpeech.from_pretrained(self.tmpdirname) + set_model_for_less_flaky_test(speech_model) + self.update_generation(speech_model) speech_model.to(torch_device) speech_model.eval() From c0ece15ca7fceb461b56604bdd5e6ebfc9aa8c05 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 16:24:03 +0100 Subject: [PATCH 5/9] fix --- .../models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index 01f513f97843..d7bae6562a0f 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -852,6 +852,9 @@ def test_speech_generation(self): state_dict = model.state_dict() text_model = SeamlessM4Tv2ForTextToSpeech.from_pretrained(self.tmpdirname) + # Even if this component is loaded after `model.save_pretrained` which is after + # `set_model_for_less_flaky_test(model)`, we still need to apply `set_model_for_less_flaky_test` here as the + # `eps` attribute in the model's norm layers is not set from the config. set_model_for_less_flaky_test(text_model) self.update_generation(text_model) @@ -861,6 +864,9 @@ def test_speech_generation(self): output_text = self.factory_generation_speech_test(model, input_text) speech_model = SeamlessM4Tv2ForSpeechToSpeech.from_pretrained(self.tmpdirname) + # Even if this component is loaded after `model.save_pretrained` which is after + # `set_model_for_less_flaky_test(model)`, we still need to apply `set_model_for_less_flaky_test` here as the + # `eps` attribute in the model's norm layers is not set from the config. set_model_for_less_flaky_test(speech_model) self.update_generation(speech_model) From d360907825d41cbf14aace572b7d83b2b292be87 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 16:25:41 +0100 Subject: [PATCH 6/9] fix --- tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index d7bae6562a0f..bba195ed1322 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -836,6 +836,7 @@ def test_speech_generation(self): config, input_speech, input_text = self.prepare_speech_and_text_input() from transformers.testing_utils import set_config_for_less_flaky_test, set_model_for_less_flaky_test + set_config_for_less_flaky_test(config) model = SeamlessM4Tv2Model(config=config) From 9911f647502c12fc64d93a625d415122b715d829 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Thu, 28 Nov 2024 16:44:34 +0100 Subject: [PATCH 7/9] fix --- .../test_modeling_musicgen_melody.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index bc8baa2746ad..98b554be65fb 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -41,6 +41,9 @@ require_torch_gpu, require_torch_sdpa, require_torchaudio, + set_config_for_less_flaky_test, + set_model_for_less_flaky_test, + set_model_tester_for_less_flaky_test, slow, torch_device, ) @@ -516,8 +519,11 @@ def test_eager_matches_sdpa_inference(self, torch_dtype: str): def get_mean_reldiff(failcase, x, ref, atol, rtol): return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" + set_model_tester_for_less_flaky_test(self) + for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + set_config_for_less_flaky_test(config) model = model_class(config) is_encoder_decoder = model.config.is_encoder_decoder @@ -534,6 +540,9 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol): ) model_eager = model_eager.eval().to(torch_device) + set_model_for_less_flaky_test(model_eager) + set_model_for_less_flaky_test(model_sdpa) + # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model, # but it would be nicer to have an efficient way to use parameterized.expand fail_cases = [] @@ -1528,8 +1537,11 @@ def test_eager_matches_sdpa_inference(self, torch_dtype: str): def get_mean_reldiff(failcase, x, ref, atol, rtol): return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" + set_model_tester_for_less_flaky_test(self) + for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + set_config_for_less_flaky_test(config) model = model_class(config) is_encoder_decoder = model.config.is_encoder_decoder @@ -1546,6 +1558,9 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol): ) model_eager = model_eager.eval().to(torch_device) + set_model_for_less_flaky_test(model_eager) + set_model_for_less_flaky_test(model_sdpa) + # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model, # but it would be nicer to have an efficient way to use parameterized.expand fail_cases = [] From a8eddaa36b95e3677c1e27b0e1181865080d9f27 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Tue, 10 Dec 2024 18:22:27 +0100 Subject: [PATCH 8/9] fix --- src/transformers/testing_utils.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index a4955ad8e9a5..aa078c3c40e3 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -1403,28 +1403,29 @@ def set_model_tester_for_less_flaky_test(test_case): def set_config_for_less_flaky_test(config): - config.rms_norm_eps = 1.0 - config.layer_norm_eps = 1.0 - config.norm_eps = 1.0 - config.norm_epsilon = 1.0 - config.layer_norm_epsilon = 1.0 + target_attrs = ["rms_norm_eps", "layer_norm_eps", "norm_eps", "norm_epsilon", "layer_norm_epsilon", "batch_norm_eps"] + for target_attr in target_attrs: + setattr(config, target_attr, 1.0) # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance. # (We don't need the original epsilon values to check eager/sdpa matches) - for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]: + attrs = ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"] + for attr in attrs: if hasattr(config, attr): - getattr(config, attr).rms_norm_eps = 1.0 - getattr(config, attr).layer_norm_eps = 1.0 - getattr(config, attr).norm_eps = 1.0 - getattr(config, attr).norm_epsilon = 1.0 - getattr(config, attr).layer_norm_epsilon = 1.0 + for target_attr in target_attrs: + setattr(getattr(config, attr), target_attr, 1.0) def set_model_for_less_flaky_test(model): # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) - for module in model.modules(): - if type(module).__name__ in ["GemmaRMSNorm", "LayerNorm", "GroupNorm"]: - module.eps = 1.0 + target_names = ("LayerNorm", "GroupNorm", "BatchNorm", "RMSNorm", "BatchNorm2d", "BatchNorm1d") + target_attrs = ["eps", "epsilon", "variance_epsilon"] + if is_torch_available() and isinstance(model, torch.nn.Module): + for module in model.modules(): + if type(module).__name__.endswith(target_names): + for attr in target_attrs: + if hasattr(module, attr): + setattr(module, attr, 1.0) class CaptureStd: From 39a8850658a2d54f78fc84763918e8776f4398ee Mon Sep 17 00:00:00 2001 From: ydshieh Date: Tue, 10 Dec 2024 18:24:34 +0100 Subject: [PATCH 9/9] fix --- src/transformers/testing_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index aa078c3c40e3..1c4c40212969 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -1403,7 +1403,14 @@ def set_model_tester_for_less_flaky_test(test_case): def set_config_for_less_flaky_test(config): - target_attrs = ["rms_norm_eps", "layer_norm_eps", "norm_eps", "norm_epsilon", "layer_norm_epsilon", "batch_norm_eps"] + target_attrs = [ + "rms_norm_eps", + "layer_norm_eps", + "norm_eps", + "norm_epsilon", + "layer_norm_epsilon", + "batch_norm_eps", + ] for target_attr in target_attrs: setattr(config, target_attr, 1.0)