From eb6141571771b79fa0d1a6eecb17d224acba5e81 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Mon, 15 Sep 2025 12:34:56 +0000 Subject: [PATCH 01/14] first commit --- tests/kernels/test_kernels.py | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tests/kernels/test_kernels.py diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py new file mode 100644 index 000000000000..3f706055e104 --- /dev/null +++ b/tests/kernels/test_kernels.py @@ -0,0 +1,51 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py + +import os +import tempfile +import textwrap + +from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available +from transformers.testing_utils import ( + TestCasePlus, + backend_device_count, + require_huggingface_hub_greater_or_equal, + require_torch_multi_accelerator, + torch_device, + torchrun, +) + + +if is_torch_available(): + import torch + + +class TestHubKernels(TestCasePlus): + def setUp(self): + self.model_id = "unsloth/Llama-3.2-1B-Instruct" + self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) + self.model = AutoModelForCausalLM.from_pretrained(self.model_id, use_kernels=True) + self.input = "Hello" + self.EXPECTED_OUTPUT = "Hello, how are you?" + + def test_forward(self): + tokenized_input = self.tokenizer(self.input, return_tensors="pt").input_ids.to(self.model.device) + output = self.model.generate(tokenized_input, max_new_tokens=10) + self.assertTrue(self.tokenizer.decode(output[0], skip_special_tokens=True) == self.EXPECTED_OUTPUT) + + def test_rmsnorm(self): + layer = self.model.model.layers[0].input_layernorm + print(layer) From 36ea8a220bb942520c82ed908c79201188e663e4 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Wed, 17 Sep 2025 08:45:26 +0000 Subject: [PATCH 02/14] add tests --- src/transformers/integrations/hub_kernels.py | 11 +- tests/kernels/test_kernels.py | 130 +++++++++++++++++-- 2 files changed, 125 insertions(+), 16 deletions(-) diff --git a/src/transformers/integrations/hub_kernels.py b/src/transformers/integrations/hub_kernels.py index 95ca49a74915..a64e156bacf4 100644 --- a/src/transformers/integrations/hub_kernels.py +++ b/src/transformers/integrations/hub_kernels.py @@ -51,10 +51,13 @@ ) }, "RMSNorm": { - "cuda": LayerRepository( - repo_id="kernels-community/liger_kernels", - layer_name="LigerRMSNorm", - ), + "cuda": { + Mode.INFERENCE: LayerRepository( + repo_id="kernels-community/liger_kernels", + layer_name="LigerRMSNorm", + # revision="pure-layer-test", + ), + }, "rocm": { Mode.INFERENCE: LayerRepository( repo_id="kernels-community/liger_kernels", diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 3f706055e104..173b832f3967 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -13,39 +13,145 @@ # limitations under the License. 
# Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py
-
-import os
-import tempfile
-import textwrap
+import gc
+import copy
 
 from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available
 from transformers.testing_utils import (
     TestCasePlus,
+    backend_empty_cache,
+    require_kernels,
+    require_torch_accelerator,
     backend_device_count,
     require_huggingface_hub_greater_or_equal,
     require_torch_multi_accelerator,
     torch_device,
     torchrun,
 )
+from kernels import kernelize, Mode, Device
 
 
 if is_torch_available():
     import torch
 
 
+@require_kernels
 class TestHubKernels(TestCasePlus):
     def setUp(self):
         self.model_id = "unsloth/Llama-3.2-1B-Instruct"
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(self.model_id, use_kernels=True)
+        self.model_kernelized = AutoModelForCausalLM.from_pretrained(
+            self.model_id, use_kernels=True, device_map=torch_device
+        )
+        self.model_not_kernelized = AutoModelForCausalLM.from_pretrained(
+            self.model_id, use_kernels=False, device_map=torch_device
+        )
         self.input = "Hello"
-        self.EXPECTED_OUTPUT = "Hello, how are you?"
+        self.EXPECTED_OUTPUT = set()
+        self.EXPECTED_OUTPUT.add("Hello, I'm looking for a reliable and trustworthy online")
+
+    def tearDown(self):
+        gc.collect()
+        backend_empty_cache(torch_device)
+        gc.collect()
 
+    @require_torch_accelerator
     def test_forward(self):
-        tokenized_input = self.tokenizer(self.input, return_tensors="pt").input_ids.to(self.model.device)
-        output = self.model.generate(tokenized_input, max_new_tokens=10)
-        self.assertTrue(self.tokenizer.decode(output[0], skip_special_tokens=True) == self.EXPECTED_OUTPUT)
+        tokenized_input = self.tokenizer(self.input, return_tensors="pt").input_ids.to(self.model_kernelized.device)
+        output_ = self.model_kernelized.generate(tokenized_input, max_new_tokens=10, do_sample=False)
+        output = self.tokenizer.decode(output_[0], skip_special_tokens=True)
+        self.assertTrue(output in self.EXPECTED_OUTPUT)
+
+    def test_getter_use_kernels(self):
+        self.assertTrue(self.model_kernelized.use_kernels)
+        self.assertFalse(self.model_not_kernelized.use_kernels)
+
+    def test_kernelized_forward_is_different(self, kernelized_model, not_kernelized_model):
+        """
+        Iterate over modules and check if the forward method is different between
+        the kernelized and not kernelized models. Break on the first difference, else continue.
+        Finally, assert that at least one forward is different.
+        """
+        found_difference = False
+        for (name1, module1), (name2, module2) in zip(
+            kernelized_model.named_modules(), not_kernelized_model.named_modules()
+        ):
+            # Only compare modules with the same name
+            if name1 != name2:
+                continue
+            # Check if both modules have a 'forward' attribute
+            if hasattr(module1, "forward") and hasattr(module2, "forward"):
+                # Compare the code objects of the forward methods
+                code1 = getattr(module1.forward, "__code__", None)
+                code2 = getattr(module2.forward, "__code__", None)
+                if code1 is not None and code2 is not None:
+                    if code1 is not code2:
+                        found_difference = True
+                        break
+        self.assertTrue(
+            found_difference,
+            "No module's forward method was different between kernelized and not kernelized models.",
+        )
+
+    def test_kernelized_forward_is_the_same(self, model_1, model_2):
+        """
+        Iterate over modules and check if the forward method is the same between
+        the two models. Break on the first difference, else continue.
+        Finally, assert that all forward methods are the same.
+        """
+        no_difference = True
+        for (name1, module1), (name2, module2) in zip(model_1.named_modules(), model_2.named_modules()):
+            # Only compare modules with the same name
+            if name1 != name2:
+                continue
+            # Check if both modules have a 'forward' attribute
+            if hasattr(module1, "forward") and hasattr(module2, "forward"):
+                # Compare the code objects of the forward methods
+                code1 = getattr(module1.forward, "__code__", None)
+                code2 = getattr(module2.forward, "__code__", None)
+                if code1 is not None and code2 is not None:
+                    if code1 != code2:
+                        no_difference = False
+                        break
+        self.assertTrue(
+            no_difference,
+            "Expected all modules' forward methods to be the same between the two models.",
+        )
+
+    def test_kernelize(self):
+        model = copy.deepcopy(self.model_not_kernelized)
+        kernelize(model, mode=Mode.INFERENCE, device=Device(type=model.device.type))
+        self.test_kernelized_forward_is_different(model, self.model_not_kernelized)
+        self.test_kernelized_forward_is_the_same(model, self.model_kernelized)
+        del model
+
+    def test_setter_use_kernels(self):
+        model = copy.deepcopy(self.model_not_kernelized)
+        model.use_kernels = True
+        self.assertTrue(model.use_kernels)
+        self.test_kernelized_forward_is_different(model, self.model_not_kernelized)
+        self.test_kernelized_forward_is_the_same(model, self.model_kernelized)
+        del model
+
+    def test_unkernelize(self):
+        model = copy.deepcopy(self.model_kernelized)
+
+        with self.assertLogs("transformers.modeling_utils", level="WARNING") as cm:
+            model.use_kernels = False
+
+        self.assertTrue(
+            any(
+                "Disabling kernels at runtime is a no-op as there is no 'unkernelize' routine; keeping current kernels active."
+                in msg
+                for msg in cm.output
+            )
+        )
+
+        self.assertFalse(model.use_kernels)
+
+    def test_rekernelize(self):
+        model = copy.deepcopy(self.model_kernelized)
+        model.train()
+
+
-    def test_rmsnorm(self):
-        layer = self.model.model.layers[0].input_layernorm
-        print(layer)

From 6441a0db5f0ff3b16b4192b1afe689f1449ec680 Mon Sep 17 00:00:00 2001
From: medmekk
Date: Thu, 2 Oct 2025 11:49:55 +0000
Subject: [PATCH 03/14] add kernel config

---
 tests/kernels/test_kernels.py | 53 ++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py
index 173b832f3967..7ac0e08a9595 100644
--- a/tests/kernels/test_kernels.py
+++ b/tests/kernels/test_kernels.py
@@ -13,27 +13,22 @@
 # limitations under the License. 
# Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py -import gc import copy +import gc -from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available +from transformers import AutoModelForCausalLM, AutoTokenizer, KernelConfig from transformers.testing_utils import ( TestCasePlus, backend_empty_cache, require_kernels, require_torch_accelerator, - backend_device_count, - require_huggingface_hub_greater_or_equal, - require_torch_multi_accelerator, torch_device, - torchrun, ) -from kernels import kernelize, Mode, Device - +from transformers.utils import is_kernels_available -if is_torch_available(): - import torch +if is_kernels_available(): + from kernels import Device, Mode, kernelize @require_kernels class TestHubKernels(TestCasePlus): @@ -47,8 +42,7 @@ def setUp(self): self.model_id, use_kernels=False, device_map=torch_device ) self.input = "Hello" - self.EXPECTED_OUTPUT = set() - self.EXPECTED_OUTPUT.add("Hello, I'm looking for a reliable and trustworthy online") + def tearDown(self): gc.collect() @@ -60,6 +54,10 @@ def test_forward(self): tokenized_input = self.tokenizer(self.input, return_tensors="pt").input_ids.to(self.model_kernelized.device) output_ = self.model_kernelized.generate(tokenized_input, max_new_tokens=10, do_sample=False) output = self.tokenizer.decode(output_[0], skip_special_tokens=True) + + self.EXPECTED_OUTPUT = set() + self.EXPECTED_OUTPUT.add("Hello, I'm looking for a reliable and trustworthy online") + self.assertTrue(output in self.EXPECTED_OUTPUT) def test_getter_use_kernels(self): @@ -149,9 +147,32 @@ def test_unkernelize(self): self.assertFalse(model.use_kernels) - def test_rekernelize(self): - model = copy.deepcopy(self.model_kernelized) - model.train() + def test_kernels_mapping(self): + kernel_config = KernelConfig(kernel_mapping={"RMSNorm": "kernels-community/layer_norm:LlamaRMSNorm"}) + model = AutoModelForCausalLM.from_pretrained( + "unsloth/Llama-3.2-1B-Instruct", use_kernels=True, device_map=torch_device, kernel_config=kernel_config + ) - + EXPECTED_OUTPUT = set() + EXPECTED_OUTPUT.add("Hello, I'm looking for a reliable and trustworthy online") + tokenized_input = self.tokenizer(self.input, return_tensors="pt").input_ids.to(model.device) + output = model.generate(tokenized_input, max_new_tokens=10, do_sample=False) + output = self.tokenizer.decode(output[0], skip_special_tokens=True) + self.assertTrue(output in EXPECTED_OUTPUT) + + del model + + def test_faulty_kernel_mapping_layer_name(self): + kernel_config = KernelConfig(kernel_mapping={"RMSNorm1": "kernels-community/layer_norm:LlamaRMSNorm"}) + with self.assertRaises(ValueError): + _ = AutoModelForCausalLM.from_pretrained( + "unsloth/Llama-3.2-1B-Instruct", use_kernels=True, device_map=torch_device, kernel_config=kernel_config + ) + + def test_faulty_kernel_mapping_type(self): + kernel_config = KernelConfig(kernel_mapping={"RMSNorm": 1}) + with self.assertRaises(ValueError): + _ = AutoModelForCausalLM.from_pretrained( + "unsloth/Llama-3.2-1B-Instruct", use_kernels=True, device_map=torch_device, kernel_config=kernel_config + ) From b2a6e255892a57a28b8ccc5ee942178ddc6fb09e Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 09:20:42 +0000 Subject: [PATCH 04/14] add more tests --- tests/kernels/test_kernels.py | 170 +++++++++++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 7ac0e08a9595..cad0cf70e446 100644 --- 
a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -15,8 +15,19 @@ # Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py import copy import gc +import types +import unittest +from unittest.mock import patch from transformers import AutoModelForCausalLM, AutoTokenizer, KernelConfig +from transformers.integrations.hub_kernels import ( + _KERNEL_MODULE_MAPPING, + _HUB_KERNEL_MAPPING, + is_kernel, + lazy_load_kernel, + load_and_register_attn_kernel, +) +from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS from transformers.testing_utils import ( TestCasePlus, backend_empty_cache, @@ -28,8 +39,10 @@ if is_kernels_available(): + import kernels as kernels_pkg from kernels import Device, Mode, kernelize + @require_kernels class TestHubKernels(TestCasePlus): def setUp(self): @@ -43,7 +56,6 @@ def setUp(self): ) self.input = "Hello" - def tearDown(self): gc.collect() backend_empty_cache(torch_device) @@ -146,6 +158,7 @@ def test_unkernelize(self): ) self.assertFalse(model.use_kernels) + del model def test_kernels_mapping(self): kernel_config = KernelConfig(kernel_mapping={"RMSNorm": "kernels-community/layer_norm:LlamaRMSNorm"}) @@ -176,3 +189,158 @@ def test_faulty_kernel_mapping_type(self): _ = AutoModelForCausalLM.from_pretrained( "unsloth/Llama-3.2-1B-Instruct", use_kernels=True, device_map=torch_device, kernel_config=kernel_config ) + + +@require_kernels +class TestKernelUtilities(TestCasePlus): + def test_is_kernel_regex(self): + valid = [ + "org/model", + "org/model@main", + "org/model:my_func", + "org/model@v1.2.3:my_func", + "flash|org/model@rev:fn", + ] + invalid = [ + "org//model", + "org/model:too:many", + "org/model@rev:fn:extra", + "/org/model", + "org:model", + ] + for s in valid: + self.assertTrue(is_kernel(s.split("|")[-1])) + for s in invalid: + self.assertFalse(is_kernel(s)) + + def test_lazy_load_kernel_success_and_cache(self): + sentinel = types.SimpleNamespace(name="sentinel") + + original_get_kernel = getattr(kernels_pkg, "get_kernel") + try: + + def fake_get_kernel(repo_id, revision=None): + self.assertIn(repo_id, {"kernels-community/causal-conv1d"}) + return sentinel + + setattr(kernels_pkg, "get_kernel", fake_get_kernel) + _KERNEL_MODULE_MAPPING.pop("causal-conv1d", None) + + mod1 = lazy_load_kernel("causal-conv1d") + self.assertIs(mod1, sentinel) + mod2 = lazy_load_kernel("causal-conv1d") + self.assertIs(mod2, sentinel) + finally: + setattr(kernels_pkg, "get_kernel", original_get_kernel) + + def test_lazy_load_kernel_unknown(self): + name = "unknown-kernel-name" + _KERNEL_MODULE_MAPPING.pop(name, None) + mod = lazy_load_kernel(name) + self.assertIsNone(mod) + self.assertIn(name, _KERNEL_MODULE_MAPPING) + + def test_lazy_load_kernel_version(self): + HUB = _HUB_KERNEL_MAPPING + name = "causal-conv1d" + version_spec = ">=0.0.4,<0.1.0" + original_get_kernel = getattr(kernels_pkg, "get_kernel") + original_entry = HUB.get(name, None) + + # Use a real ModuleType so caching short-circuits on the second call + sentinel_mod = types.ModuleType("sentinel_kernel_module") + call_count = {"n": 0} + + try: + # Inject dict-style mapping with repo_id and version + HUB[name] = {"repo_id": "kernels-community/causal-conv1d", "version": version_spec} + _KERNEL_MODULE_MAPPING.pop(name, None) + + def fake_get_kernel(repo_id, revision=None, version=None, user_agent=None): + call_count["n"] += 1 + self.assertEqual(repo_id, "kernels-community/causal-conv1d") + self.assertIsNone(revision, "revision must not be set when version is 
provided") + self.assertEqual(version, version_spec) + return sentinel_mod + + # Patch kernels.get_kernel so lazy_load_kernel picks it up on import + setattr(kernels_pkg, "get_kernel", fake_get_kernel) + + # Act + mod1 = lazy_load_kernel(name) + mod2 = lazy_load_kernel(name) + + # Assert + self.assertIs(mod1, sentinel_mod) + self.assertIs(mod2, sentinel_mod) + self.assertEqual(call_count["n"], 1, "second call should hit the cache") + finally: + # Restore patched function and mapping to avoid side effects + setattr(kernels_pkg, "get_kernel", original_get_kernel) + if original_entry is None: + HUB.pop(name, None) + else: + HUB[name] = original_entry + _KERNEL_MODULE_MAPPING.pop(name, None) + + +@require_kernels +class TestAttentionKernelRegistration(TestCasePlus): + def test_load_and_register_flash_attn_like_kernel(self): + kernel_obj = types.SimpleNamespace(flash_attn_varlen_func=lambda *a, **k: None) + + with ( + patch("transformers.integrations.hub_kernels.get_kernel", return_value=kernel_obj), + patch("transformers.integrations.hub_kernels.lazy_import_flash_attention", return_value=None), + ): + attn_impl = "org/model" + load_and_register_attn_kernel(attn_impl) + self.assertIn(attn_impl, ALL_ATTENTION_FUNCTIONS.valid_keys()) + + def test_load_and_register_named_function_kernel(self): + def my_attention(*args, **kwargs): + return None + + kernel_obj = types.SimpleNamespace(my_func=my_attention) + with patch("transformers.integrations.hub_kernels.get_kernel", return_value=kernel_obj): + attn_impl = "org/model:my_func" + load_and_register_attn_kernel(attn_impl) + self.assertIn(attn_impl, ALL_ATTENTION_FUNCTIONS.valid_keys()) + + +@require_kernels +class TestUseKernelsLifecycle(TestCasePlus): + def setUp(self): + self.model_id = "unsloth/Llama-3.2-1B-Instruct" + self.model = AutoModelForCausalLM.from_pretrained(self.model_id, use_kernels=False, device_map=torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + gc.collect() + + def test_setting_use_kernels_twice_does_not_rekernelize(self): + call_count = {"n": 0} + + def spy_kernelize(*args, **kwargs): + call_count["n"] += 1 + + with patch.object(kernels_pkg, "kernelize", side_effect=spy_kernelize): + self.model.use_kernels = True + self.assertTrue(self.model.use_kernels) + self.assertEqual(call_count["n"], 1) + self.model.use_kernels = True + self.assertEqual(call_count["n"], 1) + + def test_train_eval_calls_kernelize_with_correct_mode(self): + last_modes = [] + + def spy_kernelize(model, device=None, mode=None): + last_modes.append(mode) + + with patch.object(kernels_pkg, "kernelize", side_effect=spy_kernelize): + self.model.use_kernels = True + self.model.train(True) + self.assertTrue(any(m == Mode.TRAINING for m in last_modes)) + self.model.eval() + self.assertTrue(any(m == Mode.INFERENCE for m in last_modes)) From d671ef569d78f5f249e434740b68c0b78092de67 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 10:40:23 +0000 Subject: [PATCH 05/14] add ci --- .github/workflows/self-scheduled-caller.yml | 142 +++++++++++--------- .github/workflows/self-scheduled.yml | 63 +++++++++ tests/kernels/test_kernels.py | 1 - utils/notification_service.py | 4 + 4 files changed, 144 insertions(+), 66 deletions(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index 01f5a0a48bdd..a938e2507a71 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -6,7 +6,7 @@ on: - cron: "17 2 * * *" push: branches: - - 
run_nvidia_ci* + - adding_kernels_tests workflow_dispatch: inputs: prev_workflow_run_id: @@ -44,77 +44,89 @@ jobs: name: setup_values path: setup_values - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-models" - docker: huggingface/transformers-all-latest-gpu - ci_event: Daily CI - runner_type: "a10" - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # model-ci: + # name: Model CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_models_gpu + # slack_report_channel: "#transformers-ci-daily-models" + # docker: huggingface/transformers-all-latest-gpu + # ci_event: Daily CI + # runner_type: "a10" + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-torch" - docker: huggingface/transformers-pytorch-gpu - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # torch-pipeline: + # name: Torch pipeline CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_pipelines_torch_gpu + # slack_report_channel: "#transformers-ci-daily-pipeline-torch" + # docker: huggingface/transformers-pytorch-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-examples" - docker: huggingface/transformers-all-latest-gpu - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # example-ci: + # name: Example CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_examples_gpu + # slack_report_channel: "#transformers-ci-daily-examples" + # docker: huggingface/transformers-all-latest-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - trainer-fsdp-ci: - name: Trainer/FSDP CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_trainer_and_fsdp_gpu - slack_report_channel: "#transformers-ci-daily-training" - docker: huggingface/transformers-all-latest-gpu - runner_type: "a10" - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # trainer-fsdp-ci: + # name: Trainer/FSDP CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_trainer_and_fsdp_gpu + # slack_report_channel: "#transformers-ci-daily-training" + # docker: huggingface/transformers-all-latest-gpu + # runner_type: "a10" + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-training" - docker: huggingface/transformers-pytorch-deepspeed-latest-gpu - ci_event: Daily CI - working-directory-prefix: /workspace - report_repo_id: 
hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # deepspeed-ci: + # name: DeepSpeed CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_torch_cuda_extensions_gpu + # slack_report_channel: "#transformers-ci-daily-training" + # docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + # ci_event: Daily CI + # working-directory-prefix: /workspace + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit + + # quantization-ci: + # name: Quantization CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_quantization_torch_gpu + # slack_report_channel: "#transformers-ci-daily-quantization" + # docker: huggingface/transformers-quantization-latest-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - quantization-ci: - name: Quantization CI + kernels-ci: + name: Kernels CI uses: ./.github/workflows/self-scheduled.yml with: - job: run_quantization_torch_gpu - slack_report_channel: "#transformers-ci-daily-quantization" - docker: huggingface/transformers-quantization-latest-gpu + job: run_kernels_gpu + slack_report_channel: "#transformers-ci-daily-kernels" + docker: huggingface/transformers-all-latest-gpu ci_event: Daily CI report_repo_id: hf-internal-testing/transformers_daily_ci commit_sha: ${{ github.sha }} - secrets: inherit + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 070662293770..b517d1ef3e1c 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -463,6 +463,69 @@ jobs: name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports + run_kernels_gpu: + if: ${{ inputs.job == 'run_kernels_gpu' }} + name: Kernel tests + strategy: + fail-fast: false + matrix: + machine_type: [aws-g5-4xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' + container: + image: ${{ inputs.docker }} + options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }} + + - name: Reinstall transformers in edit mode + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[testing] + + - name: NVIDIA-SMI + run: nvidia-smi + + - name: Environment + working-directory: /transformers + run: python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then + machine_type=single-gpu + else + machine_type=${{ matrix.machine_type }} + fi + echo "machine_type=$machine_type" >> $GITHUB_ENV + + - name: Install kernels + working-directory: /transformers + run: python3 -m pip install -U kernels + + - name: Run kernel tests on GPU + working-directory: /transformers + run: | + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_kernels_gpu_test_reports tests/kernels/test_kernels.py + + - name: Failure short reports + if: 
${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports/failures_short.txt + + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_kernels_gpu_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ env.machine_type }}_run_kernels_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports run_extract_warnings: # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. if: ${{ always() && inputs.job == 'run_models_gpu' }} diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index cad0cf70e446..5a2925dfd788 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -282,7 +282,6 @@ def fake_get_kernel(repo_id, revision=None, version=None, user_agent=None): else: HUB[name] = original_entry _KERNEL_MODULE_MAPPING.pop(name, None) - @require_kernels class TestAttentionKernelRegistration(TestCasePlus): diff --git a/utils/notification_service.py b/utils/notification_service.py index 258ccc2772db..64d1fc28e7f3 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -40,6 +40,7 @@ "run_examples_gpu": "Examples directory", "run_torch_cuda_extensions_gpu": "DeepSpeed", "run_quantization_torch_gpu": "Quantization", + "run_kernels_gpu": "Kernels", } # The values are used as the file names where to save the corresponding CI job results. @@ -50,6 +51,7 @@ "Examples directory": "example", "DeepSpeed": "deepspeed", "Quantization": "quantization", + "Kernels": "kernels", } NON_MODEL_TEST_MODULES = [ @@ -65,6 +67,7 @@ "utils", "fsdp", "quantization", + "kernels", ] @@ -1301,6 +1304,7 @@ def pop_default(l: list[Any], i: int, default: Any) -> Any: "PyTorch pipelines": "run_pipelines_torch_gpu_test_reports", "Examples directory": "run_examples_gpu_test_reports", "DeepSpeed": "run_torch_cuda_extensions_gpu_test_reports", + "Kernels": "run_kernels_gpu_test_reports", } if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"): From d9dfd7f94ef023648bc19469f367e33b21c8edd6 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 10:45:06 +0000 Subject: [PATCH 06/14] small fix --- .github/workflows/self-scheduled.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index b517d1ef3e1c..69abb39a2a35 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -526,6 +526,7 @@ jobs: with: name: ${{ env.machine_type }}_run_kernels_gpu_test_reports path: /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports + run_extract_warnings: # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. 
if: ${{ always() && inputs.job == 'run_models_gpu' }} @@ -578,6 +579,7 @@ jobs: run_examples_gpu, run_torch_cuda_extensions_gpu, run_quantization_torch_gpu, + run_kernels_gpu, run_extract_warnings ] if: always() && !cancelled() From 32310d8ff0a4fa69d815c1b863195f43b5cf1d52 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 11:03:52 +0000 Subject: [PATCH 07/14] change branch name --- .github/workflows/self-scheduled-caller.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index a938e2507a71..fe4effeb9d42 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -6,7 +6,7 @@ on: - cron: "17 2 * * *" push: branches: - - adding_kernels_tests + - add_tests_for_kernels workflow_dispatch: inputs: prev_workflow_run_id: From 4eda7fe58431319935e1d721f5ff236f8b2b0905 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 11:17:57 +0000 Subject: [PATCH 08/14] update tests --- tests/kernels/test_kernels.py | 61 ++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 5a2925dfd788..3e1ec6c2be9c 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -16,17 +16,17 @@ import copy import gc import types -import unittest from unittest.mock import patch from transformers import AutoModelForCausalLM, AutoTokenizer, KernelConfig from transformers.integrations.hub_kernels import ( - _KERNEL_MODULE_MAPPING, _HUB_KERNEL_MAPPING, + _KERNEL_MODULE_MAPPING, is_kernel, lazy_load_kernel, load_and_register_attn_kernel, ) +from transformers.masking_utils import ALL_MASK_ATTENTION_FUNCTIONS from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS from transformers.testing_utils import ( TestCasePlus, @@ -57,6 +57,29 @@ def setUp(self): self.input = "Hello" def tearDown(self): + # Delete large objects to drop references early + for attr in [ + "model_kernelized", + "model_not_kernelized", + "tokenizer", + ]: + if hasattr(self, attr): + try: + delattr(self, attr) + except Exception: + pass + + # Clear any temporary kernel module cache entries populated by tests + try: + keys_to_remove = [ + k for k, v in list(_KERNEL_MODULE_MAPPING.items()) if v is None or isinstance(v, types.ModuleType) + ] + for k in keys_to_remove: + _KERNEL_MODULE_MAPPING.pop(k, None) + except Exception: + pass + + # Free accelerator memory/cache and trigger GC gc.collect() backend_empty_cache(torch_device) gc.collect() @@ -130,7 +153,7 @@ def test_kernelized_forward_is_the_same(self, model_1, model_2): def test_kernelize(self): model = copy.deepcopy(self.model_not_kernelized) - kernelize(model, mode=Mode.INFERENCE, device=Device(type=model.device.type)) + kernelize(model, mode=Mode.INFERENCE, device=Device(type=model.device.type)) # type: ignore[arg-type] self.test_kernelized_forward_is_different(model, self.model_not_kernelized) self.test_kernelized_forward_is_the_same(model, self.model_kernelized) del model @@ -232,6 +255,8 @@ def fake_get_kernel(repo_id, revision=None): self.assertIs(mod2, sentinel) finally: setattr(kernels_pkg, "get_kernel", original_get_kernel) + # Ensure cache is cleared to avoid holding onto module references across tests + _KERNEL_MODULE_MAPPING.pop("causal-conv1d", None) def test_lazy_load_kernel_unknown(self): name = "unknown-kernel-name" @@ -239,6 +264,8 @@ def test_lazy_load_kernel_unknown(self): mod = 
lazy_load_kernel(name) self.assertIsNone(mod) self.assertIn(name, _KERNEL_MODULE_MAPPING) + # Cleanup cache entry to avoid growth across tests + _KERNEL_MODULE_MAPPING.pop(name, None) def test_lazy_load_kernel_version(self): HUB = _HUB_KERNEL_MAPPING @@ -253,7 +280,7 @@ def test_lazy_load_kernel_version(self): try: # Inject dict-style mapping with repo_id and version - HUB[name] = {"repo_id": "kernels-community/causal-conv1d", "version": version_spec} + HUB[name] = {"repo_id": "kernels-community/causal-conv1d", "version": version_spec} # type: ignore[assignment] _KERNEL_MODULE_MAPPING.pop(name, None) def fake_get_kernel(repo_id, revision=None, version=None, user_agent=None): @@ -283,6 +310,7 @@ def fake_get_kernel(repo_id, revision=None, version=None, user_agent=None): HUB[name] = original_entry _KERNEL_MODULE_MAPPING.pop(name, None) + @require_kernels class TestAttentionKernelRegistration(TestCasePlus): def test_load_and_register_flash_attn_like_kernel(self): @@ -295,6 +323,15 @@ def test_load_and_register_flash_attn_like_kernel(self): attn_impl = "org/model" load_and_register_attn_kernel(attn_impl) self.assertIn(attn_impl, ALL_ATTENTION_FUNCTIONS.valid_keys()) + # Cleanup registration to avoid leaking functions across tests + try: + ALL_ATTENTION_FUNCTIONS.pop(attn_impl, None) + except Exception: + pass + try: + ALL_MASK_ATTENTION_FUNCTIONS.pop(attn_impl, None) + except Exception: + pass def test_load_and_register_named_function_kernel(self): def my_attention(*args, **kwargs): @@ -305,6 +342,15 @@ def my_attention(*args, **kwargs): attn_impl = "org/model:my_func" load_and_register_attn_kernel(attn_impl) self.assertIn(attn_impl, ALL_ATTENTION_FUNCTIONS.valid_keys()) + # Cleanup registration to avoid leaking functions across tests + try: + ALL_ATTENTION_FUNCTIONS.pop(attn_impl, None) + except Exception: + pass + try: + ALL_MASK_ATTENTION_FUNCTIONS.pop(attn_impl, None) + except Exception: + pass @require_kernels @@ -314,6 +360,13 @@ def setUp(self): self.model = AutoModelForCausalLM.from_pretrained(self.model_id, use_kernels=False, device_map=torch_device) def tearDown(self): + # Delete large objects to drop references early + if hasattr(self, "model"): + try: + del self.model + except Exception: + pass + # Free accelerator memory/cache and trigger GC gc.collect() backend_empty_cache(torch_device) gc.collect() From b5278150e0d30545dfb94f9fab495409f8394121 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 11:35:38 +0000 Subject: [PATCH 09/14] nit --- tests/kernels/test_kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 3e1ec6c2be9c..b8bc67f22ff8 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -242,7 +242,7 @@ def test_lazy_load_kernel_success_and_cache(self): original_get_kernel = getattr(kernels_pkg, "get_kernel") try: - def fake_get_kernel(repo_id, revision=None): + def fake_get_kernel(repo_id, revision=None, version=None): self.assertIn(repo_id, {"kernels-community/causal-conv1d"}) return sentinel From 2a680e599cbd545778423debbb86c1ba82311337 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 11:44:25 +0000 Subject: [PATCH 10/14] change test name --- tests/kernels/test_kernels.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index b8bc67f22ff8..f9559c9d9e35 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -99,7 
+99,7 @@ def test_getter_use_kernels(self): self.assertTrue(self.model_kernelized.use_kernels) self.assertFalse(self.model_not_kernelized.use_kernels) - def test_kernelized_forward_is_different(self, kernelized_model, not_kernelized_model): + def assert_kernelized_forward_is_different(self, kernelized_model, not_kernelized_model): """ Iterate over modules and check if the forward method is different between the kernelized and not kernelized models. Break on first difference, else continue. @@ -126,7 +126,7 @@ def test_kernelized_forward_is_different(self, kernelized_model, not_kernelized_ "No module's forward method was different between kernelized and not kernelized models.", ) - def test_kernelized_forward_is_the_same(self, model_1, model_2): + def assert_kernelized_forward_is_the_same(self, model_1, model_2): """ Iterate over modules and check if the forward method is the same between the kernelized and not kernelized models. Break on first difference, else continue. @@ -154,16 +154,16 @@ def test_kernelized_forward_is_the_same(self, model_1, model_2): def test_kernelize(self): model = copy.deepcopy(self.model_not_kernelized) kernelize(model, mode=Mode.INFERENCE, device=Device(type=model.device.type)) # type: ignore[arg-type] - self.test_kernelized_forward_is_different(model, self.model_not_kernelized) - self.test_kernelized_forward_is_the_same(model, self.model_kernelized) + self.assert_kernelized_forward_is_different(model, self.model_not_kernelized) + self.assert_kernelized_forward_is_the_same(model, self.model_kernelized) del model def test_setter_use_kernels(self): model = copy.deepcopy(self.model_not_kernelized) model.use_kernels = True self.assertTrue(model.use_kernels) - self.test_kernelized_forward_is_different(model, self.model_not_kernelized) - self.test_kernelized_forward_is_the_same(model, self.model_kernelized) + self.assert_kernelized_forward_is_different(model, self.model_not_kernelized) + self.assert_kernelized_forward_is_the_same(model, self.model_kernelized) del model def test_unkernelize(self): From fcfd640428ecacbf6c8bd0512937fb71c94a273a Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 11:51:26 +0000 Subject: [PATCH 11/14] revert jobs --- .github/workflows/self-scheduled-caller.yml | 140 ++++++++++---------- tests/kernels/test_kernels.py | 4 +- 2 files changed, 73 insertions(+), 71 deletions(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index fe4effeb9d42..a0ff8fd33cb1 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -6,7 +6,7 @@ on: - cron: "17 2 * * *" push: branches: - - add_tests_for_kernels + - run_nvidia_ci* workflow_dispatch: inputs: prev_workflow_run_id: @@ -44,80 +44,80 @@ jobs: name: setup_values path: setup_values - # model-ci: - # name: Model CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_models_gpu - # slack_report_channel: "#transformers-ci-daily-models" - # docker: huggingface/transformers-all-latest-gpu - # ci_event: Daily CI - # runner_type: "a10" - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + model-ci: + name: Model CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-models" + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI + runner_type: "a10" + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ 
github.sha }} + secrets: inherit - # torch-pipeline: - # name: Torch pipeline CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_pipelines_torch_gpu - # slack_report_channel: "#transformers-ci-daily-pipeline-torch" - # docker: huggingface/transformers-pytorch-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-pipeline-torch" + docker: huggingface/transformers-pytorch-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # example-ci: - # name: Example CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_examples_gpu - # slack_report_channel: "#transformers-ci-daily-examples" - # docker: huggingface/transformers-all-latest-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-examples" + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # trainer-fsdp-ci: - # name: Trainer/FSDP CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_trainer_and_fsdp_gpu - # slack_report_channel: "#transformers-ci-daily-training" - # docker: huggingface/transformers-all-latest-gpu - # runner_type: "a10" - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + trainer-fsdp-ci: + name: Trainer/FSDP CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_trainer_and_fsdp_gpu + slack_report_channel: "#transformers-ci-daily-training" + docker: huggingface/transformers-all-latest-gpu + runner_type: "a10" + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # deepspeed-ci: - # name: DeepSpeed CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_torch_cuda_extensions_gpu - # slack_report_channel: "#transformers-ci-daily-training" - # docker: huggingface/transformers-pytorch-deepspeed-latest-gpu - # ci_event: Daily CI - # working-directory-prefix: /workspace - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-training" + docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + ci_event: Daily CI + working-directory-prefix: /workspace + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # quantization-ci: - # name: Quantization CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_quantization_torch_gpu - # slack_report_channel: "#transformers-ci-daily-quantization" - # docker: huggingface/transformers-quantization-latest-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - 
# commit_sha: ${{ github.sha }} - # secrets: inherit + quantization-ci: + name: Quantization CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_quantization_torch_gpu + slack_report_channel: "#transformers-ci-daily-quantization" + docker: huggingface/transformers-quantization-latest-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit kernels-ci: name: Kernels CI diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index f9559c9d9e35..3f369a579bbc 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py +# Run the test: CUDA_VISIBLE_DEVICES=0 RUN_SLOW=1 pytest -sv tests/kernels/test_kernels.py + + import copy import gc import types From a09d2cbe7f01b42cdbbf8c873bdefe478073b863 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 14:52:35 +0000 Subject: [PATCH 12/14] addressing review --- .github/workflows/self-scheduled-caller.yml | 140 ++++++++++---------- .github/workflows/self-scheduled.yml | 12 +- tests/kernels/test_kernels.py | 30 ++--- 3 files changed, 90 insertions(+), 92 deletions(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index a0ff8fd33cb1..fe4effeb9d42 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -6,7 +6,7 @@ on: - cron: "17 2 * * *" push: branches: - - run_nvidia_ci* + - add_tests_for_kernels workflow_dispatch: inputs: prev_workflow_run_id: @@ -44,80 +44,80 @@ jobs: name: setup_values path: setup_values - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-models" - docker: huggingface/transformers-all-latest-gpu - ci_event: Daily CI - runner_type: "a10" - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # model-ci: + # name: Model CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_models_gpu + # slack_report_channel: "#transformers-ci-daily-models" + # docker: huggingface/transformers-all-latest-gpu + # ci_event: Daily CI + # runner_type: "a10" + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-torch" - docker: huggingface/transformers-pytorch-gpu - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # torch-pipeline: + # name: Torch pipeline CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_pipelines_torch_gpu + # slack_report_channel: "#transformers-ci-daily-pipeline-torch" + # docker: huggingface/transformers-pytorch-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-examples" - docker: 
huggingface/transformers-all-latest-gpu - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # example-ci: + # name: Example CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_examples_gpu + # slack_report_channel: "#transformers-ci-daily-examples" + # docker: huggingface/transformers-all-latest-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - trainer-fsdp-ci: - name: Trainer/FSDP CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_trainer_and_fsdp_gpu - slack_report_channel: "#transformers-ci-daily-training" - docker: huggingface/transformers-all-latest-gpu - runner_type: "a10" - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # trainer-fsdp-ci: + # name: Trainer/FSDP CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_trainer_and_fsdp_gpu + # slack_report_channel: "#transformers-ci-daily-training" + # docker: huggingface/transformers-all-latest-gpu + # runner_type: "a10" + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-training" - docker: huggingface/transformers-pytorch-deepspeed-latest-gpu - ci_event: Daily CI - working-directory-prefix: /workspace - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # deepspeed-ci: + # name: DeepSpeed CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_torch_cuda_extensions_gpu + # slack_report_channel: "#transformers-ci-daily-training" + # docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + # ci_event: Daily CI + # working-directory-prefix: /workspace + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit - quantization-ci: - name: Quantization CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_quantization_torch_gpu - slack_report_channel: "#transformers-ci-daily-quantization" - docker: huggingface/transformers-quantization-latest-gpu - ci_event: Daily CI - report_repo_id: hf-internal-testing/transformers_daily_ci - commit_sha: ${{ github.sha }} - secrets: inherit + # quantization-ci: + # name: Quantization CI + # uses: ./.github/workflows/self-scheduled.yml + # with: + # job: run_quantization_torch_gpu + # slack_report_channel: "#transformers-ci-daily-quantization" + # docker: huggingface/transformers-quantization-latest-gpu + # ci_event: Daily CI + # report_repo_id: hf-internal-testing/transformers_daily_ci + # commit_sha: ${{ github.sha }} + # secrets: inherit kernels-ci: name: Kernels CI diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 69abb39a2a35..f95e4a10f508 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -483,7 +483,11 @@ jobs: - name: Reinstall transformers in edit mode working-directory: /transformers run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[testing] - + + - name: Install kernels + working-directory: /transformers + run: python3 -m pip install -U kernels + 
- name: NVIDIA-SMI run: nvidia-smi @@ -505,11 +509,7 @@ jobs: machine_type=${{ matrix.machine_type }} fi echo "machine_type=$machine_type" >> $GITHUB_ENV - - - name: Install kernels - working-directory: /transformers - run: python3 -m pip install -U kernels - + - name: Run kernel tests on GPU working-directory: /transformers run: | diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 3f369a579bbc..57c6c75986c8 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -16,7 +16,6 @@ import copy -import gc import types from unittest.mock import patch @@ -32,9 +31,10 @@ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS from transformers.testing_utils import ( TestCasePlus, - backend_empty_cache, + cleanup, require_kernels, require_torch_accelerator, + slow, torch_device, ) from transformers.utils import is_kernels_available @@ -46,17 +46,19 @@ @require_kernels +@slow class TestHubKernels(TestCasePlus): - def setUp(self): - self.model_id = "unsloth/Llama-3.2-1B-Instruct" - self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) - self.model_kernelized = AutoModelForCausalLM.from_pretrained( - self.model_id, use_kernels=True, device_map=torch_device + @classmethod + def setUpClass(cls): + cls.model_id = "unsloth/Llama-3.2-1B-Instruct" + cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_id) + cls.model_kernelized = AutoModelForCausalLM.from_pretrained( + cls.model_id, use_kernels=True, device_map=torch_device ) - self.model_not_kernelized = AutoModelForCausalLM.from_pretrained( - self.model_id, use_kernels=False, device_map=torch_device + cls.model_not_kernelized = AutoModelForCausalLM.from_pretrained( + cls.model_id, use_kernels=False, device_map=torch_device ) - self.input = "Hello" + cls.input = "Hello" def tearDown(self): # Delete large objects to drop references early @@ -82,9 +84,7 @@ def tearDown(self): pass # Free accelerator memory/cache and trigger GC - gc.collect() - backend_empty_cache(torch_device) - gc.collect() + cleanup(torch_device, gc_collect=True) @require_torch_accelerator def test_forward(self): @@ -369,9 +369,7 @@ def tearDown(self): except Exception: pass # Free accelerator memory/cache and trigger GC - gc.collect() - backend_empty_cache(torch_device) - gc.collect() + cleanup(torch_device, gc_collect=True) def test_setting_use_kernels_twice_does_not_rekernelize(self): call_count = {"n": 0} From 6d64718c0e35703b4b87aa067ef030a8857b57d3 Mon Sep 17 00:00:00 2001 From: medmekk Date: Tue, 21 Oct 2025 15:02:24 +0000 Subject: [PATCH 13/14] reenable all jobs --- .github/workflows/self-scheduled-caller.yml | 140 ++++++++++---------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index fe4effeb9d42..a0ff8fd33cb1 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -6,7 +6,7 @@ on: - cron: "17 2 * * *" push: branches: - - add_tests_for_kernels + - run_nvidia_ci* workflow_dispatch: inputs: prev_workflow_run_id: @@ -44,80 +44,80 @@ jobs: name: setup_values path: setup_values - # model-ci: - # name: Model CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_models_gpu - # slack_report_channel: "#transformers-ci-daily-models" - # docker: huggingface/transformers-all-latest-gpu - # ci_event: Daily CI - # runner_type: "a10" - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: 
inherit + model-ci: + name: Model CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-models" + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI + runner_type: "a10" + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # torch-pipeline: - # name: Torch pipeline CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_pipelines_torch_gpu - # slack_report_channel: "#transformers-ci-daily-pipeline-torch" - # docker: huggingface/transformers-pytorch-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-pipeline-torch" + docker: huggingface/transformers-pytorch-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # example-ci: - # name: Example CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_examples_gpu - # slack_report_channel: "#transformers-ci-daily-examples" - # docker: huggingface/transformers-all-latest-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-examples" + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # trainer-fsdp-ci: - # name: Trainer/FSDP CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_trainer_and_fsdp_gpu - # slack_report_channel: "#transformers-ci-daily-training" - # docker: huggingface/transformers-all-latest-gpu - # runner_type: "a10" - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + trainer-fsdp-ci: + name: Trainer/FSDP CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_trainer_and_fsdp_gpu + slack_report_channel: "#transformers-ci-daily-training" + docker: huggingface/transformers-all-latest-gpu + runner_type: "a10" + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # deepspeed-ci: - # name: DeepSpeed CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_torch_cuda_extensions_gpu - # slack_report_channel: "#transformers-ci-daily-training" - # docker: huggingface/transformers-pytorch-deepspeed-latest-gpu - # ci_event: Daily CI - # working-directory-prefix: /workspace - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-training" + docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + ci_event: Daily CI + working-directory-prefix: /workspace + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit - # 
quantization-ci: - # name: Quantization CI - # uses: ./.github/workflows/self-scheduled.yml - # with: - # job: run_quantization_torch_gpu - # slack_report_channel: "#transformers-ci-daily-quantization" - # docker: huggingface/transformers-quantization-latest-gpu - # ci_event: Daily CI - # report_repo_id: hf-internal-testing/transformers_daily_ci - # commit_sha: ${{ github.sha }} - # secrets: inherit + quantization-ci: + name: Quantization CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_quantization_torch_gpu + slack_report_channel: "#transformers-ci-daily-quantization" + docker: huggingface/transformers-quantization-latest-gpu + ci_event: Daily CI + report_repo_id: hf-internal-testing/transformers_daily_ci + commit_sha: ${{ github.sha }} + secrets: inherit kernels-ci: name: Kernels CI From 24ae78111b98586cd7af30fa6961969488a98dd7 Mon Sep 17 00:00:00 2001 From: medmekk Date: Thu, 23 Oct 2025 08:16:40 +0000 Subject: [PATCH 14/14] address second review --- tests/kernels/test_kernels.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tests/kernels/test_kernels.py b/tests/kernels/test_kernels.py index 57c6c75986c8..6311629ac4f2 100644 --- a/tests/kernels/test_kernels.py +++ b/tests/kernels/test_kernels.py @@ -37,7 +37,7 @@ slow, torch_device, ) -from transformers.utils import is_kernels_available +from transformers.utils.import_utils import is_kernels_available if is_kernels_available(): @@ -60,16 +60,16 @@ def setUpClass(cls): ) cls.input = "Hello" - def tearDown(self): - # Delete large objects to drop references early + @classmethod + def tearDownClass(cls): for attr in [ "model_kernelized", "model_not_kernelized", "tokenizer", ]: - if hasattr(self, attr): + if hasattr(cls, attr): try: - delattr(self, attr) + delattr(cls, attr) except Exception: pass @@ -83,6 +83,7 @@ def tearDown(self): except Exception: pass + def tearDown(self): # Free accelerator memory/cache and trigger GC cleanup(torch_device, gc_collect=True) @@ -357,17 +358,21 @@ def my_attention(*args, **kwargs): @require_kernels class TestUseKernelsLifecycle(TestCasePlus): - def setUp(self): - self.model_id = "unsloth/Llama-3.2-1B-Instruct" - self.model = AutoModelForCausalLM.from_pretrained(self.model_id, use_kernels=False, device_map=torch_device) + @classmethod + def setUpClass(cls): + cls.model_id = "unsloth/Llama-3.2-1B-Instruct" + cls.model = AutoModelForCausalLM.from_pretrained(cls.model_id, use_kernels=False, device_map=torch_device) - def tearDown(self): + @classmethod + def tearDownClass(cls): # Delete large objects to drop references early - if hasattr(self, "model"): + if hasattr(cls, "model"): try: - del self.model + del cls.model except Exception: pass + + def tearDown(self): # Free accelerator memory/cache and trigger GC cleanup(torch_device, gc_collect=True)