diff --git a/tests/quantization/torchao/test_torchao.py b/tests/quantization/torchao/test_torchao.py
index 7a8e3cc67877..a722eaece4d1 100644
--- a/tests/quantization/torchao/test_torchao.py
+++ b/tests/quantization/torchao/test_torchao.py
@@ -74,7 +74,7 @@
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoConfigTest(unittest.TestCase):
     def test_to_dict(self):
         """
@@ -132,7 +132,7 @@ def test_repr(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoTest(unittest.TestCase):
     def tearDown(self):
         gc.collect()
@@ -587,7 +587,7 @@ def test_aobase_config(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoSerializationTest(unittest.TestCase):
     model_name = "hf-internal-testing/tiny-flux-pipe"
 
@@ -698,23 +698,22 @@ def test_aobase_config(self):
         self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device)
 
 
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoCompileTest(QuantCompileTests, unittest.TestCase):
     @property
     def quantization_config(self):
         return PipelineQuantizationConfig(
-            quant_mapping={
-                "transformer": TorchAoConfig(quant_type="int8_weight_only"),
-            },
+            quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())},
         )
 
-    @unittest.skip(
-        "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work "
-        "when compiling."
-    )
     def test_torch_compile_with_cpu_offload(self):
+        pipe = self._init_pipeline(self.quantization_config, torch.bfloat16)
+        pipe.enable_model_cpu_offload()
+        # No compilation because it fails with:
         # RuntimeError: _apply(): Couldn't swap Linear.weight
-        super().test_torch_compile_with_cpu_offload()
+
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
     @parameterized.expand([False, True])
     @unittest.skip(
@@ -745,7 +744,7 @@ def test_torch_compile_with_group_offload_leaf(self, use_stream):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoTests(unittest.TestCase):
@@ -907,7 +906,7 @@ def test_memory_footprint_int8wo(self):
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoPreserializedModelTests(unittest.TestCase):