diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 35c0386b9..583237bb6 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -29,9 +29,11 @@
     lib.cadam32bit_grad_fp32 # runs on an error if the library could not be found -> COMPILED_WITH_CUDA=False
     lib.get_context.restype = ct.c_void_p
+    HIP_ENVIRONMENT = False
     if torch.version.cuda:
         lib.get_cusparse.restype = ct.c_void_p
     elif torch.version.hip:
+        HIP_ENVIRONMENT = True
         lib.get_hipsparse.restype = ct.c_void_p
     lib.cget_managed_ptr.restype = ct.c_void_p
diff --git a/tests/test_autograd.py b/tests/test_autograd.py
index 803fde145..787005823 100644
--- a/tests/test_autograd.py
+++ b/tests/test_autograd.py
@@ -4,6 +4,7 @@
 import torch
 
 import bitsandbytes as bnb
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 n = 1
 k = 25
@@ -288,7 +289,7 @@ def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
 )
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_decomp_{}_has_fp16_weights_{}_has_bias_{}".format(*vals) for vals in str_values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, decomp, has_fp16_weights, has_bias",
     values,
@@ -552,6 +553,7 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
 def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
diff --git a/tests/test_cuda_setup_evaluator.py b/tests/test_cuda_setup_evaluator.py
index 4973da50d..0918c885e 100644
--- a/tests/test_cuda_setup_evaluator.py
+++ b/tests/test_cuda_setup_evaluator.py
@@ -9,8 +9,9 @@
     evaluate_cuda_setup,
     extract_candidate_paths,
 )
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_cuda_full_system():
     ## this only tests the cuda version and not compute capability
diff --git a/tests/test_functional.py b/tests/test_functional.py
index cc58324e4..44a4e662a 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -10,6 +10,7 @@
 
 import bitsandbytes as bnb
 from bitsandbytes import functional as F
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 from scipy.stats import norm
 
 torch.set_printoptions(
@@ -90,7 +91,7 @@ def setup():
 def teardown():
     pass
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dtype", [torch.float32, torch.float16], ids=["float", "half"]
 )
@@ -110,7 +111,7 @@ def test_estimate_quantiles(dtype):
     diff = torch.abs(code - quantiles)
     assert (diff > 5e-02).sum().item() == 0
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_quantile_quantization():
     for i in range(100):
         A1 = torch.randn(1024, 1024, device="cuda")
@@ -153,7 +154,7 @@ def test_dynamic_quantization():
     assert diff < 0.004
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("nested", [False, True], ids=["False", "True"])
 @pytest.mark.parametrize("blocksize", [4096, 2048, 1024, 512, 256, 128, 64])
 def test_dynamic_blockwise_quantization(nested, blocksize):
@@ -601,6 +602,7 @@ def test_vector_quant(dim1, dim2, dim3):
 names = ["dim1_{}_dim2_{}_dim3_{}_dims_{}_dtype_{}_orderA_{}_orderOut_{}_transpose_{}".format(*vals)for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose",values,ids=names)
 def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose):
     if dims == 3 and out_order != "col32":
@@ -684,7 +686,7 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dim4, dims, ldb", values, ids=names)
 def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb):
     for i in range(k):
@@ -732,7 +734,7 @@ def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dim4, dims", values, ids=names)
 def test_igemmlt_half(dim1, dim2, dim3, dim4, dims):
     formatB = F.get_special_format_str()
@@ -956,7 +958,7 @@ def test_bench_8bit_training(batch, seq, model, hidden):
 values = list(product(dim1, dim4, dims, formatB, has_bias))
 names = ["dim1_{}_dim4_{}_dims_{}_formatB_{}_has_bias_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim4, dims, formatB, has_bias", values, ids=names)
 def test_dequant_mm(dim1, dim4, dims, formatB, has_bias):
     inner = torch.randint(1, 128, size=(1,)).item()
@@ -1109,7 +1111,7 @@ def test_double_quant(dim1, dim2):
 values = list(zip(dim1, dim4, inner))
 names = ["dim1_{}_dim4_{}_inner_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim4, inner", values, ids=names)
 def test_integrated_igemmlt(dim1, dim4, inner):
     for i in range(k):
@@ -1298,7 +1300,7 @@ def test_row_scale_bench(dim1, dim4, inner):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose",
     values,
@@ -1347,7 +1349,7 @@ def test_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_overflow():
     formatB = F.get_special_format_str()
     print(formatB)
@@ -1408,7 +1410,7 @@ def test_coo_double_quant(dim1, dim2):
 values = list(product(dim1, dim2, transposed_B))
 names = ["dim1_{}_dim2_{}_transposed_B_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, transposed_B", values, ids=names)
 def test_spmm_coo(dim1, dim2, transposed_B):
     threshold = 1.5
@@ -1440,6 +1442,7 @@ def test_spmm_coo(dim1, dim2, transposed_B):
     assert_all_approx_close(out1, out2, rtol=0.01, atol=3.0e-2, count=30)
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_spmm_bench():
     batch = 2
     model = 1024 * 1
@@ -1489,7 +1492,7 @@ def test_spmm_bench():
 values = list(product(dim1, dim2))
 names = ["dim1_{}_dim2_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2", values, ids=names)
 def test_integrated_sparse_decomp(dim1, dim2):
     threshold = 3.0
@@ -1672,6 +1675,7 @@ def test_coo2csc():
 names = ["dim1_{}_dim2_{}_dtype_{}".format(*vals) for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dtype", values, ids=names)
 def test_spmm_coo_dequant(dim1, dim2, dtype):
     threshold = 6.0
@@ -2178,6 +2182,7 @@ def test_few_bit_quant():
     #assert False
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_kbit_quantile_estimation():
     for i in range(100):
         data = torch.randn(1024, 1024, device='cuda')
@@ -2220,7 +2225,7 @@ def test_bench_dequantization():
     #print((time.time()-t0)/1e6)
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_fp4_quant():
     vals = list(product([0, 1], repeat=4))
@@ -2258,6 +2263,7 @@ def test_fp4_quant():
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 def test_4bit_compressed_stats(quant_type):
     for blocksize in [128, 64]:
diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 37f7af9cb..62f863554 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -10,7 +10,7 @@
 from bitsandbytes import functional as F
 from bitsandbytes.autograd import get_inverse_transform_indices, undo_layout
 from bitsandbytes.nn.modules import Linear8bitLt
-
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 # contributed by Alex Borzunov, see:
 # https://github.com/bigscience-workshop/petals/blob/main/tests/test_linear8bitlt.py
@@ -69,6 +69,7 @@ def test_linear_no_igemmlt():
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
                          list(product([False, True], [False, True], [False, True], [False, True])))
 def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
diff --git a/tests/test_modules.py b/tests/test_modules.py
index d0a905197..f7d8f5e3f 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -5,7 +5,7 @@
 from torch import nn
 
 import bitsandbytes as bnb
-
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 class MockArgs:
     def __init__(self, initial_data):
@@ -315,6 +315,7 @@ def forward(self, x):
 names = [f"threshold_{vals}" for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("threshold", values, ids=names)
 def test_linear8bitlt_inference(threshold):
     l1 = bnb.nn.Linear8bitLt(32, 64, threshold=threshold).cuda().half()
@@ -329,6 +330,7 @@ def test_linear8bitlt_inference(threshold):
             assert l1.state.CxB is not None
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_linear8bitlt_accumulated_gradient():
     l1 = torch.nn.Sequential(*[bnb.nn.Linear8bitLt(32, 32).cuda().half() for i in range(2)])
     l2 = torch.nn.Sequential(*[torch.nn.Linear(32, 32).cuda().half() for i in range(2)])
@@ -518,6 +520,7 @@ def test_linear_kbit_fp32_bias(module):
 modules.append(lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True))
 names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C']
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
     b = 17
diff --git a/tests/test_optim.py b/tests/test_optim.py
index 9e90083a9..a785c1ccb 100644
--- a/tests/test_optim.py
+++ b/tests/test_optim.py
@@ -13,6 +13,7 @@
 
 import bitsandbytes as bnb
 import bitsandbytes.functional as F
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 # import apex
@@ -109,6 +110,7 @@ def rm_path(path):
 optimizer_names = ["adam", "momentum", "rmsprop", 'paged_adamw', 'paged_adam', 'lion', 'paged_lion']
 values = list(product(dim1, dim2, gtype, optimizer_names))
 names = ["dim1_{}_dim2_{}_gtype_{}_optim_{}".format(*vals) for vals in values]
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, gtype, optim_name", values, ids=names)
 def test_optimizer32bit(dim1, dim2, gtype, optim_name):
     if gtype == torch.bfloat16 and optim_name in ['momentum', 'rmsprop']: pytest.skip()
@@ -251,6 +253,7 @@ def test_global_config(dim1, dim2, gtype):
 ]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, gtype, optim_name", values, ids=names)
 def test_optimizer8bit(dim1, dim2, gtype, optim_name):
     if gtype == torch.bfloat16 and optim_name not in ['adam8bit_blockwise', 'lion8bit_blockwise']: pytest.skip()
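
For context on the mechanism, here is a minimal, self-contained sketch of the detect-and-skip pattern this diff applies. It assumes only that `torch.version.hip` is a version string on ROCm builds of PyTorch and `None` on CUDA builds; the `rocm_unsupported` marker name is a hypothetical convenience for illustration, not an identifier from this PR (the actual change sets `HIP_ENVIRONMENT` inside cextension.py's library-setup branch and repeats the `skipif` decorator on each test).

```python
# Sketch only; `rocm_unsupported` is an illustrative name, not part of this PR.
import pytest
import torch

# torch.version.hip is a version string on ROCm builds, None on CUDA builds.
HIP_ENVIRONMENT = torch.version.hip is not None

# A reusable marker equivalent to repeating the skipif on every test.
rocm_unsupported = pytest.mark.skipif(
    HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet"
)

@rocm_unsupported
def test_cuda_only_feature():
    # Runs only on CUDA builds; skipped (not failed) under ROCm.
    assert torch.version.cuda is not None
```

If the per-test repetition ever becomes noisy, consolidating into a shared marker like the one above is the usual cleanup; the per-test decorators used in this diff have the advantage of keeping each skip visible at the test site.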