From 591695a58f50a124b73c37add0b12958e8bf7876 Mon Sep 17 00:00:00 2001
From: terry-for-github <anzi20000525@163.com>
Date: Sun, 18 Aug 2024 01:10:42 +0800
Subject: [PATCH 1/2] Change is_compatible default to verbose=False

---
 op_builder/async_io.py                  | 2 +-
 op_builder/builder.py                   | 4 ++--
 op_builder/cpu/comm.py                  | 4 ++--
 op_builder/evoformer_attn.py            | 2 +-
 op_builder/fp_quantizer.py              | 2 +-
 op_builder/inference_core_ops.py        | 2 +-
 op_builder/inference_cutlass_builder.py | 2 +-
 op_builder/npu/async_io.py              | 2 +-
 op_builder/ragged_ops.py                | 2 +-
 op_builder/ragged_utils.py              | 2 +-
 op_builder/sparse_attn.py               | 2 +-
 op_builder/spatial_inference.py         | 2 +-
 op_builder/transformer_inference.py     | 2 +-
 op_builder/xpu/async_io.py              | 2 +-
 14 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/op_builder/async_io.py b/op_builder/async_io.py
index b55c821910b9..047e4ad5eaf0 100644
--- a/op_builder/async_io.py
+++ b/op_builder/async_io.py
@@ -79,7 +79,7 @@ def check_for_libaio_pkg(self):
                 break
         return found
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # Check for the existence of libaio by using distutils
         # to compile and link a test program that calls io_submit,
         # which is a function provided by libaio that is used in the async_io op.
diff --git a/op_builder/builder.py b/op_builder/builder.py
index 03611bf56284..17cb6bd70f2f 100644
--- a/op_builder/builder.py
+++ b/op_builder/builder.py
@@ -275,7 +275,7 @@ def cxx_args(self):
         '''
         return []
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         '''
         Check if all non-python dependencies are satisfied to build this op
         '''
@@ -656,7 +656,7 @@ def version_dependent_macros(self):
             version_ge_1_5 = ['-DVERSION_GE_1_5']
         return version_ge_1_1 + version_ge_1_3 + version_ge_1_5
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         return super().is_compatible(verbose)
 
     def builder(self):
diff --git a/op_builder/cpu/comm.py b/op_builder/cpu/comm.py
index 69b9226fd806..fec960b63b2e 100644
--- a/op_builder/cpu/comm.py
+++ b/op_builder/cpu/comm.py
@@ -28,7 +28,7 @@ def include_paths(self):
     def cxx_args(self):
         return ['-O2', '-fopenmp']
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # TODO: add soft compatibility check for private binary release.
         #  a soft check, as in we know it can be trivially changed.
         return super().is_compatible(verbose)
@@ -65,7 +65,7 @@ def include_paths(self):
     def cxx_args(self):
         return ['-O2', '-fopenmp']
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # TODO: add soft compatibility check for private binary release.
         #  a soft check, as in we know it can be trivially changed.
         return super().is_compatible(verbose)
diff --git a/op_builder/evoformer_attn.py b/op_builder/evoformer_attn.py
index 6e7721f94e01..d47880bf5b03 100644
--- a/op_builder/evoformer_attn.py
+++ b/op_builder/evoformer_attn.py
@@ -41,7 +41,7 @@ def nvcc_args(self):
         args.append(f"-DGPU_ARCH={major}{minor}")
         return args
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/fp_quantizer.py b/op_builder/fp_quantizer.py
index c7d2e72b5408..967a9a43e78b 100644
--- a/op_builder/fp_quantizer.py
+++ b/op_builder/fp_quantizer.py
@@ -22,7 +22,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.ops.fp_quantizer.{self.NAME}_op'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/inference_core_ops.py b/op_builder/inference_core_ops.py
index d1957f39d9a8..10edcb311e28 100755
--- a/op_builder/inference_core_ops.py
+++ b/op_builder/inference_core_ops.py
@@ -19,7 +19,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.inference.v2.kernels{self.NAME}'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/inference_cutlass_builder.py b/op_builder/inference_cutlass_builder.py
index 51f7931d9435..094d16d9f67b 100644
--- a/op_builder/inference_cutlass_builder.py
+++ b/op_builder/inference_cutlass_builder.py
@@ -18,7 +18,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.inference.v2.kernels.cutlass_ops.{self.NAME}'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/npu/async_io.py b/op_builder/npu/async_io.py
index 86560353b1c7..d6df6d74133e 100644
--- a/op_builder/npu/async_io.py
+++ b/op_builder/npu/async_io.py
@@ -83,7 +83,7 @@ def check_for_libaio_pkg(self):
                 break
         return found
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # Check for the existence of libaio by using distutils
         # to compile and link a test program that calls io_submit,
         # which is a function provided by libaio that is used in the async_io op.
diff --git a/op_builder/ragged_ops.py b/op_builder/ragged_ops.py
index ec7cab91885f..ee1c18ab8de4 100644
--- a/op_builder/ragged_ops.py
+++ b/op_builder/ragged_ops.py
@@ -19,7 +19,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.inference.v2.kernels.ragged_ops.{self.NAME}'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/ragged_utils.py b/op_builder/ragged_utils.py
index 89450e1fd30d..365544d5652d 100755
--- a/op_builder/ragged_utils.py
+++ b/op_builder/ragged_utils.py
@@ -19,7 +19,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.inference.v2.{self.NAME}'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/sparse_attn.py b/op_builder/sparse_attn.py
index 188d257ff4ef..afa4c7018dc8 100644
--- a/op_builder/sparse_attn.py
+++ b/op_builder/sparse_attn.py
@@ -27,7 +27,7 @@ def sources(self):
     def cxx_args(self):
         return ['-O2', '-fopenmp']
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # Check to see if llvm and cmake are installed since they are dependencies
         #required_commands = ['llvm-config|llvm-config-9', 'cmake']
         #command_status = list(map(self.command_exists, required_commands))
diff --git a/op_builder/spatial_inference.py b/op_builder/spatial_inference.py
index 59caf57f938d..3881247958d4 100644
--- a/op_builder/spatial_inference.py
+++ b/op_builder/spatial_inference.py
@@ -17,7 +17,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.ops.spatial.{self.NAME}_op'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/transformer_inference.py b/op_builder/transformer_inference.py
index 5ee902289448..497f39c6f7d8 100755
--- a/op_builder/transformer_inference.py
+++ b/op_builder/transformer_inference.py
@@ -17,7 +17,7 @@ def __init__(self, name=None):
     def absolute_name(self):
         return f'deepspeed.ops.transformer.inference.{self.NAME}_op'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
diff --git a/op_builder/xpu/async_io.py b/op_builder/xpu/async_io.py
index 0fd43f72e60e..9448e98c40be 100644
--- a/op_builder/xpu/async_io.py
+++ b/op_builder/xpu/async_io.py
@@ -79,7 +79,7 @@ def check_for_libaio_pkg(self):
                 break
         return found
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # Check for the existence of libaio by using distutils
         # to compile and link a test program that calls io_submit,
         # which is a function provided by libaio that is used in the async_io op.

From c07ea2b21aadede96dbd574ae6ed5db34f165e68 Mon Sep 17 00:00:00 2001
From: terry-for-github <anzi20000525@163.com>
Date: Sun, 18 Aug 2024 01:11:05 +0800
Subject: [PATCH 2/2] Emit is_compatible warnings only when verbose is set

---
 op_builder/evoformer_attn.py            | 15 ++++++++++-----
 op_builder/fp_quantizer.py              | 19 ++++++++++++-------
 op_builder/inference_core_ops.py        |  9 ++++++---
 op_builder/inference_cutlass_builder.py |  9 ++++++---
 op_builder/ragged_ops.py                |  9 ++++++---
 op_builder/ragged_utils.py              |  9 ++++++---
 op_builder/sparse_attn.py               | 20 +++++++++++++-------
 op_builder/spatial_inference.py         |  6 ++++--
 op_builder/transformer_inference.py     |  9 ++++++---
 9 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/op_builder/evoformer_attn.py b/op_builder/evoformer_attn.py
index d47880bf5b03..af3aa7429775 100644
--- a/op_builder/evoformer_attn.py
+++ b/op_builder/evoformer_attn.py
@@ -45,14 +45,17 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile kernels")
             return False
         if self.cutlass_path is None:
-            self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
+            if verbose:
+                self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
             return False
         with open(f'{self.cutlass_path}/CHANGELOG.md', 'r') as f:
             if '3.1.0' not in f.read():
-                self.warning("Please use CUTLASS version >= 3.1.0")
+                if verbose:
+                    self.warning("Please use CUTLASS version >= 3.1.0")
                 return False
         cuda_okay = True
         if not self.is_rocm_pytorch() and torch.cuda.is_available():  #ignore-cuda
@@ -60,10 +63,12 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 7:
-                self.warning("Please use a GPU with compute capability >= 7.0")
+                if verbose:
+                    self.warning("Please use a GPU with compute capability >= 7.0")
                 cuda_okay = False
             if torch_cuda_major < 11 or sys_cuda_major < 11:
-                self.warning("Please use CUDA 11+")
+                if verbose:
+                    self.warning("Please use CUDA 11+")
                 cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/fp_quantizer.py b/op_builder/fp_quantizer.py
index 967a9a43e78b..40cf504c2c83 100644
--- a/op_builder/fp_quantizer.py
+++ b/op_builder/fp_quantizer.py
@@ -26,7 +26,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -35,17 +36,20 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 8:
-                self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
 
         try:
             import triton
         except ImportError:
-            self.warning(f"please install triton==2.3.0 or 2.3.1 if you want to use the FP Quantizer Kernels")
+            if verbose:
+                self.warning(f"please install triton==2.3.0 or 2.3.1 if you want to use the FP Quantizer Kernels")
             return False
 
         # triton 2.3.0 and 2.3.1 are okay and the only versions released in 2.3.x before 3.x was released
@@ -59,9 +63,10 @@ def is_compatible(self, verbose=False):
             triton_mismatch = major != "2" or minor != "3"
 
         if triton_mismatch:
-            self.warning(
-                f"FP Quantizer is using an untested triton version ({installed_triton}), only 2.3.0 and 2.3.1 are known to be compatible with these kernels"
-            )
+            if verbose:
+                self.warning(
+                    f"FP Quantizer is using an untested triton version ({installed_triton}), only 2.3.0 and 2.3.1 are known to be compatible with these kernels"
+                )
             return False
 
         return super().is_compatible(verbose) and cuda_okay
diff --git a/op_builder/inference_core_ops.py b/op_builder/inference_core_ops.py
index 10edcb311e28..f7c0b47f92c6 100755
--- a/op_builder/inference_core_ops.py
+++ b/op_builder/inference_core_ops.py
@@ -23,7 +23,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/inference_cutlass_builder.py b/op_builder/inference_cutlass_builder.py
index 094d16d9f67b..aa5294b1cbda 100644
--- a/op_builder/inference_cutlass_builder.py
+++ b/op_builder/inference_cutlass_builder.py
@@ -22,7 +22,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -31,11 +32,13 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/ragged_ops.py b/op_builder/ragged_ops.py
index ee1c18ab8de4..10afb193c738 100644
--- a/op_builder/ragged_ops.py
+++ b/op_builder/ragged_ops.py
@@ -23,7 +23,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/ragged_utils.py b/op_builder/ragged_utils.py
index 365544d5652d..654ba07e0879 100755
--- a/op_builder/ragged_utils.py
+++ b/op_builder/ragged_utils.py
@@ -23,7 +23,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/sparse_attn.py b/op_builder/sparse_attn.py
index afa4c7018dc8..9f3399647d51 100644
--- a/op_builder/sparse_attn.py
+++ b/op_builder/sparse_attn.py
@@ -34,29 +34,33 @@ def is_compatible(self, verbose=False):
         #deps_compatible = all(command_status)
 
         if self.is_rocm_pytorch():
-            self.warning(f'{self.NAME} is not compatible with ROCM')
+            if verbose:
+                self.warning(f'{self.NAME} is not compatible with ROCM')
             return False
 
         try:
             import torch
         except ImportError:
-            self.warning(f"unable to import torch, please install it first")
+            if verbose:
+                self.warning(f"unable to import torch, please install it first")
             return False
 
         # torch-cpu will not have a cuda version
         if torch.version.cuda is None:
             cuda_compatible = False
-            self.warning(f"{self.NAME} cuda is not available from torch")
+            if verbose:
+                self.warning(f"{self.NAME} cuda is not available from torch")
         else:
             major, minor = torch.version.cuda.split('.')[:2]
             cuda_compatible = (int(major) == 10 and int(minor) >= 1) or (int(major) >= 11)
             if not cuda_compatible:
-                self.warning(f"{self.NAME} requires CUDA version 10.1+")
+                if verbose:
+                    self.warning(f"{self.NAME} requires CUDA version 10.1+")
 
         TORCH_MAJOR = int(torch.__version__.split('.')[0])
         TORCH_MINOR = int(torch.__version__.split('.')[1])
         torch_compatible = (TORCH_MAJOR == 1 and TORCH_MINOR >= 5)
-        if not torch_compatible:
+        if not torch_compatible and verbose:
             self.warning(
                 f'{self.NAME} requires a torch version >= 1.5 and < 2.0 but detected {TORCH_MAJOR}.{TORCH_MINOR}')
 
@@ -65,7 +69,8 @@ def is_compatible(self, verbose=False):
         except ImportError:
             # auto-install of triton is broken on some systems, reverting to manual install for now
             # see this issue: https://github.com/microsoft/DeepSpeed/issues/1710
-            self.warning(f"please install triton==1.0.0 if you want to use sparse attention")
+            if verbose:
+                self.warning(f"please install triton==1.0.0 if you want to use sparse attention")
             return False
 
         if pkg_version:
@@ -76,7 +81,8 @@ def is_compatible(self, verbose=False):
             triton_mismatch = installed_triton != "1.0.0"
 
         if triton_mismatch:
-            self.warning(f"using untested triton version ({installed_triton}), only 1.0.0 is known to be compatible")
+            if verbose:
+                self.warning(f"using untested triton version ({installed_triton}), only 1.0.0 is known to be compatible")
             return False
 
         return super().is_compatible(verbose) and torch_compatible and cuda_compatible
diff --git a/op_builder/spatial_inference.py b/op_builder/spatial_inference.py
index 3881247958d4..d6c5fa661156 100644
--- a/op_builder/spatial_inference.py
+++ b/op_builder/spatial_inference.py
@@ -21,7 +21,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -31,7 +32,8 @@ def is_compatible(self, verbose=False):
             cuda_capability = torch.cuda.get_device_properties(0).major
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay
 
diff --git a/op_builder/transformer_inference.py b/op_builder/transformer_inference.py
index 497f39c6f7d8..1b056ecef3ed 100755
--- a/op_builder/transformer_inference.py
+++ b/op_builder/transformer_inference.py
@@ -21,7 +21,8 @@ def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -30,11 +31,13 @@ def is_compatible(self, verbose=False):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay