3 changes: 3 additions & 0 deletions docs/source/precision_accelerating.md
@@ -33,6 +33,9 @@ Please note that there are environment variables that can override the flags above

 If you are using an [NGC PyTorch container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch), the container includes a layer `ENV TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1`.
 The default value of `torch.backends.cuda.matmul.allow_tf32` will be overridden to `True`.
+To restore the upstream default value, please run `unset TORCH_ALLOW_TF32_CUBLAS_OVERRIDE` in the container,
+and use the PyTorch API `torch.set_float32_matmul_precision` or set `torch.backends.cudnn.allow_tf32 = False` accordingly.
+

 We recommend that users print out these two flags for confirmation when unsure.
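For confirmation and restoration, a minimal sketch (assuming a recent PyTorch build; `"highest"` is the upstream float32 matmul default, which keeps TF32 off):

```python
import torch

# Print the two TF32 flags for confirmation when unsure.
print("torch.backends.cuda.matmul.allow_tf32 =", torch.backends.cuda.matmul.allow_tf32)
print("torch.backends.cudnn.allow_tf32 =", torch.backends.cudnn.allow_tf32)

# After `unset TORCH_ALLOW_TF32_CUBLAS_OVERRIDE`, restore the upstream matmul default:
# "highest" keeps float32 matmuls in full precision (TF32 off).
torch.set_float32_matmul_precision("highest")

# Optionally disable TF32 for cuDNN convolutions as well (the upstream default is True).
torch.backends.cudnn.allow_tf32 = False
```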
14 changes: 8 additions & 6 deletions monai/config/deviceconfig.py
@@ -205,6 +205,8 @@ def get_gpu_info() -> OrderedDict:
_dict_append(output, "CUDA version", lambda: torch.version.cuda)
cudnn_ver = torch.backends.cudnn.version()
_dict_append(output, "cuDNN enabled", lambda: bool(cudnn_ver))
_dict_append(output, "NVIDIA_TF32_OVERRIDE", os.environ.get("NVIDIA_TF32_OVERRIDE"))
_dict_append(output, "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE", os.environ.get("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"))

     if cudnn_ver:
         _dict_append(output, "cuDNN version", lambda: cudnn_ver)
@@ -215,12 +217,12 @@

     for gpu in range(num_gpus):
         gpu_info = torch.cuda.get_device_properties(gpu)
-        _dict_append(output, f"GPU {gpu} Name", lambda: gpu_info.name)
-        _dict_append(output, f"GPU {gpu} Is integrated", lambda: bool(gpu_info.is_integrated))
-        _dict_append(output, f"GPU {gpu} Is multi GPU board", lambda: bool(gpu_info.is_multi_gpu_board))
-        _dict_append(output, f"GPU {gpu} Multi processor count", lambda: gpu_info.multi_processor_count)
-        _dict_append(output, f"GPU {gpu} Total memory (GB)", lambda: round(gpu_info.total_memory / 1024**3, 1))
-        _dict_append(output, f"GPU {gpu} CUDA capability (maj.min)", lambda: f"{gpu_info.major}.{gpu_info.minor}")
+        _dict_append(output, f"GPU {gpu} Name", gpu_info.name)
+        _dict_append(output, f"GPU {gpu} Is integrated", bool(gpu_info.is_integrated))
+        _dict_append(output, f"GPU {gpu} Is multi GPU board", bool(gpu_info.is_multi_gpu_board))
+        _dict_append(output, f"GPU {gpu} Multi processor count", gpu_info.multi_processor_count)
+        _dict_append(output, f"GPU {gpu} Total memory (GB)", round(gpu_info.total_memory / 1024**3, 1))
+        _dict_append(output, f"GPU {gpu} CUDA capability (maj.min)", f"{gpu_info.major}.{gpu_info.minor}")

     return output

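Passing values instead of lambdas also sidesteps Python's late binding of closures: if these callables were ever evaluated after the loop finishes, every lambda would observe the final `gpu_info`. A minimal illustration of that pitfall (illustrative only, not MONAI code):

```python
# Lambdas in a loop close over the variable, not its value at definition time.
getters = [lambda: item for item in ("a", "b", "c")]
print([g() for g in getters])  # ['c', 'c', 'c'] when called after the loop

# Binding via a default argument captures each value as intended.
getters = [lambda item=item: item for item in ("a", "b", "c")]
print([g() for g in getters])  # ['a', 'b', 'c']
```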
4 changes: 2 additions & 2 deletions monai/utils/tf32.py
@@ -52,7 +52,7 @@ def has_ampere_or_later() -> bool:
 @functools.lru_cache(None)
 def detect_default_tf32() -> bool:
     """
-    Dectect if there is anything that may enable TF32 mode by default.
+    Detect if there is anything that may enable TF32 mode by default.
     If any, show a warning message.
     """
     may_enable_tf32 = False
@@ -70,7 +70,7 @@
         )
         may_enable_tf32 = True

-    override_tf32_env_vars = {"NVIDIA_TF32_OVERRIDE": "1", "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE": "1"}
+    override_tf32_env_vars = {"NVIDIA_TF32_OVERRIDE": "1"}  # TORCH_ALLOW_TF32_CUBLAS_OVERRIDE not checked #6907
     for name, override_val in override_tf32_env_vars.items():
         if os.environ.get(name) == override_val:
             warnings.warn(
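A standalone sketch of the remaining check (the warning text here is illustrative, not the library's exact message):

```python
import os
import warnings

# Only NVIDIA_TF32_OVERRIDE is treated as a TF32 trigger;
# TORCH_ALLOW_TF32_CUBLAS_OVERRIDE is intentionally not checked (#6907).
override_tf32_env_vars = {"NVIDIA_TF32_OVERRIDE": "1"}
for name, override_val in override_tf32_env_vars.items():
    if os.environ.get(name) == override_val:
        warnings.warn(f"Environment variable `{name}={override_val}` is set: TF32 may be enabled by default.")
```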