deepmodeling · mohanchen · Dec 15, 2025 · Dec 12, 2025 · Dec 12, 2025 · Dec 12, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -174,7 +174,9 @@ set(ABACUS_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${ABACUS_BIN_NAME})
 include_directories(${ABACUS_SOURCE_DIR})
 include_directories(${ABACUS_SOURCE_DIR}/source_base/module_container)
 
-set(CMAKE_CXX_STANDARD 11)
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 11)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 add_executable(${ABACUS_BIN_NAME} source/source_main/main.cpp)
@@ -330,6 +332,10 @@ endif()
 if(USE_CUDA)
   cmake_minimum_required(VERSION 3.18) # required by `CUDA_ARCHITECTURES` below
   set_if_higher(CMAKE_CXX_STANDARD 14)
+  if(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
+    message(STATUS "CUDA ${CUDA_VERSION} detected. Setting CMAKE_CUDA_STANDARD to 17.")
+    set_if_higher(CMAKE_CXX_STANDARD 17)
+  endif()
   set(CMAKE_CXX_EXTENSIONS ON)
   set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
   set(CMAKE_CUDA_STANDARD_REQUIRED ON)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
@@ -81,6 +81,7 @@ if(USE_CUDA)
     source_pw/module_pwdft/kernels/cuda/stress_op.cu
     source_pw/module_pwdft/kernels/cuda/wf_op.cu
     source_pw/module_pwdft/kernels/cuda/vnl_op.cu
+    source_base/module_device/cuda_compat.cpp
     source_base/kernels/cuda/math_ylm_op.cu
     source_base/kernels/cuda/math_kernel_op.cu
     source_base/kernels/cuda/math_kernel_op_vec.cu

diff --git a/source/source_base/module_device/cuda_compat.cpp b/source/source_base/module_device/cuda_compat.cpp
@@ -0,0 +1,116 @@
+#include "cuda_compat.h"
+
+namespace ModuleBase {
+namespace cuda_compat {
+
+//---------------------------------------------------------------------------
+// Implementation of printDeprecatedDeviceInfo and printComputeModeInfo
+//---------------------------------------------------------------------------
+void printDeprecatedDeviceInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) 
+{
+#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
+  char msg[1024];
+  sprintf(msg,
+          "  GPU Max Clock rate:                            %.0f MHz (%0.2f "
+          "GHz)\n",
+          deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
+  ofs_device << msg << std::endl;
+  // This is supported in CUDA 5.0 (runtime API device properties)
+  sprintf(msg, "  Memory Clock rate:                             %.0f Mhz\n",
+          deviceProp.memoryClockRate * 1e-3f);
+  ofs_device << msg << std::endl;
+
+  sprintf(msg, "  Memory Bus Width:                              %d-bit\n",
+          deviceProp.memoryBusWidth);
+  ofs_device << msg << std::endl;
+
+  sprintf(msg,
+          "  Concurrent copy and kernel execution:          %s with %d copy "
+          "engine(s)\n",
+          (deviceProp.deviceOverlap ? "Yes" : "No"),
+          deviceProp.asyncEngineCount);
+  ofs_device << msg << std::endl;
+  sprintf(msg, "  Run time limit on kernels:                     %s\n",
+          deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
+  ofs_device << msg << std::endl;
+#endif
+}
+
+void printComputeModeInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) 
+{
+#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
+  char msg[1024];
+  sprintf(msg, "  Supports MultiDevice Co-op Kernel Launch:      %s\n",
+          deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
+  ofs_device << msg << std::endl;
+
+  const char *sComputeMode[] = {
+      "Default (multiple host threads can use ::cudaSetDevice() with device "
+      "simultaneously)",
+      "Exclusive (only one host thread in one process is able to use "
+      "::cudaSetDevice() with this device)",
+      "Prohibited (no host thread can use ::cudaSetDevice() with this "
+      "device)",
+      "Exclusive Process (many threads in one process is able to use "
+      "::cudaSetDevice() with this device)",
+      "Unknown",
+      NULL};
+  sprintf(msg, "  Compute Mode:\n");
+  ofs_device << msg << std::endl;
+  ofs_device << "  " << sComputeMode[deviceProp.computeMode] << std::endl
+             << std::endl;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------------
+// Implementation of cufftGetErrorStringCompat
+//-------------------------------------------------------------------------------------------------
+const char* cufftGetErrorStringCompat(cufftResult_t error) 
+{
+    switch (error)
+    {
+    case CUFFT_SUCCESS:
+        return "CUFFT_SUCCESS";
+    case CUFFT_INVALID_PLAN:
+        return "CUFFT_INVALID_PLAN";
+    case CUFFT_ALLOC_FAILED:
+        return "CUFFT_ALLOC_FAILED";
+    case CUFFT_INVALID_TYPE:
+        return "CUFFT_INVALID_TYPE";
+    case CUFFT_INVALID_VALUE:
+        return "CUFFT_INVALID_VALUE";
+    case CUFFT_INTERNAL_ERROR:
+        return "CUFFT_INTERNAL_ERROR";
+    case CUFFT_EXEC_FAILED:
+        return "CUFFT_EXEC_FAILED";
+    case CUFFT_SETUP_FAILED:
+        return "CUFFT_SETUP_FAILED";
+    case CUFFT_INVALID_SIZE:
+        return "CUFFT_INVALID_SIZE";
+    case CUFFT_UNALIGNED_DATA:
+        return "CUFFT_UNALIGNED_DATA";
+    case CUFFT_INVALID_DEVICE:
+        return "CUFFT_INVALID_DEVICE";
+    case CUFFT_NO_WORKSPACE:
+        return "CUFFT_NO_WORKSPACE";
+    case CUFFT_NOT_IMPLEMENTED:
+        return "CUFFT_NOT_IMPLEMENTED";
+    case CUFFT_NOT_SUPPORTED:
+        return "CUFFT_NOT_SUPPORTED";
+
+#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
+    case CUFFT_INCOMPLETE_PARAMETER_LIST:
+        return "CUFFT_INCOMPLETE_PARAMETER_LIST";
+    case CUFFT_PARSE_ERROR:
+        return "CUFFT_PARSE_ERROR";
+    case CUFFT_LICENSE_ERROR:
+        return "CUFFT_LICENSE_ERROR";
+#endif
+
+    default:
+        return "<unknown>";
+    }
+}
+
+} // namespace cuda_compat
+} // namespace ModuleBase
diff --git a/source/source_base/module_device/cuda_compat.h b/source/source_base/module_device/cuda_compat.h
@@ -12,7 +12,12 @@
 #ifndef CUDA_COMPAT_H_
 #define CUDA_COMPAT_H_
 
+#include <iostream> // For std::ostream
+#include <stdexcept> // For std::invalid_argument
 #include <cuda.h> // defines CUDA_VERSION
+#include <cuda_runtime.h>
+#include <cufft.h>
+
 
 // NVTX header for CUDA versions prior to 12.9 vs. 12.9+
 // This block ensures the correct NVTX header path is used based on CUDA_VERSION.
@@ -31,4 +36,46 @@
 #endif
 #endif
 
+//-------------------------------------------------------------------------------------------------
+// Compatibility Layer Declarations
+//-------------------------------------------------------------------------------------------------
+namespace ModuleBase {
+namespace cuda_compat {
+
+/**
+ * @brief Prints device information that was deprecated or removed in CUDA 13.0.
+ *
+ * This function handles properties like clockRate, memoryClockRate, memoryBusWidth,
+ * and concurrency flags, which are not available in newer CUDA toolkits.
+ *
+ * @param os The output stream (e.g., std::cout, std::ofstream).
+ * @param prop The cudaDeviceProp structure containing device properties.
+ */
+void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop);
+
+/**
+ * @brief Prints the device's compute mode using a legacy string mapping.
+ *
+ * The compute mode display logic is encapsulated here as it relies on aspects
+ * of the driver model that have changed.
+ *
+ * @param os The output stream (e.g., std::cout, std::ofstream).
+ * @param prop The cudaDeviceProp structure containing device properties.
+ */
+void printComputeModeInfo(std::ostream& os, const cudaDeviceProp& prop);
+
+/**
+ * @brief Provides a cross-CUDA-version string conversion for cuFFT error codes.
+ *
+ * In CUDA 13.0, several error codes were removed. This function handles
+ * these differences gracefully.
+ *
+ * @param error The cufftResult_t error code.
+ * @return const char* A descriptive string for the error.
+ */
+const char* cufftGetErrorStringCompat(cufftResult_t error);
+
+} // namespace cuda_compat
+} // namespace ModuleBase
+
 #endif // CUDA_COMPAT_H_
diff --git a/source/source_base/module_device/output_device.cpp b/source/source_base/module_device/output_device.cpp
@@ -14,6 +14,7 @@
 
 #if defined(__CUDA)
 #include <cuda_runtime.h>
+#include "source_base/module_device/cuda_compat.h"
 #endif
 
 #if defined(__ROCM)
@@ -218,27 +219,13 @@ void print_device_info<base_device::DEVICE_GPU>(
   sprintf(msg, "  CUDA Capability Major/Minor version number:    %d.%d\n",
           deviceProp.major, deviceProp.minor);
   ofs_device << msg << std::endl;
-  sprintf(msg,
-          "  GPU Max Clock rate:                            %.0f MHz (%0.2f "
-          "GHz)\n",
-          deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
-  ofs_device << msg << std::endl;
-  // This is supported in CUDA 5.0 (runtime API device properties)
-  sprintf(msg, "  Memory Clock rate:                             %.0f Mhz\n",
-          deviceProp.memoryClockRate * 1e-3f);
-  ofs_device << msg << std::endl;
-
-  sprintf(msg, "  Memory Bus Width:                              %d-bit\n",
-          deviceProp.memoryBusWidth);
-  ofs_device << msg << std::endl;
   sprintf(msg,
           "  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, "
           "%d), 3D=(%d, %d, %d)\n",
           deviceProp.maxTexture1D, deviceProp.maxTexture2D[0],
           deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0],
           deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
   ofs_device << msg << std::endl;
-
   sprintf(
       msg,
       "  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
@@ -285,15 +272,6 @@ void print_device_info<base_device::DEVICE_GPU>(
   sprintf(msg, "  Texture alignment:                             %zu bytes\n",
           deviceProp.textureAlignment);
   ofs_device << msg << std::endl;
-  sprintf(msg,
-          "  Concurrent copy and kernel execution:          %s with %d copy "
-          "engine(s)\n",
-          (deviceProp.deviceOverlap ? "Yes" : "No"),
-          deviceProp.asyncEngineCount);
-  ofs_device << msg << std::endl;
-  sprintf(msg, "  Run time limit on kernels:                     %s\n",
-          deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
-  ofs_device << msg << std::endl;
   sprintf(msg, "  Integrated GPU sharing Host Memory:            %s\n",
           deviceProp.integrated ? "Yes" : "No");
   ofs_device << msg << std::endl;
@@ -318,28 +296,14 @@ void print_device_info<base_device::DEVICE_GPU>(
   sprintf(msg, "  Supports Cooperative Kernel Launch:            %s\n",
           deviceProp.cooperativeLaunch ? "Yes" : "No");
   ofs_device << msg << std::endl;
-  sprintf(msg, "  Supports MultiDevice Co-op Kernel Launch:      %s\n",
-          deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
-  ofs_device << msg << std::endl;
   sprintf(msg,
           "  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n",
           deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
   ofs_device << msg << std::endl;
-  const char *sComputeMode[] = {
-      "Default (multiple host threads can use ::cudaSetDevice() with device "
-      "simultaneously)",
-      "Exclusive (only one host thread in one process is able to use "
-      "::cudaSetDevice() with this device)",
-      "Prohibited (no host thread can use ::cudaSetDevice() with this "
-      "device)",
-      "Exclusive Process (many threads in one process is able to use "
-      "::cudaSetDevice() with this device)",
-      "Unknown",
-      NULL};
-  sprintf(msg, "  Compute Mode:\n");
-  ofs_device << msg << std::endl;
-  ofs_device << "  " << sComputeMode[deviceProp.computeMode] << std::endl
-             << std::endl;
+
+  ModuleBase::cuda_compat::printDeprecatedDeviceInfo(ofs_device, deviceProp);
+
+  ModuleBase::cuda_compat::printComputeModeInfo(ofs_device, deviceProp);
 
   // If there are 2 or more GPUs, query to determine whether RDMA is supported
   if (deviceCount >= 2) {
@@ -629,4 +593,4 @@ void record_device_memory<base_device::DEVICE_GPU>(
 
 #endif
 }
-}
+}
diff --git a/source/source_hsolver/kernels/cuda/helper_cuda.h b/source/source_hsolver/kernels/cuda/helper_cuda.h
@@ -40,6 +40,8 @@
 
 #include "helper_string.h"
 
+#include "source_base/module_device/cuda_compat.h"
+
 #ifndef EXIT_WAIVED
 #define EXIT_WAIVED 2
 #endif
@@ -107,60 +109,7 @@ static const char *_cudaGetErrorEnum(cublasStatus_t error) {
 #ifdef _CUFFT_H_
 // cuFFT API errors
 static const char *_cudaGetErrorEnum(cufftResult error) {
-  switch (error) {
-    case CUFFT_SUCCESS:
-      return "CUFFT_SUCCESS";
-
-    case CUFFT_INVALID_PLAN:
-      return "CUFFT_INVALID_PLAN";
-
-    case CUFFT_ALLOC_FAILED:
-      return "CUFFT_ALLOC_FAILED";
-
-    case CUFFT_INVALID_TYPE:
-      return "CUFFT_INVALID_TYPE";
-
-    case CUFFT_INVALID_VALUE:
-      return "CUFFT_INVALID_VALUE";
-
-    case CUFFT_INTERNAL_ERROR:
-      return "CUFFT_INTERNAL_ERROR";
-
-    case CUFFT_EXEC_FAILED:
-      return "CUFFT_EXEC_FAILED";
-
-    case CUFFT_SETUP_FAILED:
-      return "CUFFT_SETUP_FAILED";
-
-    case CUFFT_INVALID_SIZE:
-      return "CUFFT_INVALID_SIZE";
-
-    case CUFFT_UNALIGNED_DATA:
-      return "CUFFT_UNALIGNED_DATA";
-
-    case CUFFT_INCOMPLETE_PARAMETER_LIST:
-      return "CUFFT_INCOMPLETE_PARAMETER_LIST";
-
-    case CUFFT_INVALID_DEVICE:
-      return "CUFFT_INVALID_DEVICE";
-
-    case CUFFT_PARSE_ERROR:
-      return "CUFFT_PARSE_ERROR";
-
-    case CUFFT_NO_WORKSPACE:
-      return "CUFFT_NO_WORKSPACE";
-
-    case CUFFT_NOT_IMPLEMENTED:
-      return "CUFFT_NOT_IMPLEMENTED";
-
-    case CUFFT_LICENSE_ERROR:
-      return "CUFFT_LICENSE_ERROR";
-
-    case CUFFT_NOT_SUPPORTED:
-      return "CUFFT_NOT_SUPPORTED";
-  }
-
-  return "<unknown>";
+  return ModuleBase::cuda_compat::cufftGetErrorStringCompat(error);
 }
 #endif
 
@@ -965,4 +914,4 @@ inline bool checkCudaCapabilities(int major_version, int minor_version) {
 
   // end of CUDA Helper Functions
 
-#endif  // COMMON_HELPER_CUDA_H_
+#endif  // COMMON_HELPER_CUDA_H_