diff --git a/CMakeLists.txt b/CMakeLists.txt index 9438a1919a..37302f998f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,7 +174,9 @@ set(ABACUS_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${ABACUS_BIN_NAME}) include_directories(${ABACUS_SOURCE_DIR}) include_directories(${ABACUS_SOURCE_DIR}/source_base/module_container) -set(CMAKE_CXX_STANDARD 11) +if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) +endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) add_executable(${ABACUS_BIN_NAME} source/source_main/main.cpp) @@ -330,6 +332,10 @@ endif() if(USE_CUDA) cmake_minimum_required(VERSION 3.18) # required by `CUDA_ARCHITECTURES` below set_if_higher(CMAKE_CXX_STANDARD 14) + if(CUDA_VERSION VERSION_GREATER_EQUAL "13.0") + message(STATUS "CUDA ${CUDA_VERSION} detected. Setting CMAKE_CUDA_STANDARD to 17.") + set_if_higher(CMAKE_CXX_STANDARD 17) + endif() set(CMAKE_CXX_EXTENSIONS ON) set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED ON) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index a4eaf0b197..4de8e373f5 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -81,6 +81,7 @@ if(USE_CUDA) source_pw/module_pwdft/kernels/cuda/stress_op.cu source_pw/module_pwdft/kernels/cuda/wf_op.cu source_pw/module_pwdft/kernels/cuda/vnl_op.cu + source_base/module_device/cuda_compat.cpp source_base/kernels/cuda/math_ylm_op.cu source_base/kernels/cuda/math_kernel_op.cu source_base/kernels/cuda/math_kernel_op_vec.cu diff --git a/source/source_base/module_device/cuda_compat.cpp b/source/source_base/module_device/cuda_compat.cpp new file mode 100644 index 0000000000..4f84c38cfe --- /dev/null +++ b/source/source_base/module_device/cuda_compat.cpp @@ -0,0 +1,116 @@ +#include "cuda_compat.h" + +namespace ModuleBase { +namespace cuda_compat { + +//--------------------------------------------------------------------------- +// Implementation of printDeprecatedDeviceInfo and printComputeModeInfo +//--------------------------------------------------------------------------- +void printDeprecatedDeviceInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) +{ +#if defined(CUDA_VERSION) && CUDA_VERSION < 13000 + char msg[1024]; + sprintf(msg, + " GPU Max Clock rate: %.0f MHz (%0.2f " + "GHz)\n", + deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); + ofs_device << msg << std::endl; + // This is supported in CUDA 5.0 (runtime API device properties) + sprintf(msg, " Memory Clock rate: %.0f Mhz\n", + deviceProp.memoryClockRate * 1e-3f); + ofs_device << msg << std::endl; + + sprintf(msg, " Memory Bus Width: %d-bit\n", + deviceProp.memoryBusWidth); + ofs_device << msg << std::endl; + + sprintf(msg, + " Concurrent copy and kernel execution: %s with %d copy " + "engine(s)\n", + (deviceProp.deviceOverlap ? "Yes" : "No"), + deviceProp.asyncEngineCount); + ofs_device << msg << std::endl; + sprintf(msg, " Run time limit on kernels: %s\n", + deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + ofs_device << msg << std::endl; +#endif +} + +void printComputeModeInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp) +{ +#if defined(CUDA_VERSION) && CUDA_VERSION < 13000 + char msg[1024]; + sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", + deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); + ofs_device << msg << std::endl; + + const char *sComputeMode[] = { + "Default (multiple host threads can use ::cudaSetDevice() with device " + "simultaneously)", + "Exclusive (only one host thread in one process is able to use " + "::cudaSetDevice() with this device)", + "Prohibited (no host thread can use ::cudaSetDevice() with this " + "device)", + "Exclusive Process (many threads in one process is able to use " + "::cudaSetDevice() with this device)", + "Unknown", + NULL}; + sprintf(msg, " Compute Mode:\n"); + ofs_device << msg << std::endl; + ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl + << std::endl; +#endif +} + +//------------------------------------------------------------------------------------------------- +// Implementation of cufftGetErrorStringCompat +//------------------------------------------------------------------------------------------------- +const char* cufftGetErrorStringCompat(cufftResult_t error) +{ + switch (error) + { + case CUFFT_SUCCESS: + return "CUFFT_SUCCESS"; + case CUFFT_INVALID_PLAN: + return "CUFFT_INVALID_PLAN"; + case CUFFT_ALLOC_FAILED: + return "CUFFT_ALLOC_FAILED"; + case CUFFT_INVALID_TYPE: + return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: + return "CUFFT_INVALID_VALUE"; + case CUFFT_INTERNAL_ERROR: + return "CUFFT_INTERNAL_ERROR"; + case CUFFT_EXEC_FAILED: + return "CUFFT_EXEC_FAILED"; + case CUFFT_SETUP_FAILED: + return "CUFFT_SETUP_FAILED"; + case CUFFT_INVALID_SIZE: + return "CUFFT_INVALID_SIZE"; + case CUFFT_UNALIGNED_DATA: + return "CUFFT_UNALIGNED_DATA"; + case CUFFT_INVALID_DEVICE: + return "CUFFT_INVALID_DEVICE"; + case CUFFT_NO_WORKSPACE: + return "CUFFT_NO_WORKSPACE"; + case CUFFT_NOT_IMPLEMENTED: + return "CUFFT_NOT_IMPLEMENTED"; + case CUFFT_NOT_SUPPORTED: + return "CUFFT_NOT_SUPPORTED"; + +#if defined(CUDA_VERSION) && CUDA_VERSION < 13000 + case CUFFT_INCOMPLETE_PARAMETER_LIST: + return "CUFFT_INCOMPLETE_PARAMETER_LIST"; + case CUFFT_PARSE_ERROR: + return "CUFFT_PARSE_ERROR"; + case CUFFT_LICENSE_ERROR: + return "CUFFT_LICENSE_ERROR"; +#endif + + default: + return ""; + } +} + +} // namespace cuda_compat +} // namespace ModuleBase diff --git a/source/source_base/module_device/cuda_compat.h b/source/source_base/module_device/cuda_compat.h index 78c0f6420c..699ce23cea 100644 --- a/source/source_base/module_device/cuda_compat.h +++ b/source/source_base/module_device/cuda_compat.h @@ -12,7 +12,12 @@ #ifndef CUDA_COMPAT_H_ #define CUDA_COMPAT_H_ +#include // For std::ostream +#include // For std::invalid_argument #include // defines CUDA_VERSION +#include +#include + // NVTX header for CUDA versions prior to 12.9 vs. 12.9+ // This block ensures the correct NVTX header path is used based on CUDA_VERSION. @@ -31,4 +36,46 @@ #endif #endif +//------------------------------------------------------------------------------------------------- +// Compatibility Layer Declarations +//------------------------------------------------------------------------------------------------- +namespace ModuleBase { +namespace cuda_compat { + +/** + * @brief Prints device information that was deprecated or removed in CUDA 13.0. + * + * This function handles properties like clockRate, memoryClockRate, memoryBusWidth, + * and concurrency flags, which are not available in newer CUDA toolkits. + * + * @param os The output stream (e.g., std::cout, std::ofstream). + * @param prop The cudaDeviceProp structure containing device properties. + */ +void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop); + +/** + * @brief Prints the device's compute mode using a legacy string mapping. + * + * The compute mode display logic is encapsulated here as it relies on aspects + * of the driver model that have changed. + * + * @param os The output stream (e.g., std::cout, std::ofstream). + * @param prop The cudaDeviceProp structure containing device properties. + */ +void printComputeModeInfo(std::ostream& os, const cudaDeviceProp& prop); + +/** + * @brief Provides a cross-CUDA-version string conversion for cuFFT error codes. + * + * In CUDA 13.0, several error codes were removed. This function handles + * these differences gracefully. + * + * @param error The cufftResult_t error code. + * @return const char* A descriptive string for the error. + */ +const char* cufftGetErrorStringCompat(cufftResult_t error); + +} // namespace cuda_compat +} // namespace ModuleBase + #endif // CUDA_COMPAT_H_ diff --git a/source/source_base/module_device/output_device.cpp b/source/source_base/module_device/output_device.cpp index 41b4c6d082..2d47919e23 100644 --- a/source/source_base/module_device/output_device.cpp +++ b/source/source_base/module_device/output_device.cpp @@ -14,6 +14,7 @@ #if defined(__CUDA) #include +#include "source_base/module_device/cuda_compat.h" #endif #if defined(__ROCM) @@ -218,19 +219,6 @@ void print_device_info( sprintf(msg, " CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor); ofs_device << msg << std::endl; - sprintf(msg, - " GPU Max Clock rate: %.0f MHz (%0.2f " - "GHz)\n", - deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); - ofs_device << msg << std::endl; - // This is supported in CUDA 5.0 (runtime API device properties) - sprintf(msg, " Memory Clock rate: %.0f Mhz\n", - deviceProp.memoryClockRate * 1e-3f); - ofs_device << msg << std::endl; - - sprintf(msg, " Memory Bus Width: %d-bit\n", - deviceProp.memoryBusWidth); - ofs_device << msg << std::endl; sprintf(msg, " Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, " "%d), 3D=(%d, %d, %d)\n", @@ -238,7 +226,6 @@ void print_device_info( deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); ofs_device << msg << std::endl; - sprintf( msg, " Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", @@ -285,15 +272,6 @@ void print_device_info( sprintf(msg, " Texture alignment: %zu bytes\n", deviceProp.textureAlignment); ofs_device << msg << std::endl; - sprintf(msg, - " Concurrent copy and kernel execution: %s with %d copy " - "engine(s)\n", - (deviceProp.deviceOverlap ? "Yes" : "No"), - deviceProp.asyncEngineCount); - ofs_device << msg << std::endl; - sprintf(msg, " Run time limit on kernels: %s\n", - deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); - ofs_device << msg << std::endl; sprintf(msg, " Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); ofs_device << msg << std::endl; @@ -318,28 +296,14 @@ void print_device_info( sprintf(msg, " Supports Cooperative Kernel Launch: %s\n", deviceProp.cooperativeLaunch ? "Yes" : "No"); ofs_device << msg << std::endl; - sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n", - deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); - ofs_device << msg << std::endl; sprintf(msg, " Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); ofs_device << msg << std::endl; - const char *sComputeMode[] = { - "Default (multiple host threads can use ::cudaSetDevice() with device " - "simultaneously)", - "Exclusive (only one host thread in one process is able to use " - "::cudaSetDevice() with this device)", - "Prohibited (no host thread can use ::cudaSetDevice() with this " - "device)", - "Exclusive Process (many threads in one process is able to use " - "::cudaSetDevice() with this device)", - "Unknown", - NULL}; - sprintf(msg, " Compute Mode:\n"); - ofs_device << msg << std::endl; - ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl - << std::endl; + + ModuleBase::cuda_compat::printDeprecatedDeviceInfo(ofs_device, deviceProp); + + ModuleBase::cuda_compat::printComputeModeInfo(ofs_device, deviceProp); // If there are 2 or more GPUs, query to determine whether RDMA is supported if (deviceCount >= 2) { @@ -629,4 +593,4 @@ void record_device_memory( #endif } -} \ No newline at end of file +} diff --git a/source/source_hsolver/kernels/cuda/helper_cuda.h b/source/source_hsolver/kernels/cuda/helper_cuda.h index e61e4597f6..4e97a98dea 100644 --- a/source/source_hsolver/kernels/cuda/helper_cuda.h +++ b/source/source_hsolver/kernels/cuda/helper_cuda.h @@ -40,6 +40,8 @@ #include "helper_string.h" +#include "source_base/module_device/cuda_compat.h" + #ifndef EXIT_WAIVED #define EXIT_WAIVED 2 #endif @@ -107,60 +109,7 @@ static const char *_cudaGetErrorEnum(cublasStatus_t error) { #ifdef _CUFFT_H_ // cuFFT API errors static const char *_cudaGetErrorEnum(cufftResult error) { - switch (error) { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; - - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; - - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; - - case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; - - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; - - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; - - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; - - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; - - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; - - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - - case CUFFT_INVALID_DEVICE: - return "CUFFT_INVALID_DEVICE"; - - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - - case CUFFT_NO_WORKSPACE: - return "CUFFT_NO_WORKSPACE"; - - case CUFFT_NOT_IMPLEMENTED: - return "CUFFT_NOT_IMPLEMENTED"; - - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; - - case CUFFT_NOT_SUPPORTED: - return "CUFFT_NOT_SUPPORTED"; - } - - return ""; + return ModuleBase::cuda_compat::cufftGetErrorStringCompat(error); } #endif @@ -965,4 +914,4 @@ inline bool checkCudaCapabilities(int major_version, int minor_version) { // end of CUDA Helper Functions -#endif // COMMON_HELPER_CUDA_H_ \ No newline at end of file +#endif // COMMON_HELPER_CUDA_H_ diff --git a/source/source_pw/module_pwdft/global.h b/source/source_pw/module_pwdft/global.h index 5080ddc24d..bea93a9331 100644 --- a/source/source_pw/module_pwdft/global.h +++ b/source/source_pw/module_pwdft/global.h @@ -16,6 +16,7 @@ #ifdef __CUDA #include "cublas_v2.h" #include "cufft.h" +#include "source_base/module_device/cuda_compat.h" static const char* _cublasGetErrorString(cublasStatus_t error) { @@ -41,48 +42,6 @@ static const char* _cublasGetErrorString(cublasStatus_t error) return ""; } -static const char* _cufftGetErrorString(cufftResult_t error) -{ - switch (error) - { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; - case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - case CUFFT_INVALID_DEVICE: - return "CUFFT_INVALID_DEVICE"; - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - case CUFFT_NO_WORKSPACE: - return "CUFFT_NO_WORKSPACE"; - case CUFFT_NOT_IMPLEMENTED: - return "CUFFT_NOT_IMPLEMENTED"; - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; - case CUFFT_NOT_SUPPORTED: - return "CUFFT_NOT_SUPPORTED"; - } - return ""; -} - #define CHECK_CUDA(func) \ { \ cudaError_t status = (func); \ @@ -119,7 +78,7 @@ static const char* _cufftGetErrorString(cufftResult_t error) if (status != CUFFT_SUCCESS) \ { \ printf("In File %s : CUFFT API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ - _cufftGetErrorString(status), status); \ + ModuleBase::cuda_compat::cufftGetErrorStringCompat(status), status); \ } \ } #endif // __CUDA