Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ set(ABACUS_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${ABACUS_BIN_NAME})
include_directories(${ABACUS_SOURCE_DIR})
include_directories(${ABACUS_SOURCE_DIR}/source_base/module_container)

# Default to C++11, but keep any standard that was already chosen (for example
# via -DCMAKE_CXX_STANDARD=17 on the command line). An unconditional set() in
# front of this guard would make the guard dead and overwrite that choice.
if(NOT DEFINED CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(${ABACUS_BIN_NAME} source/source_main/main.cpp)
Expand Down Expand Up @@ -330,6 +332,10 @@ endif()
if(USE_CUDA)
cmake_minimum_required(VERSION 3.18) # required by `CUDA_ARCHITECTURES` below
set_if_higher(CMAKE_CXX_STANDARD 14)
# Raise the C++ standard floor to 17 when building against CUDA 13.0 or newer.
# The status message mentions CMAKE_CUDA_STANDARD because that variable is
# assigned from CMAKE_CXX_STANDARD a few lines below this check.
# NOTE(review): CUDA_VERSION is not set in the visible part of this file;
# confirm it is defined before this point, otherwise the comparison is false.
if(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
message(STATUS "CUDA ${CUDA_VERSION} detected. Setting CMAKE_CUDA_STANDARD to 17.")
# set_if_higher is defined elsewhere; presumably it only ever raises the
# value and never lowers a higher user-selected standard - TODO confirm.
set_if_higher(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_EXTENSIONS ON)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
Expand Down
1 change: 1 addition & 0 deletions source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ if(USE_CUDA)
source_pw/module_pwdft/kernels/cuda/stress_op.cu
source_pw/module_pwdft/kernels/cuda/wf_op.cu
source_pw/module_pwdft/kernels/cuda/vnl_op.cu
source_base/module_device/cuda_compat.cpp
source_base/kernels/cuda/math_ylm_op.cu
source_base/kernels/cuda/math_kernel_op.cu
source_base/kernels/cuda/math_kernel_op_vec.cu
Expand Down
116 changes: 116 additions & 0 deletions source/source_base/module_device/cuda_compat.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include "cuda_compat.h"

#include <cstdio>

namespace ModuleBase {
namespace cuda_compat {

//---------------------------------------------------------------------------
// Implementation of printDeprecatedDeviceInfo and printComputeModeInfo
//---------------------------------------------------------------------------
/**
 * @brief Writes the device properties that are guarded as pre-CUDA-13 fields.
 *
 * When compiled against a toolkit with CUDA_VERSION < 13000 this prints the
 * GPU clock rate, memory clock rate, memory bus width, copy/kernel overlap
 * with the copy-engine count, and the kernel run-time-limit flag. For any
 * other toolkit version the body is compiled out and the call does nothing.
 *
 * @param ofs_device Stream that receives the formatted lines.
 * @param deviceProp Device properties to report.
 */
void printDeprecatedDeviceInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp)
{
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
    char msg[1024];

    // snprintf bounds every write by the buffer size instead of trusting
    // that the formatted text always fits.
    std::snprintf(msg, sizeof(msg),
                  " GPU Max Clock rate: %.0f MHz (%0.2f "
                  "GHz)\n",
                  deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
    ofs_device << msg << std::endl;

    // This is supported in CUDA 5.0 (runtime API device properties)
    std::snprintf(msg, sizeof(msg), " Memory Clock rate: %.0f Mhz\n",
                  deviceProp.memoryClockRate * 1e-3f);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg), " Memory Bus Width: %d-bit\n",
                  deviceProp.memoryBusWidth);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg),
                  " Concurrent copy and kernel execution: %s with %d copy "
                  "engine(s)\n",
                  (deviceProp.deviceOverlap ? "Yes" : "No"),
                  deviceProp.asyncEngineCount);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg), " Run time limit on kernels: %s\n",
                  deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
    ofs_device << msg << std::endl;
#else
    // Nothing is printed in this configuration; mark the parameters as used
    // so the build stays free of unused-parameter warnings.
    (void)ofs_device;
    (void)deviceProp;
#endif
}

/**
 * @brief Writes the multi-device cooperative-launch flag and the compute mode.
 *
 * When compiled against a toolkit with CUDA_VERSION < 13000 this prints
 * whether multi-device cooperative kernel launch is supported, followed by a
 * textual description of deviceProp.computeMode. A compute mode outside the
 * known table is reported as "Unknown" instead of indexing past the table.
 * For any other toolkit version the body is compiled out and the call does
 * nothing.
 *
 * @param ofs_device Stream that receives the formatted lines.
 * @param deviceProp Device properties to report.
 */
void printComputeModeInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp)
{
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
    char msg[1024];
    std::snprintf(msg, sizeof(msg), " Supports MultiDevice Co-op Kernel Launch: %s\n",
                  deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
    ofs_device << msg << std::endl;

    // Indexed directly by the numeric compute mode value.
    static const char* const kComputeModeNames[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device "
        "simultaneously)",
        "Exclusive (only one host thread in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Prohibited (no host thread can use ::cudaSetDevice() with this "
        "device)",
        "Exclusive Process (many threads in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Unknown"};
    const int mode_count
        = static_cast<int>(sizeof(kComputeModeNames) / sizeof(kComputeModeNames[0]));

    // Guard the lookup: an unexpected value must not read outside the table
    // or hand a null pointer to the stream.
    const int mode = static_cast<int>(deviceProp.computeMode);
    const char* mode_name = (mode >= 0 && mode < mode_count) ? kComputeModeNames[mode] : "Unknown";

    ofs_device << " Compute Mode:\n" << std::endl;
    ofs_device << " " << mode_name << std::endl
               << std::endl;
#else
    // Nothing is printed in this configuration; mark the parameters as used
    // so the build stays free of unused-parameter warnings.
    (void)ofs_device;
    (void)deviceProp;
#endif
}

//-------------------------------------------------------------------------------------------------
// Implementation of cufftGetErrorStringCompat
//-------------------------------------------------------------------------------------------------
/**
 * @brief Maps a cuFFT status code to the name of its enumerator.
 *
 * Three enumerators are only listed when CUDA_VERSION < 13000; in other
 * builds those values fall through to the generic fallback.
 *
 * @param error The cuFFT status code to describe.
 * @return The enumerator name as a string literal, or "<unknown>" when the
 *         code is not in the table.
 */
const char* cufftGetErrorStringCompat(cufftResult_t error)
{
    // One row per recognised status code.
    struct NamedStatus
    {
        cufftResult_t code;
        const char* name;
    };

    static const NamedStatus known_statuses[] = {
        {CUFFT_SUCCESS, "CUFFT_SUCCESS"},
        {CUFFT_INVALID_PLAN, "CUFFT_INVALID_PLAN"},
        {CUFFT_ALLOC_FAILED, "CUFFT_ALLOC_FAILED"},
        {CUFFT_INVALID_TYPE, "CUFFT_INVALID_TYPE"},
        {CUFFT_INVALID_VALUE, "CUFFT_INVALID_VALUE"},
        {CUFFT_INTERNAL_ERROR, "CUFFT_INTERNAL_ERROR"},
        {CUFFT_EXEC_FAILED, "CUFFT_EXEC_FAILED"},
        {CUFFT_SETUP_FAILED, "CUFFT_SETUP_FAILED"},
        {CUFFT_INVALID_SIZE, "CUFFT_INVALID_SIZE"},
        {CUFFT_UNALIGNED_DATA, "CUFFT_UNALIGNED_DATA"},
        {CUFFT_INVALID_DEVICE, "CUFFT_INVALID_DEVICE"},
        {CUFFT_NO_WORKSPACE, "CUFFT_NO_WORKSPACE"},
        {CUFFT_NOT_IMPLEMENTED, "CUFFT_NOT_IMPLEMENTED"},
        {CUFFT_NOT_SUPPORTED, "CUFFT_NOT_SUPPORTED"},
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
        {CUFFT_INCOMPLETE_PARAMETER_LIST, "CUFFT_INCOMPLETE_PARAMETER_LIST"},
        {CUFFT_PARSE_ERROR, "CUFFT_PARSE_ERROR"},
        {CUFFT_LICENSE_ERROR, "CUFFT_LICENSE_ERROR"},
#endif
    };

    for (const NamedStatus& entry : known_statuses)
    {
        if (entry.code == error)
        {
            return entry.name;
        }
    }

    return "<unknown>";
}

} // namespace cuda_compat
} // namespace ModuleBase
47 changes: 47 additions & 0 deletions source/source_base/module_device/cuda_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
#ifndef CUDA_COMPAT_H_
#define CUDA_COMPAT_H_

#include <iostream> // For std::ostream
#include <stdexcept> // For std::invalid_argument
#include <cuda.h> // defines CUDA_VERSION
#include <cuda_runtime.h>
#include <cufft.h>


// NVTX header for CUDA versions prior to 12.9 vs. 12.9+
// This block ensures the correct NVTX header path is used based on CUDA_VERSION.
Expand All @@ -31,4 +36,46 @@
#endif
#endif

//-------------------------------------------------------------------------------------------------
// Compatibility Layer Declarations
//-------------------------------------------------------------------------------------------------
namespace ModuleBase {
namespace cuda_compat {

/**
 * @brief Prints device information that was deprecated or removed in CUDA 13.0.
 *
 * Covers clockRate, memoryClockRate, memoryBusWidth, the copy/kernel overlap
 * flag with the copy-engine count, and the kernel run-time-limit flag.
 * The implementation only prints when built with CUDA_VERSION < 13000;
 * otherwise the call writes nothing.
 *
 * @param os The output stream (e.g., std::cout, std::ofstream).
 * @param prop The cudaDeviceProp structure containing device properties.
 */
void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop);

/**
 * @brief Prints the multi-device cooperative-launch flag and the compute mode.
 *
 * The compute mode is shown through a fixed table of descriptive strings
 * indexed by prop.computeMode. The implementation only prints when built
 * with CUDA_VERSION < 13000; otherwise the call writes nothing.
 *
 * @param os The output stream (e.g., std::cout, std::ofstream).
 * @param prop The cudaDeviceProp structure containing device properties.
 */
void printComputeModeInfo(std::ostream& os, const cudaDeviceProp& prop);

/**
 * @brief Provides a cross-CUDA-version string conversion for cuFFT error codes.
 *
 * In CUDA 13.0, several error codes were removed. Those enumerators are only
 * referenced when building with CUDA_VERSION < 13000, so this function
 * compiles against both older and newer toolkits.
 *
 * @param error The cufftResult_t error code.
 * @return const char* The enumerator name, or "<unknown>" for a code that is
 *         not recognised in the current build.
 */
const char* cufftGetErrorStringCompat(cufftResult_t error);

} // namespace cuda_compat
} // namespace ModuleBase

#endif // CUDA_COMPAT_H_
48 changes: 6 additions & 42 deletions source/source_base/module_device/output_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#if defined(__CUDA)
#include <cuda_runtime.h>
#include "source_base/module_device/cuda_compat.h"
#endif

#if defined(__ROCM)
Expand Down Expand Up @@ -218,27 +219,13 @@ void print_device_info<base_device::DEVICE_GPU>(
sprintf(msg, " CUDA Capability Major/Minor version number: %d.%d\n",
deviceProp.major, deviceProp.minor);
ofs_device << msg << std::endl;
sprintf(msg,
" GPU Max Clock rate: %.0f MHz (%0.2f "
"GHz)\n",
deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
ofs_device << msg << std::endl;
// This is supported in CUDA 5.0 (runtime API device properties)
sprintf(msg, " Memory Clock rate: %.0f Mhz\n",
deviceProp.memoryClockRate * 1e-3f);
ofs_device << msg << std::endl;

sprintf(msg, " Memory Bus Width: %d-bit\n",
deviceProp.memoryBusWidth);
ofs_device << msg << std::endl;
sprintf(msg,
" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, "
"%d), 3D=(%d, %d, %d)\n",
deviceProp.maxTexture1D, deviceProp.maxTexture2D[0],
deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0],
deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
ofs_device << msg << std::endl;

sprintf(
msg,
" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
Expand Down Expand Up @@ -285,15 +272,6 @@ void print_device_info<base_device::DEVICE_GPU>(
sprintf(msg, " Texture alignment: %zu bytes\n",
deviceProp.textureAlignment);
ofs_device << msg << std::endl;
sprintf(msg,
" Concurrent copy and kernel execution: %s with %d copy "
"engine(s)\n",
(deviceProp.deviceOverlap ? "Yes" : "No"),
deviceProp.asyncEngineCount);
ofs_device << msg << std::endl;
sprintf(msg, " Run time limit on kernels: %s\n",
deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
ofs_device << msg << std::endl;
sprintf(msg, " Integrated GPU sharing Host Memory: %s\n",
deviceProp.integrated ? "Yes" : "No");
ofs_device << msg << std::endl;
Expand All @@ -318,28 +296,14 @@ void print_device_info<base_device::DEVICE_GPU>(
sprintf(msg, " Supports Cooperative Kernel Launch: %s\n",
deviceProp.cooperativeLaunch ? "Yes" : "No");
ofs_device << msg << std::endl;
sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n",
deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
ofs_device << msg << std::endl;
sprintf(msg,
" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n",
deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
ofs_device << msg << std::endl;
const char *sComputeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device "
"simultaneously)",
"Exclusive (only one host thread in one process is able to use "
"::cudaSetDevice() with this device)",
"Prohibited (no host thread can use ::cudaSetDevice() with this "
"device)",
"Exclusive Process (many threads in one process is able to use "
"::cudaSetDevice() with this device)",
"Unknown",
NULL};
sprintf(msg, " Compute Mode:\n");
ofs_device << msg << std::endl;
ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl
<< std::endl;

ModuleBase::cuda_compat::printDeprecatedDeviceInfo(ofs_device, deviceProp);

ModuleBase::cuda_compat::printComputeModeInfo(ofs_device, deviceProp);

// If there are 2 or more GPUs, query to determine whether RDMA is supported
if (deviceCount >= 2) {
Expand Down Expand Up @@ -629,4 +593,4 @@ void record_device_memory<base_device::DEVICE_GPU>(

#endif
}
}
}
59 changes: 4 additions & 55 deletions source/source_hsolver/kernels/cuda/helper_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@

#include "helper_string.h"

#include "source_base/module_device/cuda_compat.h"

#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
Expand Down Expand Up @@ -107,60 +109,7 @@ static const char *_cudaGetErrorEnum(cublasStatus_t error) {
#ifdef _CUFFT_H_
// cuFFT API errors
// Forwards to the shared compatibility helper. Keeping a local switch here
// would reference enumerators that are not available in every CUDA toolkit
// version and would leave the forwarding call unreachable.
static const char *_cudaGetErrorEnum(cufftResult error) {
  return ModuleBase::cuda_compat::cufftGetErrorStringCompat(error);
}
#endif

Expand Down Expand Up @@ -965,4 +914,4 @@ inline bool checkCudaCapabilities(int major_version, int minor_version) {

// end of CUDA Helper Functions

#endif // COMMON_HELPER_CUDA_H_
#endif // COMMON_HELPER_CUDA_H_
Loading
Loading