From 9c1e8bd97cf816eb425157716b0c1767b95ec3fa Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 4 Feb 2026 20:58:28 +0800 Subject: [PATCH 1/9] replace libcal with NCCL --- CMakeLists.txt | 69 +++++++++++++++---- .../kernels/cuda/diag_cusolvermp.cu | 65 +++++------------ .../kernels/cuda/diag_cusolvermp.cuh | 3 +- .../kernels/cuda/helper_cusolver.h | 35 ++-------- 4 files changed, 79 insertions(+), 93 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f8fc6fe0e..5d6eaef572 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -398,20 +398,63 @@ if(USE_CUDA) endif() if (ENABLE_CUSOLVERMP) add_compile_definitions(__CUSOLVERMP) - find_library(CAL_LIBRARY - NAMES cal - PATHS ${CAL_CUSOLVERMP_PATH} - NO_DEFAULT_PATH - ) - find_library(CUSOLVERMP_LIBRARY - NAMES cusolverMp - PATHS ${CAL_CUSOLVERMP_PATH} - NO_DEFAULT_PATH - ) + + # === Find NCCL === + # try NVHPC first + find_package(NVHPC QUIET CONFIG) + if(TARGET NVHPC::NCCL) + message(STATUS "Using NVHPC::NCCL target") + if(NOT TARGET NCCL::NCCL) + add_library(NCCL::NCCL ALIAS NVHPC::NCCL) + endif() + else() + find_library(NCCL_LIBRARY NAMES nccl + HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 comm_libs/nccl/lib) + find_path(NCCL_INCLUDE_DIR NAMES nccl.h + HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include comm_libs/nccl/include) + + if(NOT NCCL_LIBRARY OR NOT NCCL_INCLUDE_DIR) + message(FATAL_ERROR "NCCL not found. Set NCCL_PATH or CUSOLVERMP_PATH.") + endif() + + message(STATUS "Found NCCL: ${NCCL_LIBRARY}") + add_library(NCCL::NCCL IMPORTED INTERFACE) + set_target_properties(NCCL::NCCL PROPERTIES + INTERFACE_LINK_LIBRARIES "${NCCL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIR}") + endif() + + # === Find cusolverMp === + find_library(CUSOLVERMP_LIBRARY NAMES cusolverMp + HINTS ${CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 math_libs/lib math_libs/lib64 math_libs/*/lib64) + + find_path(CUSOLVERMP_INCLUDE_DIR NAMES cusolverMp.h + HINTS ${CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES include math_libs/include math_libs/*/include) + + if(NOT CUSOLVERMP_LIBRARY OR NOT CUSOLVERMP_INCLUDE_DIR) + message(FATAL_ERROR + "cusolverMp not found. Set CUSOLVERMP_PATH or NVHPC_ROOT_DIR." + ) + endif() + + message(STATUS "Found cusolverMp: ${CUSOLVERMP_LIBRARY}") + + # Create cusolverMp::cusolverMp imported target + add_library(cusolverMp::cusolverMp IMPORTED INTERFACE) + set_target_properties(cusolverMp::cusolverMp PROPERTIES + INTERFACE_LINK_LIBRARIES "${CUSOLVERMP_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUSOLVERMP_INCLUDE_DIR}") + + # === Link libraries === target_link_libraries(${ABACUS_BIN_NAME} - ${CAL_LIBRARY} - ${CUSOLVERMP_LIBRARY} - ) + NCCL::NCCL + cusolverMp::cusolverMp) + endif() endif() endif() diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu index 003fa4f483..0deb9b31e0 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu @@ -14,36 +14,7 @@ extern "C" #include "helper_cusolver.h" #include "source_base/global_function.h" #include "source_base/module_device/device.h" -static calError_t allgather(void* src_buf, void* recv_buf, size_t size, void* data, void** request) -{ - MPI_Request req; - intptr_t ptr = reinterpret_cast(data); - int err = MPI_Iallgather(src_buf, size, MPI_BYTE, recv_buf, size, MPI_BYTE, (MPI_Comm)ptr, &req); - if (err != MPI_SUCCESS) - { - return CAL_ERROR; - } - *request = (void*)(req); - return CAL_OK; -} - -static calError_t request_test(void* request) -{ - intptr_t ptr = reinterpret_cast(request); - MPI_Request req = (MPI_Request)ptr; - int completed; - int err = MPI_Test(&req, &completed, MPI_STATUS_IGNORE); - if (err != MPI_SUCCESS) - { - return CAL_ERROR; - } - return completed ? CAL_OK : CAL_ERROR_INPROGRESS; -} - -static calError_t request_free(void* request) -{ - return CAL_OK; -} +#include "source_base/module_device/device_check.h" template Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, @@ -73,17 +44,13 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, int local_device_id = base_device::information::set_device_by_rank(mpi_comm); Cblacs_gridinfo(this->cblacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); - this->cusolverCalComm = NULL; - cal_comm_create_params_t params; - params.allgather = allgather; - params.req_test = request_test; - params.req_free = request_free; - params.data = (void*)(mpi_comm); - params.rank = this->globalMpiRank; - params.nranks = this->globalMpiSize; - params.local_device = local_device_id; - - CAL_CHECK(cal_comm_create(params, &this->cusolverCalComm)); + // Initialize NCCL communicator + ncclUniqueId ncclId; + if (this->globalMpiRank == 0) { + NCCL_CHECK(ncclGetUniqueId(&ncclId)); + } + MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, mpi_comm); + NCCL_CHECK(ncclCommInitRank(&this->ncclComm, this->globalMpiSize, ncclId, this->globalMpiRank)); checkCudaErrors(cudaStreamCreate(&this->localStream)); CUSOLVER_CHECK(cusolverMpCreate(&cusolverMpHandle, local_device_id, this->localStream)); @@ -116,7 +83,7 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, // Use ROW_MAJOR to match BLACS grid initialization (order='R' in parallel_2d.cpp) CUSOLVER_CHECK(cusolverMpCreateDeviceGrid(cusolverMpHandle, &this->grid, - this->cusolverCalComm, + this->ncclComm, this->nprows, this->npcols, CUSOLVERMP_GRID_MAPPING_ROW_MAJOR)); @@ -140,11 +107,11 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, template Diag_CusolverMP_gvd::~Diag_CusolverMP_gvd() { - CAL_CHECK(cal_comm_barrier(this->cusolverCalComm, this->localStream)); + checkCudaErrors(cudaStreamSynchronize(this->localStream)); CUSOLVER_CHECK(cusolverMpDestroyMatrixDesc(this->desc_for_cusolvermp)); CUSOLVER_CHECK(cusolverMpDestroyGrid(this->grid)); CUSOLVER_CHECK(cusolverMpDestroy(this->cusolverMpHandle)); - CAL_CHECK(cal_comm_destroy(this->cusolverCalComm)); + NCCL_CHECK(ncclCommDestroy(this->ncclComm)); checkCudaErrors(cudaStreamDestroy(this->localStream)); } @@ -166,7 +133,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o cudaMemcpy(d_A, (void*)A, this->n_local * this->m_local * sizeof(inputT), cudaMemcpyHostToDevice)); checkCudaErrors( cudaMemcpy(d_B, (void*)B, this->n_local * this->m_local * sizeof(inputT), cudaMemcpyHostToDevice)); - CAL_CHECK(cal_stream_sync(this->cusolverCalComm, this->localStream)); + checkCudaErrors(cudaStreamSynchronize(this->localStream)); size_t sygvdWorkspaceInBytesOnDevice = 0; size_t sygvdWorkspaceInBytesOnHost = 0; @@ -203,7 +170,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o checkCudaErrors(cudaMemset(d_sygvdInfo, 0, sizeof(int))); /* sync wait for data to arrive to device */ - CAL_CHECK(cal_stream_sync(cusolverCalComm, localStream)); + checkCudaErrors(cudaStreamSynchronize(this->localStream)); CUSOLVER_CHECK(cusolverMpSygvd(cusolverMpHandle, CUSOLVER_EIG_TYPE_1, @@ -238,7 +205,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o { ModuleBase::WARNING_QUIT("cusolvermp", "cusolverMpSygvd failed with error"); } - CAL_CHECK(cal_stream_sync(this->cusolverCalComm, this->localStream)); + checkCudaErrors(cudaStreamSynchronize(this->localStream)); checkCudaErrors(cudaFree(d_sygvdWork)); checkCudaErrors(cudaFree(d_sygvdInfo)); @@ -254,7 +221,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o // I move the free operations from destructor to here. // Because I think it is more reasonable to free the memory in the function where it is allocated. // Destructor is used to release resources that allocated in the constructor. - // And currently, we construct and destruct the object in every SCF iteration. Maybe one day we + // And currently, we construct and destruct the object in every SCF iteration. Maybe one day we // will construct the object only once during the whole program life cycle. // In that case, allocate and free memory in compute function is more reasonable. checkCudaErrors(cudaFree(d_A)); @@ -283,4 +250,4 @@ void Diag_CusolverMP_gvd::outputParameters() template class Diag_CusolverMP_gvd; template class Diag_CusolverMP_gvd>; -#endif \ No newline at end of file +#endif diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh index 0e330908dc..96afc2e464 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "source_base/macros.h" @@ -53,7 +52,7 @@ class Diag_CusolverMP_gvd int globalMpiSize; cudaDataType_t datatype; - cal_comm_t cusolverCalComm = NULL; + ncclComm_t ncclComm = NULL; cudaStream_t localStream = NULL; cusolverMpHandle_t cusolverMpHandle = NULL; cusolverMpGrid_t grid = NULL; diff --git a/source/source_hsolver/kernels/cuda/helper_cusolver.h b/source/source_hsolver/kernels/cuda/helper_cusolver.h index 1beca4c6c7..40462963fd 100644 --- a/source/source_hsolver/kernels/cuda/helper_cusolver.h +++ b/source/source_hsolver/kernels/cuda/helper_cusolver.h @@ -5,39 +5,16 @@ #define W_ABACUS_DEVELOP_ABACUS_DEVELOP_SOURCE_MODULE_HSOLVER_KERNELS_CUDA_HELPER_CUSOLVER_H #ifdef __CUSOLVERMP #include +#include -const char* calGetErrorString(calError_t status) -{ - switch (status) - { - case CAL_OK: - return "CAL_OK"; - case CAL_ERROR: - return "CAL_ERROR"; - case CAL_ERROR_INVALID_PARAMETER: - return "CAL_ERROR_INVALID_PARAMETER"; - case CAL_ERROR_INTERNAL: - return "CAL_ERROR_INTERNAL"; - case CAL_ERROR_CUDA: - return "CAL_ERROR_CUDA"; - case CAL_ERROR_UCC: - return "CAL_ERROR_UCC"; - case CAL_ERROR_NOT_SUPPORTED: - return "CAL_ERROR_NOT_SUPPORTED"; - case CAL_ERROR_INPROGRESS: - return "CAL_ERROR_INPROGRESS"; - default: - return "CAL UNKNOWN ERROR"; - } -} - -#define CAL_CHECK(cmd) \ +// NCCL error checking (replaces libcal) +#define NCCL_CHECK(cmd) \ do \ { \ - calError_t status = cmd; \ - if (status != CAL_OK) \ + ncclResult_t status = cmd; \ + if (status != ncclSuccess) \ { \ - fprintf(stderr, "ERROR: %s %s %d\n", calGetErrorString(status), __FILE__, __LINE__); \ + fprintf(stderr, "NCCL ERROR: %s %s %d\n", ncclGetErrorString(status), __FILE__, __LINE__); \ abort(); \ } \ } while (0) From 42e1357d0f5d0d0ae555bba765e763f7d7646251 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 4 Feb 2026 21:15:48 +0800 Subject: [PATCH 2/9] update some docs --- toolchain/README.md | 30 +++++++++++++++++------------ toolchain/build_abacus_aocc-aocl.sh | 1 - toolchain/build_abacus_gcc-aocl.sh | 1 - toolchain/build_abacus_gnu.sh | 1 - toolchain/build_abacus_intel.sh | 1 - 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/toolchain/README.md b/toolchain/README.md index 8727e7f35d..107b526aee 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -292,32 +292,38 @@ cmake -B $BUILD_DIR \ #### Multi-GPU with cuSolverMP -1. **Check or install cuSolverMP manually:** -One may use NVIDIA HPC_SDK as an easy way to install cuSolverMP. +**Requirements**: cuSolverMP >= 0.7.0 (corresponds to NVIDIA HPC SDK >= 25.7) -2. **Install dependencies normally:** +**Option 1: Using NVIDIA HPC SDK (Recommended)** + +1. Load the NVHPC module: ```bash -./toolchain_gnu.sh +module load nvhpc ``` -3. **Build with cuSolverMP:** +2. Build with cuSolverMP enabled: ```bash cmake -B $BUILD_DIR \ -DUSE_CUDA=ON \ -DENABLE_CUSOLVERMP=ON \ - -DCAL_CUSOLVERMP_PATH=/path/to/math_libs/lib \ # ... other options ``` -3. **Set environment variables:** +The module file will set `NVHPC_ROOT_DIR` automatically, and CMake will find cuSolverMP and NCCL from it. + +**Option 2: Manual Installation** + +If you installed cuSolverMP and NCCL separately and not in system default paths, specify the paths explicitly: + ```bash -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/hpcx/ucc/lib -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/hpcx/ucx/lib -export CPATH=$CPATH:/path/to/math_libs/include +cmake -B $BUILD_DIR \ + -DUSE_CUDA=ON \ + -DENABLE_CUSOLVERMP=ON \ + -DCUSOLVERMP_PATH=/path/to/cusolvermp/ \ + -DNCCL_PATH=/path/to/nccl/ \ + # ... other options ``` -**Note**: cuSolverMP requires NVIDIA HPC SDK or system installation via package manager. - ## Troubleshooting ### Common Issues diff --git a/toolchain/build_abacus_aocc-aocl.sh b/toolchain/build_abacus_aocc-aocl.sh index a586442421..ac174db099 100755 --- a/toolchain/build_abacus_aocc-aocl.sh +++ b/toolchain/build_abacus_aocc-aocl.sh @@ -66,7 +66,6 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ # -DDeePMD_DIR=$DEEPMD \ # -DENABLE_CUSOLVERMP=ON \ -# -D CAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib cmake --build $BUILD_DIR -j `nproc` cmake --install $BUILD_DIR 2>/dev/null diff --git a/toolchain/build_abacus_gcc-aocl.sh b/toolchain/build_abacus_gcc-aocl.sh index 65e2491ee9..e2515f609b 100755 --- a/toolchain/build_abacus_gcc-aocl.sh +++ b/toolchain/build_abacus_gcc-aocl.sh @@ -64,7 +64,6 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ # -DDeePMD_DIR=$DEEPMD \ # -DENABLE_CUSOLVERMP=ON \ -# -D CAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib cmake --build $BUILD_DIR -j `nproc` cmake --install $BUILD_DIR 2>/dev/null diff --git a/toolchain/build_abacus_gnu.sh b/toolchain/build_abacus_gnu.sh index 37f5603f6e..0485738c1e 100755 --- a/toolchain/build_abacus_gnu.sh +++ b/toolchain/build_abacus_gnu.sh @@ -62,7 +62,6 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ # -DDeePMD_DIR=$DEEPMD \ # -DENABLE_CUSOLVERMP=ON \ -# -D CAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib cmake --build $BUILD_DIR -j `nproc` cmake --install $BUILD_DIR 2>/dev/null diff --git a/toolchain/build_abacus_intel.sh b/toolchain/build_abacus_intel.sh index c347b623a1..06fcdc5ac8 100755 --- a/toolchain/build_abacus_intel.sh +++ b/toolchain/build_abacus_intel.sh @@ -60,7 +60,6 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ # -DDeePMD_DIR=$DEEPMD \ # -DENABLE_CUSOLVERMP=ON \ -# -D CAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib cmake --build $BUILD_DIR -j `nproc` cmake --install $BUILD_DIR 2>/dev/null From e55ff3126a92903dde7e38bb37a870ec749b6d71 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 17:35:11 +0800 Subject: [PATCH 3/9] add libcal compatibility --- CMakeLists.txt | 109 +++++++++++++++--- .../source_base/module_device/device_check.h | 18 +++ .../kernels/cuda/diag_cusolvermp.cu | 83 +++++++++++-- .../kernels/cuda/diag_cusolvermp.cuh | 11 +- 4 files changed, 194 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6eaef572..053b14e696 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,12 @@ option(ENABLE_RAPIDJSON "Enable rapid-json usage" OFF) option(ENABLE_CNPY "Enable cnpy usage" OFF) option(ENABLE_CUSOLVERMP "Enable cusolvermp" OFF) +if(NOT DEFINED NVHPC_ROOT_DIR AND DEFINED ENV{NVHPC_ROOT}) + set(NVHPC_ROOT_DIR + "$ENV{NVHPC_ROOT}" + CACHE PATH "Path to NVIDIA HPC SDK root directory.") +endif() + # enable json support if(ENABLE_RAPIDJSON) find_package(RapidJSON) @@ -399,15 +405,34 @@ if(USE_CUDA) if (ENABLE_CUSOLVERMP) add_compile_definitions(__CUSOLVERMP) - # === Find NCCL === - # try NVHPC first - find_package(NVHPC QUIET CONFIG) - if(TARGET NVHPC::NCCL) - message(STATUS "Using NVHPC::NCCL target") - if(NOT TARGET NCCL::NCCL) - add_library(NCCL::NCCL ALIAS NVHPC::NCCL) + option(USE_LIBCAL + "Use libcal for cusolvermp communication (compatible with NVHPC SDK < 25.9 or cuSolverMp < 0.7.0)" + OFF) + + if(USE_LIBCAL) + add_compile_definitions(__USE_LIBCAL) + message(STATUS "cusolverMp: Using libcal backend") + + find_library(CAL_LIBRARY NAMES cal + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 math_libs/lib64) + find_path(CAL_INCLUDE_DIR NAMES cal.h + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES include math_libs/include) + + if(NOT CAL_LIBRARY OR NOT CAL_INCLUDE_DIR) + message(FATAL_ERROR "libcal not found. Set CAL_PATH or NVHPC_ROOT_DIR.") endif() + + message(STATUS "Found libcal: ${CAL_LIBRARY}") + add_library(CAL::CAL IMPORTED INTERFACE) + set_target_properties(CAL::CAL PROPERTIES + INTERFACE_LINK_LIBRARIES "${CAL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CAL_INCLUDE_DIR}") else() + message(STATUS "cusolverMp: Using NCCL backend") + + # === Find NCCL === find_library(NCCL_LIBRARY NAMES nccl HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} PATH_SUFFIXES lib lib64 comm_libs/nccl/lib) @@ -417,7 +442,7 @@ if(USE_CUDA) PATH_SUFFIXES include comm_libs/nccl/include) if(NOT NCCL_LIBRARY OR NOT NCCL_INCLUDE_DIR) - message(FATAL_ERROR "NCCL not found. Set NCCL_PATH or CUSOLVERMP_PATH.") + message(FATAL_ERROR "NCCL not found. Set NCCL_PATH or NVHPC_ROOT_DIR.") endif() message(STATUS "Found NCCL: ${NCCL_LIBRARY}") @@ -429,12 +454,12 @@ if(USE_CUDA) # === Find cusolverMp === find_library(CUSOLVERMP_LIBRARY NAMES cusolverMp - HINTS ${CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES lib lib64 math_libs/lib math_libs/lib64 math_libs/*/lib64) + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 math_libs/lib math_libs/lib64) find_path(CUSOLVERMP_INCLUDE_DIR NAMES cusolverMp.h - HINTS ${CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES include math_libs/include math_libs/*/include) + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES include math_libs/include) if(NOT CUSOLVERMP_LIBRARY OR NOT CUSOLVERMP_INCLUDE_DIR) message(FATAL_ERROR @@ -444,6 +469,54 @@ if(USE_CUDA) message(STATUS "Found cusolverMp: ${CUSOLVERMP_LIBRARY}") + set(NVHPC_SDK_VERSION "") + if(DEFINED NVHPC_ROOT_DIR) + get_filename_component(NVHPC_ROOT_DIR_BASENAME "${NVHPC_ROOT_DIR}" NAME) + if(NVHPC_ROOT_DIR_BASENAME MATCHES "^[0-9]+\\.[0-9]+$") + set(NVHPC_SDK_VERSION "${NVHPC_ROOT_DIR_BASENAME}") + endif() + endif() + + set(CUSOLVERMP_VERSION_STR "") + set(CUSOLVERMP_VERSION_HEADER "${CUSOLVERMP_INCLUDE_DIR}/cusolverMp.h") + if(EXISTS "${CUSOLVERMP_VERSION_HEADER}") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MAJOR_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_MAJOR[ \t]+[0-9]+") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MINOR_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_MINOR[ \t]+[0-9]+") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_PATCH_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_PATCH[ \t]+[0-9]+") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MAJOR "${CUSOLVERMP_MAJOR_LINE}") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MINOR "${CUSOLVERMP_MINOR_LINE}") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_PATCH "${CUSOLVERMP_PATCH_LINE}") + if(CUSOLVERMP_VER_MAJOR AND CUSOLVERMP_VER_MINOR AND CUSOLVERMP_VER_PATCH) + set(CUSOLVERMP_VERSION_STR + "${CUSOLVERMP_VER_MAJOR}.${CUSOLVERMP_VER_MINOR}.${CUSOLVERMP_VER_PATCH}") + endif() + endif() + + set(_recommend_use_libcal OFF) + if(NVHPC_SDK_VERSION AND NVHPC_SDK_VERSION VERSION_LESS "25.9") + set(_recommend_use_libcal ON) + endif() + if(CUSOLVERMP_VERSION_STR AND CUSOLVERMP_VERSION_STR VERSION_LESS "0.7.0") + set(_recommend_use_libcal ON) + endif() + + if(_recommend_use_libcal AND NOT USE_LIBCAL) + set(_nvhpc_version_for_msg "${NVHPC_SDK_VERSION}") + if(NOT _nvhpc_version_for_msg) + set(_nvhpc_version_for_msg "unknown") + endif() + set(_cusolvermp_version_for_msg "${CUSOLVERMP_VERSION_STR}") + if(NOT _cusolvermp_version_for_msg) + set(_cusolvermp_version_for_msg "unknown") + endif() + message(WARNING + "Detected NVHPC SDK ${_nvhpc_version_for_msg} and cuSolverMp ${_cusolvermp_version_for_msg}. " + "If NVHPC SDK < 25.9 or cuSolverMp < 0.7.0, please set -DUSE_LIBCAL=ON.") + endif() + # Create cusolverMp::cusolverMp imported target add_library(cusolverMp::cusolverMp IMPORTED INTERFACE) set_target_properties(cusolverMp::cusolverMp PROPERTIES @@ -451,9 +524,15 @@ if(USE_CUDA) INTERFACE_INCLUDE_DIRECTORIES "${CUSOLVERMP_INCLUDE_DIR}") # === Link libraries === - target_link_libraries(${ABACUS_BIN_NAME} - NCCL::NCCL - cusolverMp::cusolverMp) + if(USE_LIBCAL) + target_link_libraries(${ABACUS_BIN_NAME} + CAL::CAL + cusolverMp::cusolverMp) + else() + target_link_libraries(${ABACUS_BIN_NAME} + NCCL::NCCL + cusolverMp::cusolverMp) + endif() endif() endif() diff --git a/source/source_base/module_device/device_check.h b/source/source_base/module_device/device_check.h index f0e55dbb49..1aef8468fe 100644 --- a/source/source_base/module_device/device_check.h +++ b/source/source_base/module_device/device_check.h @@ -223,6 +223,9 @@ static const char* _cufftGetErrorString(cufftResult_t error) #ifdef __CUSOLVERMP #include +#ifdef __USE_LIBCAL +#include + static const char* _calGetErrorString(calError_t error) { switch (error) @@ -259,6 +262,21 @@ static const char* _calGetErrorString(calError_t error) exit(EXIT_FAILURE); \ } \ } while (0) +#else // !__USE_LIBCAL (use NCCL) +#include + +#define CHECK_NCCL(func) \ + do \ + { \ + ncclResult_t status = (func); \ + if (status != ncclSuccess) \ + { \ + fprintf(stderr, "In File %s : NCCL API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \ + ncclGetErrorString(status), status); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) +#endif // __USE_LIBCAL #endif // __CUSOLVERMP diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu index c875859cce..f21c1acfe2 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu @@ -15,6 +15,40 @@ extern "C" #include "source_base/module_device/device.h" #include "source_base/module_device/device_check.h" +#ifdef __USE_LIBCAL +// ============================================================================ +// libcal callback functions for MPI communication +// ============================================================================ + +static calError_t allgather(void* src_buf, + void* recv_buf, + size_t size, + void* data, + void** request) +{ + MPI_Comm comm = *(MPI_Comm*)data; + MPI_Request* req = new MPI_Request; + MPI_Iallgather(src_buf, size, MPI_BYTE, recv_buf, size, MPI_BYTE, comm, req); + *request = req; + return CAL_OK; +} + +static calError_t request_test(void* request) +{ + MPI_Request* req = (MPI_Request*)request; + int flag; + MPI_Test(req, &flag, MPI_STATUS_IGNORE); + return flag ? CAL_OK : CAL_ERROR; +} + +static calError_t request_free(void* request) +{ + MPI_Request* req = (MPI_Request*)request; + delete req; + return CAL_OK; +} +#endif // __USE_LIBCAL + template Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, const int narows, @@ -44,13 +78,26 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, int local_device_id = base_device::DeviceContext::instance().get_device_id(); Cblacs_gridinfo(this->cblacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); +#ifdef __USE_LIBCAL + // Initialize libcal communicator + cal_comm_create_params_t params; + params.allgather = allgather; + params.req_test = request_test; + params.req_free = request_free; + params.data = (void*)(mpi_comm); + params.rank = this->globalMpiRank; + params.nranks = this->globalMpiSize; + params.local_device = local_device_id; + CHECK_CAL(cal_comm_create(params, &this->cusolverCalComm)); +#else // Initialize NCCL communicator ncclUniqueId ncclId; if (this->globalMpiRank == 0) { - NCCL_CHECK(ncclGetUniqueId(&ncclId)); + CHECK_NCCL(ncclGetUniqueId(&ncclId)); } MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, mpi_comm); - NCCL_CHECK(ncclCommInitRank(&this->ncclComm, this->globalMpiSize, ncclId, this->globalMpiRank)); + CHECK_NCCL(ncclCommInitRank(&this->ncclComm, this->globalMpiSize, ncclId, this->globalMpiRank)); +#endif CHECK_CUDA(cudaStreamCreate(&this->localStream)); CHECK_CUSOLVER(cusolverMpCreate(&cusolverMpHandle, local_device_id, this->localStream)); @@ -83,7 +130,11 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, // Use ROW_MAJOR to match BLACS grid initialization (order='R' in parallel_2d.cpp) CHECK_CUSOLVER(cusolverMpCreateDeviceGrid(cusolverMpHandle, &this->grid, +#ifdef __USE_LIBCAL + this->cusolverCalComm, +#else this->ncclComm, +#endif this->nprows, this->npcols, CUSOLVERMP_GRID_MAPPING_ROW_MAJOR)); @@ -107,12 +158,22 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, template Diag_CusolverMP_gvd::~Diag_CusolverMP_gvd() { - checkCudaErrors(cudaStreamSynchronize(this->localStream)); - CUSOLVER_CHECK(cusolverMpDestroyMatrixDesc(this->desc_for_cusolvermp)); - CUSOLVER_CHECK(cusolverMpDestroyGrid(this->grid)); - CUSOLVER_CHECK(cusolverMpDestroy(this->cusolverMpHandle)); - NCCL_CHECK(ncclCommDestroy(this->ncclComm)); - checkCudaErrors(cudaStreamDestroy(this->localStream)); +#ifdef __USE_LIBCAL + CHECK_CAL(cal_comm_barrier(this->cusolverCalComm, this->localStream)); + CHECK_CAL(cal_stream_sync(this->cusolverCalComm, this->localStream)); + CHECK_CUSOLVER(cusolverMpDestroyMatrixDesc(this->desc_for_cusolvermp)); + CHECK_CUSOLVER(cusolverMpDestroyGrid(this->grid)); + CHECK_CUSOLVER(cusolverMpDestroy(this->cusolverMpHandle)); + CHECK_CAL(cal_comm_destroy(this->cusolverCalComm)); + CHECK_CUDA(cudaStreamDestroy(this->localStream)); +#else + CHECK_CUDA(cudaStreamSynchronize(this->localStream)); + CHECK_CUSOLVER(cusolverMpDestroyMatrixDesc(this->desc_for_cusolvermp)); + CHECK_CUSOLVER(cusolverMpDestroyGrid(this->grid)); + CHECK_CUSOLVER(cusolverMpDestroy(this->cusolverMpHandle)); + CHECK_NCCL(ncclCommDestroy(this->ncclComm)); + CHECK_CUDA(cudaStreamDestroy(this->localStream)); +#endif } @@ -133,7 +194,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o cudaMemcpy(d_A, (void*)A, this->n_local * this->m_local * sizeof(inputT), cudaMemcpyHostToDevice)); CHECK_CUDA( cudaMemcpy(d_B, (void*)B, this->n_local * this->m_local * sizeof(inputT), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaStreamSynchronize(this->localStream)); + CHECK_CUDA(cudaStreamSynchronize(this->localStream)); size_t sygvdWorkspaceInBytesOnDevice = 0; size_t sygvdWorkspaceInBytesOnHost = 0; @@ -170,7 +231,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o CHECK_CUDA(cudaMemset(d_sygvdInfo, 0, sizeof(int))); /* sync wait for data to arrive to device */ - checkCudaErrors(cudaStreamSynchronize(this->localStream)); + CHECK_CUDA(cudaStreamSynchronize(this->localStream)); CHECK_CUSOLVER(cusolverMpSygvd(cusolverMpHandle, CUSOLVER_EIG_TYPE_1, @@ -205,7 +266,7 @@ int Diag_CusolverMP_gvd::generalized_eigenvector(inputT* A, inputT* B, o { ModuleBase::WARNING_QUIT("cusolvermp", "cusolverMpSygvd failed with error"); } - checkCudaErrors(cudaStreamSynchronize(this->localStream)); + CHECK_CUDA(cudaStreamSynchronize(this->localStream)); CHECK_CUDA(cudaFree(d_sygvdWork)); CHECK_CUDA(cudaFree(d_sygvdInfo)); diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh index 96afc2e464..a62ee3ee62 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh @@ -7,6 +7,12 @@ #include #include "source_base/macros.h" +#ifdef __USE_LIBCAL +#include +#else +#include +#endif + template class Diag_CusolverMP_gvd { @@ -52,7 +58,11 @@ class Diag_CusolverMP_gvd int globalMpiSize; cudaDataType_t datatype; +#ifdef __USE_LIBCAL + cal_comm_t cusolverCalComm = NULL; +#else ncclComm_t ncclComm = NULL; +#endif cudaStream_t localStream = NULL; cusolverMpHandle_t cusolverMpHandle = NULL; cusolverMpGrid_t grid = NULL; @@ -65,4 +75,3 @@ class Diag_CusolverMP_gvd }; // 实现模板类的成员函数 - From dca9129d7f4dbe4768e201140df6749f58beb56b Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 18:19:47 +0800 Subject: [PATCH 4/9] Refactor cuSolverMp CMake setup and auto-select backend --- CMakeLists.txt | 134 +----------------------------------- cmake/SetupCuSolverMp.cmake | 130 ++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 131 deletions(-) create mode 100644 cmake/SetupCuSolverMp.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 053b14e696..703fda65a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -403,137 +403,9 @@ if(USE_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS}" CACHE STRING "CUDA flags" FORCE) endif() if (ENABLE_CUSOLVERMP) - add_compile_definitions(__CUSOLVERMP) - - option(USE_LIBCAL - "Use libcal for cusolvermp communication (compatible with NVHPC SDK < 25.9 or cuSolverMp < 0.7.0)" - OFF) - - if(USE_LIBCAL) - add_compile_definitions(__USE_LIBCAL) - message(STATUS "cusolverMp: Using libcal backend") - - find_library(CAL_LIBRARY NAMES cal - HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES lib lib64 math_libs/lib64) - find_path(CAL_INCLUDE_DIR NAMES cal.h - HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES include math_libs/include) - - if(NOT CAL_LIBRARY OR NOT CAL_INCLUDE_DIR) - message(FATAL_ERROR "libcal not found. Set CAL_PATH or NVHPC_ROOT_DIR.") - endif() - - message(STATUS "Found libcal: ${CAL_LIBRARY}") - add_library(CAL::CAL IMPORTED INTERFACE) - set_target_properties(CAL::CAL PROPERTIES - INTERFACE_LINK_LIBRARIES "${CAL_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${CAL_INCLUDE_DIR}") - else() - message(STATUS "cusolverMp: Using NCCL backend") - - # === Find NCCL === - find_library(NCCL_LIBRARY NAMES nccl - HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES lib lib64 comm_libs/nccl/lib) - find_path(NCCL_INCLUDE_DIR NAMES nccl.h - HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} - PATHS ${CUDA_TOOLKIT_ROOT_DIR} - PATH_SUFFIXES include comm_libs/nccl/include) - - if(NOT NCCL_LIBRARY OR NOT NCCL_INCLUDE_DIR) - message(FATAL_ERROR "NCCL not found. Set NCCL_PATH or NVHPC_ROOT_DIR.") - endif() - - message(STATUS "Found NCCL: ${NCCL_LIBRARY}") - add_library(NCCL::NCCL IMPORTED INTERFACE) - set_target_properties(NCCL::NCCL PROPERTIES - INTERFACE_LINK_LIBRARIES "${NCCL_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIR}") - endif() - - # === Find cusolverMp === - find_library(CUSOLVERMP_LIBRARY NAMES cusolverMp - HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES lib lib64 math_libs/lib math_libs/lib64) - - find_path(CUSOLVERMP_INCLUDE_DIR NAMES cusolverMp.h - HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} - PATH_SUFFIXES include math_libs/include) - - if(NOT CUSOLVERMP_LIBRARY OR NOT CUSOLVERMP_INCLUDE_DIR) - message(FATAL_ERROR - "cusolverMp not found. Set CUSOLVERMP_PATH or NVHPC_ROOT_DIR." - ) - endif() - - message(STATUS "Found cusolverMp: ${CUSOLVERMP_LIBRARY}") - - set(NVHPC_SDK_VERSION "") - if(DEFINED NVHPC_ROOT_DIR) - get_filename_component(NVHPC_ROOT_DIR_BASENAME "${NVHPC_ROOT_DIR}" NAME) - if(NVHPC_ROOT_DIR_BASENAME MATCHES "^[0-9]+\\.[0-9]+$") - set(NVHPC_SDK_VERSION "${NVHPC_ROOT_DIR_BASENAME}") - endif() - endif() - - set(CUSOLVERMP_VERSION_STR "") - set(CUSOLVERMP_VERSION_HEADER "${CUSOLVERMP_INCLUDE_DIR}/cusolverMp.h") - if(EXISTS "${CUSOLVERMP_VERSION_HEADER}") - file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MAJOR_LINE - REGEX "^#define[ \t]+CUSOLVERMP_VER_MAJOR[ \t]+[0-9]+") - file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MINOR_LINE - REGEX "^#define[ \t]+CUSOLVERMP_VER_MINOR[ \t]+[0-9]+") - file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_PATCH_LINE - REGEX "^#define[ \t]+CUSOLVERMP_VER_PATCH[ \t]+[0-9]+") - string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MAJOR "${CUSOLVERMP_MAJOR_LINE}") - string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MINOR "${CUSOLVERMP_MINOR_LINE}") - string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_PATCH "${CUSOLVERMP_PATCH_LINE}") - if(CUSOLVERMP_VER_MAJOR AND CUSOLVERMP_VER_MINOR AND CUSOLVERMP_VER_PATCH) - set(CUSOLVERMP_VERSION_STR - "${CUSOLVERMP_VER_MAJOR}.${CUSOLVERMP_VER_MINOR}.${CUSOLVERMP_VER_PATCH}") - endif() - endif() - - set(_recommend_use_libcal OFF) - if(NVHPC_SDK_VERSION AND NVHPC_SDK_VERSION VERSION_LESS "25.9") - set(_recommend_use_libcal ON) - endif() - if(CUSOLVERMP_VERSION_STR AND CUSOLVERMP_VERSION_STR VERSION_LESS "0.7.0") - set(_recommend_use_libcal ON) - endif() - - if(_recommend_use_libcal AND NOT USE_LIBCAL) - set(_nvhpc_version_for_msg "${NVHPC_SDK_VERSION}") - if(NOT _nvhpc_version_for_msg) - set(_nvhpc_version_for_msg "unknown") - endif() - set(_cusolvermp_version_for_msg "${CUSOLVERMP_VERSION_STR}") - if(NOT _cusolvermp_version_for_msg) - set(_cusolvermp_version_for_msg "unknown") - endif() - message(WARNING - "Detected NVHPC SDK ${_nvhpc_version_for_msg} and cuSolverMp ${_cusolvermp_version_for_msg}. " - "If NVHPC SDK < 25.9 or cuSolverMp < 0.7.0, please set -DUSE_LIBCAL=ON.") - endif() - - # Create cusolverMp::cusolverMp imported target - add_library(cusolverMp::cusolverMp IMPORTED INTERFACE) - set_target_properties(cusolverMp::cusolverMp PROPERTIES - INTERFACE_LINK_LIBRARIES "${CUSOLVERMP_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${CUSOLVERMP_INCLUDE_DIR}") - - # === Link libraries === - if(USE_LIBCAL) - target_link_libraries(${ABACUS_BIN_NAME} - CAL::CAL - cusolverMp::cusolverMp) - else() - target_link_libraries(${ABACUS_BIN_NAME} - NCCL::NCCL - cusolverMp::cusolverMp) - endif() - + # Keep cuSolverMp discovery/linking logic in a dedicated module. + include(cmake/SetupCuSolverMp.cmake) + abacus_setup_cusolvermp(${ABACUS_BIN_NAME}) endif() endif() endif() diff --git a/cmake/SetupCuSolverMp.cmake b/cmake/SetupCuSolverMp.cmake new file mode 100644 index 0000000000..7262f18e7f --- /dev/null +++ b/cmake/SetupCuSolverMp.cmake @@ -0,0 +1,130 @@ +# ============================================================================= +# Configure cuSolverMp dependencies and linking for ABACUS +# ============================================================================= + +include_guard(GLOBAL) + +function(abacus_setup_cusolvermp target_name) + # Reads hint variables from parent/cache scope (for example: + # NVHPC_ROOT_DIR, CAL_CUSOLVERMP_PATH, NCCL_PATH). + # In CMake, function bodies can read outer-scope variables unless shadowed. + add_compile_definitions(__CUSOLVERMP) + + # Find cuSolverMp first, then decide communicator backend. + find_library(CUSOLVERMP_LIBRARY NAMES cusolverMp + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 math_libs/lib math_libs/lib64) + + find_path(CUSOLVERMP_INCLUDE_DIR NAMES cusolverMp.h + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES include math_libs/include) + + if(NOT CUSOLVERMP_LIBRARY OR NOT CUSOLVERMP_INCLUDE_DIR) + message(FATAL_ERROR + "cusolverMp not found. Set CUSOLVERMP_PATH or NVHPC_ROOT_DIR." + ) + endif() + + message(STATUS "Found cusolverMp: ${CUSOLVERMP_LIBRARY}") + + set(CUSOLVERMP_VERSION_STR "") + set(CUSOLVERMP_VERSION_HEADER "${CUSOLVERMP_INCLUDE_DIR}/cusolverMp.h") + if(EXISTS "${CUSOLVERMP_VERSION_HEADER}") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MAJOR_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_MAJOR[ \t]+[0-9]+") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_MINOR_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_MINOR[ \t]+[0-9]+") + file(STRINGS "${CUSOLVERMP_VERSION_HEADER}" CUSOLVERMP_PATCH_LINE + REGEX "^#define[ \t]+CUSOLVERMP_VER_PATCH[ \t]+[0-9]+") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MAJOR "${CUSOLVERMP_MAJOR_LINE}") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_MINOR "${CUSOLVERMP_MINOR_LINE}") + string(REGEX MATCH "([0-9]+)" CUSOLVERMP_VER_PATCH "${CUSOLVERMP_PATCH_LINE}") + if(NOT CUSOLVERMP_VER_MAJOR STREQUAL "" + AND NOT CUSOLVERMP_VER_MINOR STREQUAL "" + AND NOT CUSOLVERMP_VER_PATCH STREQUAL "") + set(CUSOLVERMP_VERSION_STR + "${CUSOLVERMP_VER_MAJOR}.${CUSOLVERMP_VER_MINOR}.${CUSOLVERMP_VER_PATCH}") + endif() + endif() + + # Auto-select communicator backend by cuSolverMp version. + # cuSolverMp < 0.7.0 -> libcal, otherwise -> NCCL. + set(_use_libcal OFF) + if(CUSOLVERMP_VERSION_STR AND CUSOLVERMP_VERSION_STR VERSION_LESS "0.7.0") + set(_use_libcal ON) + message(STATUS + "Detected cuSolverMp ${CUSOLVERMP_VERSION_STR} (< 0.7.0). Using libcal backend.") + elseif(CUSOLVERMP_VERSION_STR) + message(STATUS + "Detected cuSolverMp ${CUSOLVERMP_VERSION_STR} (>= 0.7.0). Using NCCL backend.") + elseif(NOT CUSOLVERMP_VERSION_STR) + message(WARNING + "Unable to detect cuSolverMp version from header. Using NCCL backend by default.") + endif() + + # Backend selection: + # - _use_libcal=ON -> libcal communicator backend + # - _use_libcal=OFF -> NCCL communicator backend + if(_use_libcal) + add_compile_definitions(__USE_LIBCAL) + + find_library(CAL_LIBRARY NAMES cal + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 math_libs/lib64) + find_path(CAL_INCLUDE_DIR NAMES cal.h + HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES include math_libs/include) + + if(NOT CAL_LIBRARY OR NOT CAL_INCLUDE_DIR) + message(FATAL_ERROR "libcal not found. Set CAL_PATH or NVHPC_ROOT_DIR.") + endif() + + message(STATUS "Found libcal: ${CAL_LIBRARY}") + if(NOT TARGET CAL::CAL) + add_library(CAL::CAL IMPORTED INTERFACE) + set_target_properties(CAL::CAL PROPERTIES + INTERFACE_LINK_LIBRARIES "${CAL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CAL_INCLUDE_DIR}") + endif() + else() + + find_library(NCCL_LIBRARY NAMES nccl + HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} + PATH_SUFFIXES lib lib64 comm_libs/nccl/lib) + find_path(NCCL_INCLUDE_DIR NAMES nccl.h + HINTS ${NCCL_PATH} ${NVHPC_ROOT_DIR} + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include comm_libs/nccl/include) + + if(NOT NCCL_LIBRARY OR NOT NCCL_INCLUDE_DIR) + message(FATAL_ERROR "NCCL not found. Set NCCL_PATH or NVHPC_ROOT_DIR.") + endif() + + message(STATUS "Found NCCL: ${NCCL_LIBRARY}") + if(NOT TARGET NCCL::NCCL) + add_library(NCCL::NCCL IMPORTED INTERFACE) + set_target_properties(NCCL::NCCL PROPERTIES + INTERFACE_LINK_LIBRARIES "${NCCL_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIR}") + endif() + endif() + + # Create cusolverMp::cusolverMp imported target + if(NOT TARGET cusolverMp::cusolverMp) + add_library(cusolverMp::cusolverMp IMPORTED INTERFACE) + set_target_properties(cusolverMp::cusolverMp PROPERTIES + INTERFACE_LINK_LIBRARIES "${CUSOLVERMP_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUSOLVERMP_INCLUDE_DIR}") + endif() + + # === Link libraries === + if(_use_libcal) + target_link_libraries(${target_name} + CAL::CAL + cusolverMp::cusolverMp) + else() + target_link_libraries(${target_name} + NCCL::NCCL + cusolverMp::cusolverMp) + endif() +endfunction() From 29713cd3fea1709a4b94bfbb54c229b2599b6fca Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 18:30:15 +0800 Subject: [PATCH 5/9] fix an error --- .../kernels/cuda/diag_cusolvermp.cu | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu index f21c1acfe2..a36705bfa3 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu @@ -20,31 +20,34 @@ extern "C" // libcal callback functions for MPI communication // ============================================================================ -static calError_t allgather(void* src_buf, - void* recv_buf, - size_t size, - void* data, - void** request) +static calError_t allgather(void* src_buf, void* recv_buf, size_t size, void* data, void** request) { - MPI_Comm comm = *(MPI_Comm*)data; - MPI_Request* req = new MPI_Request; - MPI_Iallgather(src_buf, size, MPI_BYTE, recv_buf, size, MPI_BYTE, comm, req); - *request = req; + MPI_Request req; + intptr_t ptr = reinterpret_cast(data); + int err = MPI_Iallgather(src_buf, size, MPI_BYTE, recv_buf, size, MPI_BYTE, (MPI_Comm)ptr, &req); + if (err != MPI_SUCCESS) + { + return CAL_ERROR; + } + *request = (void*)(req); return CAL_OK; } static calError_t request_test(void* request) { - MPI_Request* req = (MPI_Request*)request; - int flag; - MPI_Test(req, &flag, MPI_STATUS_IGNORE); - return flag ? CAL_OK : CAL_ERROR; + intptr_t ptr = reinterpret_cast(request); + MPI_Request req = (MPI_Request)ptr; + int completed; + int err = MPI_Test(&req, &completed, MPI_STATUS_IGNORE); + if (err != MPI_SUCCESS) + { + return CAL_ERROR; + } + return completed ? CAL_OK : CAL_ERROR_INPROGRESS; } static calError_t request_free(void* request) { - MPI_Request* req = (MPI_Request*)request; - delete req; return CAL_OK; } #endif // __USE_LIBCAL From 80fdf10a2e552e3a62d4b69561813a6012055a1e Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 18:32:41 +0800 Subject: [PATCH 6/9] update toolchain doc --- toolchain/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/toolchain/README.md b/toolchain/README.md index 107b526aee..f59b14894f 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -292,8 +292,6 @@ cmake -B $BUILD_DIR \ #### Multi-GPU with cuSolverMP -**Requirements**: cuSolverMP >= 0.7.0 (corresponds to NVIDIA HPC SDK >= 25.7) - **Option 1: Using NVIDIA HPC SDK (Recommended)** 1. Load the NVHPC module: @@ -309,7 +307,7 @@ cmake -B $BUILD_DIR \ # ... other options ``` -The module file will set `NVHPC_ROOT_DIR` automatically, and CMake will find cuSolverMP and NCCL from it. +The module file will set `NVHPC_ROOT` automatically, and CMake will find cuSolverMP and NCCL(or libcal) from it. **Option 2: Manual Installation** From 5096a8aca8e812f911e4266537dae83cb01a7349 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 18:44:36 +0800 Subject: [PATCH 7/9] update some comments --- cmake/SetupCuSolverMp.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmake/SetupCuSolverMp.cmake b/cmake/SetupCuSolverMp.cmake index 7262f18e7f..5426351552 100644 --- a/cmake/SetupCuSolverMp.cmake +++ b/cmake/SetupCuSolverMp.cmake @@ -5,9 +5,6 @@ include_guard(GLOBAL) function(abacus_setup_cusolvermp target_name) - # Reads hint variables from parent/cache scope (for example: - # NVHPC_ROOT_DIR, CAL_CUSOLVERMP_PATH, NCCL_PATH). - # In CMake, function bodies can read outer-scope variables unless shadowed. add_compile_definitions(__CUSOLVERMP) # Find cuSolverMp first, then decide communicator backend. From 548f4be74384f636de6c040c2f11541843b58dd2 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Fri, 6 Feb 2026 20:28:52 +0800 Subject: [PATCH 8/9] change libcal to CAL --- cmake/SetupCuSolverMp.cmake | 22 +++++++++---------- .../source_base/module_device/device_check.h | 6 ++--- .../kernels/cuda/diag_cusolvermp.cu | 14 ++++++------ .../kernels/cuda/diag_cusolvermp.cuh | 4 ++-- toolchain/README.md | 2 +- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/cmake/SetupCuSolverMp.cmake b/cmake/SetupCuSolverMp.cmake index 5426351552..f0505c77c0 100644 --- a/cmake/SetupCuSolverMp.cmake +++ b/cmake/SetupCuSolverMp.cmake @@ -45,12 +45,12 @@ function(abacus_setup_cusolvermp target_name) endif() # Auto-select communicator backend by cuSolverMp version. - # cuSolverMp < 0.7.0 -> libcal, otherwise -> NCCL. - set(_use_libcal OFF) + # cuSolverMp < 0.7.0 -> CAL, otherwise -> NCCL. + set(_use_cal OFF) if(CUSOLVERMP_VERSION_STR AND CUSOLVERMP_VERSION_STR VERSION_LESS "0.7.0") - set(_use_libcal ON) + set(_use_cal ON) message(STATUS - "Detected cuSolverMp ${CUSOLVERMP_VERSION_STR} (< 0.7.0). Using libcal backend.") + "Detected cuSolverMp ${CUSOLVERMP_VERSION_STR} (< 0.7.0). Using CAL backend.") elseif(CUSOLVERMP_VERSION_STR) message(STATUS "Detected cuSolverMp ${CUSOLVERMP_VERSION_STR} (>= 0.7.0). Using NCCL backend.") @@ -60,10 +60,10 @@ function(abacus_setup_cusolvermp target_name) endif() # Backend selection: - # - _use_libcal=ON -> libcal communicator backend - # - _use_libcal=OFF -> NCCL communicator backend - if(_use_libcal) - add_compile_definitions(__USE_LIBCAL) + # - _use_cal=ON -> cal communicator backend + # - _use_cal=OFF -> NCCL communicator backend + if(_use_cal) + add_compile_definitions(__USE_CAL) find_library(CAL_LIBRARY NAMES cal HINTS ${CAL_CUSOLVERMP_PATH} ${NVHPC_ROOT_DIR} @@ -73,10 +73,10 @@ function(abacus_setup_cusolvermp target_name) PATH_SUFFIXES include math_libs/include) if(NOT CAL_LIBRARY OR NOT CAL_INCLUDE_DIR) - message(FATAL_ERROR "libcal not found. Set CAL_PATH or NVHPC_ROOT_DIR.") + message(FATAL_ERROR "CAL not found. Set CAL_PATH or NVHPC_ROOT_DIR.") endif() - message(STATUS "Found libcal: ${CAL_LIBRARY}") + message(STATUS "Found CAL: ${CAL_LIBRARY}") if(NOT TARGET CAL::CAL) add_library(CAL::CAL IMPORTED INTERFACE) set_target_properties(CAL::CAL PROPERTIES @@ -115,7 +115,7 @@ function(abacus_setup_cusolvermp target_name) endif() # === Link libraries === - if(_use_libcal) + if(_use_cal) target_link_libraries(${target_name} CAL::CAL cusolverMp::cusolverMp) diff --git a/source/source_base/module_device/device_check.h b/source/source_base/module_device/device_check.h index 1aef8468fe..f649676001 100644 --- a/source/source_base/module_device/device_check.h +++ b/source/source_base/module_device/device_check.h @@ -223,7 +223,7 @@ static const char* _cufftGetErrorString(cufftResult_t error) #ifdef __CUSOLVERMP #include -#ifdef __USE_LIBCAL +#ifdef __USE_CAL #include static const char* _calGetErrorString(calError_t error) @@ -262,7 +262,7 @@ static const char* _calGetErrorString(calError_t error) exit(EXIT_FAILURE); \ } \ } while (0) -#else // !__USE_LIBCAL (use NCCL) +#else // !__USE_CAL (use NCCL) #include #define CHECK_NCCL(func) \ @@ -276,7 +276,7 @@ static const char* _calGetErrorString(calError_t error) exit(EXIT_FAILURE); \ } \ } while (0) -#endif // __USE_LIBCAL +#endif // __USE_CAL #endif // __CUSOLVERMP diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu index a36705bfa3..c53139897f 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cu @@ -15,9 +15,9 @@ extern "C" #include "source_base/module_device/device.h" #include "source_base/module_device/device_check.h" -#ifdef __USE_LIBCAL +#ifdef __USE_CAL // ============================================================================ -// libcal callback functions for MPI communication +// CAL callback functions for MPI communication // ============================================================================ static calError_t allgather(void* src_buf, void* recv_buf, size_t size, void* data, void** request) @@ -50,7 +50,7 @@ static calError_t request_free(void* request) { return CAL_OK; } -#endif // __USE_LIBCAL +#endif // __USE_CAL template Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, @@ -81,8 +81,8 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, int local_device_id = base_device::DeviceContext::instance().get_device_id(); Cblacs_gridinfo(this->cblacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); -#ifdef __USE_LIBCAL - // Initialize libcal communicator +#ifdef __USE_CAL + // Initialize CAL communicator cal_comm_create_params_t params; params.allgather = allgather; params.req_test = request_test; @@ -133,7 +133,7 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, // Use ROW_MAJOR to match BLACS grid initialization (order='R' in parallel_2d.cpp) CHECK_CUSOLVER(cusolverMpCreateDeviceGrid(cusolverMpHandle, &this->grid, -#ifdef __USE_LIBCAL +#ifdef __USE_CAL this->cusolverCalComm, #else this->ncclComm, @@ -161,7 +161,7 @@ Diag_CusolverMP_gvd::Diag_CusolverMP_gvd(const MPI_Comm mpi_comm, template Diag_CusolverMP_gvd::~Diag_CusolverMP_gvd() { -#ifdef __USE_LIBCAL +#ifdef __USE_CAL CHECK_CAL(cal_comm_barrier(this->cusolverCalComm, this->localStream)); CHECK_CAL(cal_stream_sync(this->cusolverCalComm, this->localStream)); CHECK_CUSOLVER(cusolverMpDestroyMatrixDesc(this->desc_for_cusolvermp)); diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh index a62ee3ee62..0b89de792a 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh @@ -7,7 +7,7 @@ #include #include "source_base/macros.h" -#ifdef __USE_LIBCAL +#ifdef __USE_CAL #include #else #include @@ -58,7 +58,7 @@ class Diag_CusolverMP_gvd int globalMpiSize; cudaDataType_t datatype; -#ifdef __USE_LIBCAL +#ifdef __USE_CAL cal_comm_t cusolverCalComm = NULL; #else ncclComm_t ncclComm = NULL; diff --git a/toolchain/README.md b/toolchain/README.md index f59b14894f..e6b81038a4 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -307,7 +307,7 @@ cmake -B $BUILD_DIR \ # ... other options ``` -The module file will set `NVHPC_ROOT` automatically, and CMake will find cuSolverMP and NCCL(or libcal) from it. +The module file will set `NVHPC_ROOT` automatically, and CMake will find cuSolverMP and NCCL(or CAL) from it. **Option 2: Manual Installation** From 5566529fc20cfb6fba42275b831dbc2aa4c88931 Mon Sep 17 00:00:00 2001 From: dzzz2001 <153698752+dzzz2001@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:00:56 +0800 Subject: [PATCH 9/9] remove meaningless comment --- source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh index 0b89de792a..fe49677447 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolvermp.cuh @@ -73,5 +73,3 @@ class Diag_CusolverMP_gvd int64_t matrix_i; int64_t matrix_j; }; - -// 实现模板类的成员函数