From 2af2d47e1680f3ec44af96a0c81869c5fd7d9fa1 Mon Sep 17 00:00:00 2001 From: Chen Nuo <49788094+Cstandardlib@users.noreply.github.com> Date: Tue, 9 Dec 2025 19:54:49 +0800 Subject: [PATCH 1/2] Support different CUDA versions in one single cuda_compat.h --- .../source_base/module_device/cuda_compat.h | 34 +++++++++++++++++++ source/source_base/timer.cpp | 6 +--- .../kernels/cuda/diag_cusolver.cuh | 20 +++++------ 3 files changed, 43 insertions(+), 17 deletions(-) create mode 100644 source/source_base/module_device/cuda_compat.h diff --git a/source/source_base/module_device/cuda_compat.h b/source/source_base/module_device/cuda_compat.h new file mode 100644 index 0000000000..78c0f6420c --- /dev/null +++ b/source/source_base/module_device/cuda_compat.h @@ -0,0 +1,34 @@ +/** + * @file cuda_compat.h + * @brief Compatibility layer for CUDA and NVTX headers across different CUDA Toolkit versions. + * + * This header abstracts the differences in NVTX (NVIDIA Tools Extension) header locations + * between CUDA Toolkit versions. + * + * @note Depends on the CUDA_VERSION macro defined in <cuda.h>. + * + */ + +#ifndef CUDA_COMPAT_H_ +#define CUDA_COMPAT_H_ + +#include <cuda.h> // defines CUDA_VERSION + +// NVTX header for CUDA versions prior to 12.9 vs. 12.9+ +// This block ensures the correct NVTX header path is used based on CUDA_VERSION. +// - For CUDA Toolkit < 12.9, the legacy header "nvToolsExt.h" is included. +// - For CUDA Toolkit >= 12.9, the modern header "nvtx3/nvToolsExt.h" is included, +// and NVTX v2 is removed from 12.9. +// This allows NVTX profiling APIs (e.g. nvtxRangePush) to be used consistently +// across different CUDA versions. 
+// See: +// https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#id4 +#if defined(__CUDA) && defined(__USE_NVTX) +#if CUDA_VERSION < 12090 + #include "nvToolsExt.h" +#else + #include "nvtx3/nvToolsExt.h" +#endif +#endif + +#endif // CUDA_COMPAT_H_ diff --git a/source/source_base/timer.cpp b/source/source_base/timer.cpp index f55bd6776a..c8cd3bc6ac 100644 --- a/source/source_base/timer.cpp +++ b/source/source_base/timer.cpp @@ -15,11 +15,7 @@ #include "source_base/formatter.h" #if defined(__CUDA) && defined(__USE_NVTX) -#if CUDA_VERSION < 12090 -#include "nvToolsExt.h" -#else -#include "nvtx3/nvToolsExt.h" -#endif +#include "source_base/module_device/cuda_compat.h" #include "source_io/module_parameter/parameter.h" #endif diff --git a/source/source_hsolver/kernels/cuda/diag_cusolver.cuh b/source/source_hsolver/kernels/cuda/diag_cusolver.cuh index faf4ec0a09..3a9a19e4e8 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolver.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolver.cuh @@ -3,11 +3,7 @@ #include #include -#if CUDA_VERSION < 12090 -#include "nvToolsExt.h" -#else -#include "nvtx3/nvToolsExt.h" -#endif +// #include "source_base/module_device/cuda_compat.h" #include #include @@ -39,7 +35,7 @@ class Diag_Cusolver_gvd{ double *d_A = nullptr; double *d_B = nullptr; double *d_work = nullptr; - + cuDoubleComplex *d_A2 = nullptr; cuDoubleComplex *d_B2 = nullptr; cuDoubleComplex *d_work2 = nullptr; @@ -54,7 +50,7 @@ class Diag_Cusolver_gvd{ // - init_double : initializing relevant double type data structures and gpu apis' handle and memory // - init_complex : initializing relevant complex type data structures and gpu apis' handle and memory // Input Parameters -// N: the dimension of the matrix +// N: the dimension of the matrix void init_double(int N); void init_complex(int N); @@ -70,17 +66,17 @@ public: // - Dngvd_double : dense double type matrix // - Dngvd_complex : dense complex type matrix // Input Parameters -// N: the 
number of rows of the matrix -// M: the number of cols of the matrix -// A: the hermitian matrix A in A x=lambda B (column major) -// B: the SPD matrix B in A x=lambda B (column major) +// N: the number of rows of the matrix +// M: the number of cols of the matrix +// A: the hermitian matrix A in A x=lambda B (column major) +// B: the SPD matrix B in A x=lambda B (column major) // Output Parameter // W: generalized eigenvalues // V: generalized eigenvectors (column major) void Dngvd_double(int N, int M, double *A, double *B, double *W, double *V); void Dngvd_complex(int N, int M, std::complex<double> *A, std::complex<double> *B, double *W, std::complex<double> *V); - + void Dngvd(int N, int M, double *A, double *B, double *W, double *V) { return Dngvd_double(N, M, A, B, W, V); From 0f2278415b0fc43e3092c8c4f7253687e00fc931 Mon Sep 17 00:00:00 2001 From: Chen Nuo <49788094+Cstandardlib@users.noreply.github.com> Date: Tue, 9 Dec 2025 23:54:58 +0800 Subject: [PATCH 2/2] Remove useless nvtx header --- source/source_hsolver/kernels/cuda/diag_cusolver.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/source_hsolver/kernels/cuda/diag_cusolver.cuh b/source/source_hsolver/kernels/cuda/diag_cusolver.cuh index 3a9a19e4e8..e47d43be50 100644 --- a/source/source_hsolver/kernels/cuda/diag_cusolver.cuh +++ b/source/source_hsolver/kernels/cuda/diag_cusolver.cuh @@ -3,8 +3,6 @@ #include #include -// #include "source_base/module_device/cuda_compat.h" - #include #include