Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions source/source_base/module_device/cuda_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
* @file cuda_compat.h
* @brief Compatibility layer for CUDA and NVTX headers across different CUDA Toolkit versions.
*
* This header abstracts the differences in NVTX (NVIDIA Tools Extension) header locations
* between CUDA Toolkit versions.
*
* @note Depends on the CUDA_VERSION macro defined in <cuda.h>.
*
*/

#ifndef CUDA_COMPAT_H_
#define CUDA_COMPAT_H_

// <cuda.h> ships with the CUDA Toolkit only, so pull it in only when CUDA is
// actually in play: either the build defines __CUDA (the same switch that
// guards the NVTX includes below) or the translation unit is being compiled
// by nvcc (which predefines __CUDACC__). Without this guard, reaching this
// header from a build that has no CUDA Toolkit is a hard "file not found"
// error instead of a no-op.
#if defined(__CUDA) || defined(__CUDACC__)
#include <cuda.h> // defines CUDA_VERSION
#endif

// NVTX header selection: CUDA Toolkit < 12.9 vs. >= 12.9.
// The header path of the NVTX profiling API depends on the toolkit version:
// - CUDA Toolkit <  12.9: include the legacy header "nvToolsExt.h".
// - CUDA Toolkit >= 12.9: include "nvtx3/nvToolsExt.h" instead, because the
//   legacy NVTX v2 header is no longer available as of 12.9.
// Selecting the path here lets NVTX profiling APIs (e.g. nvtxRangePush) be
// used uniformly across CUDA versions by including only this header.
// See:
// https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#id4
//
// NVTX is opt-in: nothing is included unless both __CUDA and __USE_NVTX are
// defined. CUDA_VERSION is guaranteed to be defined inside this block because
// __CUDA also enables the <cuda.h> include above.
#if defined(__CUDA) && defined(__USE_NVTX)
#if CUDA_VERSION < 12090
#include "nvToolsExt.h"
#else
#include "nvtx3/nvToolsExt.h"
#endif
#endif

#endif // CUDA_COMPAT_H_
6 changes: 1 addition & 5 deletions source/source_base/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@
#include "source_base/formatter.h"

#if defined(__CUDA) && defined(__USE_NVTX)
#if CUDA_VERSION < 12090
#include "nvToolsExt.h"
#else
#include "nvtx3/nvToolsExt.h"
#endif
#include "source_base/module_device/cuda_compat.h"
#include "source_io/module_parameter/parameter.h"
#endif

Expand Down
20 changes: 7 additions & 13 deletions source/source_hsolver/kernels/cuda/diag_cusolver.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@
#include <cuda.h>
#include <complex>

#if CUDA_VERSION < 12090
#include "nvToolsExt.h"
#else
#include "nvtx3/nvToolsExt.h"
#endif

#include <cuda_runtime.h>
#include <cusolverDn.h>

Expand Down Expand Up @@ -39,7 +33,7 @@ class Diag_Cusolver_gvd{
double *d_A = nullptr;
double *d_B = nullptr;
double *d_work = nullptr;

cuDoubleComplex *d_A2 = nullptr;
cuDoubleComplex *d_B2 = nullptr;
cuDoubleComplex *d_work2 = nullptr;
Expand All @@ -54,7 +48,7 @@ class Diag_Cusolver_gvd{
// - init_double : initializing relevant double type data structures and gpu apis' handle and memory
// - init_complex : initializing relevant complex type data structures and gpu apis' handle and memory
// Input Parameters
// N: the dimension of the matrix
void init_double(int N);
void init_complex(int N);

Expand All @@ -70,17 +64,17 @@ public:
// - Dngvd_double : dense double type matrix
// - Dngvd_complex : dense complex type matrix
// Input Parameters
// N: the number of rows of the matrix
// M: the number of columns of the matrix
// A: the Hermitian matrix A in A x = lambda B x (column major)
// B: the SPD matrix B in A x = lambda B x (column major)
// Output Parameter
// W: generalized eigenvalues
// V: generalized eigenvectors (column major)

void Dngvd_double(int N, int M, double *A, double *B, double *W, double *V);
void Dngvd_complex(int N, int M, std::complex<double> *A, std::complex<double> *B, double *W, std::complex<double> *V);

void Dngvd(int N, int M, double *A, double *B, double *W, double *V)
{
return Dngvd_double(N, M, A, B, W, V);
Expand Down
Loading