From 749ae819013732ca47a5a9d751f4792574f836a6 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 12 Dec 2023 10:01:10 -0500 Subject: [PATCH 1/5] Only pick between host and device in handlers. --- resolve/matrix/MatrixHandler.cpp | 29 ++++++++++++------------ resolve/matrix/MatrixHandler.hpp | 8 +++---- resolve/vector/VectorHandler.cpp | 38 ++++++++++++++++---------------- resolve/vector/VectorHandler.hpp | 5 ++--- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index e053ee4d2..40e258b53 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -43,8 +43,9 @@ namespace ReSolve { MatrixHandler::~MatrixHandler() { delete cpuImpl_; - if (isCudaEnabled_) delete cudaImpl_; - if (isHipEnabled_) delete hipImpl_; + if (isCudaEnabled_ || isHipEnabled_) { + delete devImpl_; + } } /** @@ -71,8 +72,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - cudaImpl_ = new MatrixHandlerCuda(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerCuda(new_workspace); isCpuEnabled_ = true; isCudaEnabled_ = true; } @@ -89,8 +90,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - hipImpl_ = new MatrixHandlerHip(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerHip(new_workspace); isCpuEnabled_ = true; isHipEnabled_ = true; } @@ -100,9 +101,9 @@ namespace ReSolve { if (memspace == "cpu") { cpuImpl_->setValuesChanged(isValuesChanged); } else if (memspace == "cuda") { - cudaImpl_->setValuesChanged(isValuesChanged); + devImpl_->setValuesChanged(isValuesChanged); } else if (memspace == "hip") { - hipImpl_->setValuesChanged(isValuesChanged); + devImpl_->setValuesChanged(isValuesChanged); } else { out::error() << "Unsupported device " << memspace << "\n"; } @@ -291,11 +292,11 @@ namespace ReSolve { std::string memspace) { if (memspace == "cuda" ) { - return cudaImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + return devImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); } else if (memspace == "cpu") { return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); } else if (memspace == "hip") { - return hipImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + return devImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); } else { out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; return 1; @@ -305,11 +306,11 @@ namespace ReSolve { int MatrixHandler::matrixInfNorm(matrix::Sparse *A, real_type* norm, std::string memspace) { if (memspace == "cuda" ) { - return cudaImpl_->matrixInfNorm(A, norm); + return devImpl_->matrixInfNorm(A, norm); } else if (memspace == "cpu") { return cpuImpl_->matrixInfNorm(A, norm); } else if (memspace == "hip") { - return hipImpl_->matrixInfNorm(A, norm); + return devImpl_->matrixInfNorm(A, norm); } else { out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; return 1; @@ -320,9 +321,9 @@ namespace ReSolve { int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) { if (memspace == "cuda") { - return cudaImpl_->csc2csr(A_csc, A_csr); + return devImpl_->csc2csr(A_csc, A_csr); } else if (memspace == "hip") { - return hipImpl_->csc2csr(A_csc, A_csr); + return devImpl_->csc2csr(A_csc, A_csr); } else if (memspace == "cpu") { out::warning() << "Using untested csc2csr on CPU ..." << std::endl; return cpuImpl_->csc2csr(A_csc, A_csr); diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 74a994b78..bfdfeb318 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -69,14 +69,12 @@ namespace ReSolve { private: bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - MemoryHandler mem_; ///< Device memory manager object - MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to CPU implementation - MatrixHandlerImpl* cudaImpl_{nullptr}; ///< Pointer to CUDA implementation - MatrixHandlerImpl* hipImpl_{nullptr}; ///< Pointer to HIP implementation + MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to host implementation + MatrixHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated - bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated + bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated }; } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index d9b6c08e4..efc95d4f7 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -46,7 +46,7 @@ namespace ReSolve { */ VectorHandler::VectorHandler(LinAlgWorkspaceCUDA* new_workspace) { - cudaImpl_ = new VectorHandlerCuda(new_workspace); + devImpl_ = new VectorHandlerCuda(new_workspace); cpuImpl_ = new VectorHandlerCpu(); isCudaEnabled_ = true; @@ -61,7 +61,7 @@ namespace ReSolve { */ VectorHandler::VectorHandler(LinAlgWorkspaceHIP* new_workspace) { - hipImpl_ = new VectorHandlerHip(new_workspace); + devImpl_ = new VectorHandlerHip(new_workspace); cpuImpl_ = new VectorHandlerCpu(); isHipEnabled_ = true; @@ -75,9 +75,9 @@ namespace ReSolve { VectorHandler::~VectorHandler() { delete cpuImpl_; - if (isCudaEnabled_) delete cudaImpl_; - if (isHipEnabled_) delete hipImpl_; - //delete the workspace TODO + if (isCudaEnabled_ || isHipEnabled_) { + delete devImpl_; + } } /** @@ -93,10 +93,10 @@ namespace ReSolve { real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, std::string memspace) { if (memspace == "cuda" ) { - return cudaImpl_->dot(x, y); + return devImpl_->dot(x, y); } else { if (memspace == "hip") { - return hipImpl_->dot(x, y); + return devImpl_->dot(x, y); } else if (memspace == "cpu") { return cpuImpl_->dot(x, y); } else { @@ -117,9 +117,9 @@ namespace ReSolve { void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace) { if (memspace == "cuda" ) { - cudaImpl_->scal(alpha, x); + devImpl_->scal(alpha, x); } else if (memspace == "hip") { - hipImpl_->scal(alpha, x); + devImpl_->scal(alpha, x); } else { if (memspace == "cpu") { cpuImpl_->scal(alpha, x); @@ -141,9 +141,9 @@ namespace ReSolve { real_type VectorHandler::infNorm(vector::Vector* x, std::string memspace) { if (memspace == "cuda" ) { - return cudaImpl_->infNorm(x); + return devImpl_->infNorm(x); } else if (memspace == "hip") { - return hipImpl_->infNorm(x); + return devImpl_->infNorm(x); } else { if (memspace == "cpu") { return cpuImpl_->infNorm(x); @@ -166,10 +166,10 @@ namespace ReSolve { { //AXPY: y = alpha * x + y if (memspace == "cuda" ) { - cudaImpl_->axpy(alpha, x, y); + devImpl_->axpy(alpha, x, y); } else { if (memspace == "hip" ) { - hipImpl_->axpy(alpha, x, y); + devImpl_->axpy(alpha, x, y); } else { if (memspace == "cpu") { cpuImpl_->axpy(alpha, x, y); @@ -200,9 +200,9 @@ namespace ReSolve { void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace) { if (memspace == "cuda") { - cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); } else if (memspace == "hip") { - hipImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); } else if (memspace == "cpu") { cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); } else { @@ -226,9 +226,9 @@ namespace ReSolve { { using namespace constants; if (memspace == "cuda") { - cudaImpl_->massAxpy(size, alpha, k, x, y); + devImpl_->massAxpy(size, alpha, k, x, y); } else if (memspace == "hip") { - hipImpl_->massAxpy(size, alpha, k, x, y); + devImpl_->massAxpy(size, alpha, k, x, y); } else if (memspace == "cpu") { cpuImpl_->massAxpy(size, alpha, k, x, y); } else { @@ -253,9 +253,9 @@ namespace ReSolve { void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace) { if (memspace == "cuda") { - cudaImpl_->massDot2Vec(size, V, k, x, res); + devImpl_->massDot2Vec(size, V, k, x, res); } else if (memspace == "hip") { - hipImpl_->massDot2Vec(size, V, k, x, res); + devImpl_->massDot2Vec(size, V, k, x, res); } else if (memspace == "cpu") { cpuImpl_->massDot2Vec(size, V, k, x, res); } else { diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index f07165f7c..c04e79261 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -62,9 +62,8 @@ namespace ReSolve { //namespace vector { private: - VectorHandlerImpl* cpuImpl_{nullptr}; - VectorHandlerImpl* cudaImpl_{nullptr}; - VectorHandlerImpl* hipImpl_{nullptr}; + VectorHandlerImpl* cpuImpl_{nullptr}; + VectorHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation bool isCpuEnabled_{false}; bool isCudaEnabled_{false}; From f3a2caa533020ce995495eb884797ac0b688b994 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 12 Dec 2023 13:46:57 -0500 Subject: [PATCH 2/5] Use HOST/DEVICE in handler calls instead of cpu/cuda/hip. --- examples/r_KLU_GLU.cpp | 14 +- examples/r_KLU_GLU_matrix_values_update.cpp | 6 +- examples/r_KLU_KLU.cpp | 12 +- examples/r_KLU_KLU_standalone.cpp | 6 +- .../r_KLU_cusolverrf_redo_factorization.cpp | 26 +-- examples/r_KLU_rf.cpp | 10 +- examples/r_KLU_rf_FGMRES.cpp | 36 ++-- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 26 +-- examples/r_KLU_rocSolverRf_FGMRES.cpp | 24 +-- examples/r_KLU_rocsolverrf.cpp | 14 +- .../r_KLU_rocsolverrf_redo_factorization.cpp | 14 +- examples/r_SysSolver.cpp | 6 +- examples/r_randGMRES.cpp | 6 +- examples/r_randGMRES_CUDA.cpp | 8 +- resolve/GramSchmidt.cpp | 4 +- resolve/GramSchmidt.hpp | 2 +- resolve/LinSolverIterativeFGMRES.cpp | 40 +++-- resolve/LinSolverIterativeFGMRES.hpp | 2 +- resolve/LinSolverIterativeRandFGMRES.cpp | 27 ++- resolve/LinSolverIterativeRandFGMRES.hpp | 2 +- resolve/SystemSolver.cpp | 29 +-- resolve/matrix/MatrixHandler.cpp | 95 +++++----- resolve/matrix/MatrixHandler.hpp | 10 +- resolve/vector/VectorHandler.cpp | 170 +++++++++--------- resolve/vector/VectorHandler.hpp | 15 +- tests/functionality/testKLU.cpp | 38 ++-- tests/functionality/testKLU_GLU.cpp | 38 ++-- tests/functionality/testKLU_Rf.cpp | 42 ++--- tests/functionality/testKLU_Rf_FGMRES.cpp | 42 ++--- tests/functionality/testKLU_RocSolver.cpp | 40 ++--- .../testKLU_RocSolver_FGMRES.cpp | 38 ++-- tests/functionality/testRandGMRES_Cuda.cpp | 6 +- tests/functionality/testRandGMRES_Rocm.cpp | 6 +- tests/functionality/testSysCuda.cpp | 38 ++-- tests/functionality/testSysHipRefine.cpp | 38 ++-- tests/unit/matrix/MatrixHandlerTests.hpp | 11 +- tests/unit/vector/GramSchmidtTests.hpp | 10 +- tests/unit/vector/VectorHandlerTests.hpp | 16 +- 38 files changed, 497 insertions(+), 470 deletions(-) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index 864b5f835..a050e3245 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -141,14 +141,14 @@ int main(int argc, char *argv[]) // Estimate solution error vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); - matrix_handler->matrixInfNorm(A, &norm_A, "cuda"); - norm_x = vector_handler->infNorm(vec_x, "cuda"); - norm_r = vector_handler->infNorm(vec_r, "cuda"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" << "\t Residual inf norm: " << norm_r <<"\n" << "\t Solution inf norm: " << norm_x <<"\n" @@ -156,7 +156,7 @@ int main(int argc, char *argv[]) std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/bnorm << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/bnorm << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index b4f003c6e..1fe217d0c 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -153,12 +153,12 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) << "\n"; } diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index d5824c391..3c34a14d4 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -134,16 +134,16 @@ int main(int argc, char *argv[]) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - matrix_handler->setValuesChanged(true, "cpu"); + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); - norm_r = vector_handler->infNorm(vec_r, "cpu"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::HOST); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::HOST); std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; - matrix_handler->matrixInfNorm(A, &norm_A, "cpu"); - norm_x = vector_handler->infNorm(vec_x, "cpu"); + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::HOST); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::HOST); std::cout << "\tMatrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" << "\tResidual inf norm: " << norm_r <<"\n" << "\tSolution inf norm: " << norm_x <<"\n" diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index a2aa6d81a..3b4497f81 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -97,13 +97,13 @@ int main(int argc, char *argv[]) std::cout << "KLU solve status: " << status << std::endl; vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - matrix_handler->setValuesChanged(true, "cpu"); + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::HOST); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; diff --git a/examples/r_KLU_cusolverrf_redo_factorization.cpp b/examples/r_KLU_cusolverrf_redo_factorization.cpp index 1900dfefd..3f53a0204 100644 --- a/examples/r_KLU_cusolverrf_redo_factorization.cpp +++ b/examples/r_KLU_cusolverrf_redo_factorization.cpp @@ -116,11 +116,11 @@ int main(int argc, char *argv[] ) //Now convert to CSR. if (i < 2) { - matrix_handler->coo2csr(A_coo, A, "cpu"); + matrix_handler->coo2csr(A_coo, A, ReSolve::memory::HOST); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { - matrix_handler->coo2csr(A_coo, A, "cuda"); + matrix_handler->coo2csr(A_coo, A, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); + matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) { std::cout << "ERROR\n"; } @@ -159,11 +159,11 @@ int main(int argc, char *argv[] ) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) << res_nrm/b_nrm << "\n"; @@ -184,10 +184,10 @@ int main(int argc, char *argv[] ) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout <<"\t New residual norm: " << std::scientific << std::setprecision(16) @@ -199,8 +199,8 @@ int main(int argc, char *argv[] ) ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc, L, "cuda"); - matrix_handler->csc2csr(U_csc, U, "cuda"); + matrix_handler->csc2csr(L_csc, L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc, U, ReSolve::memory::DEVICE); P = KLU->getPOrdering(); Q = KLU->getQOrdering(); diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index 1dbd4413d..376e4be21 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -129,8 +129,8 @@ int main(int argc, char *argv[] ) ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); + matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) {printf("ERROR");} index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); @@ -152,13 +152,13 @@ int main(int argc, char *argv[] ) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 6f7c1fa42..683c07865 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -124,7 +124,7 @@ int main(int argc, char *argv[]) real_type norm_b; if (i < 2){ KLU->setup(A); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); @@ -132,14 +132,14 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); - matrix_handler->matrixInfNorm(A, &norm_A, "cuda"); - norm_x = vector_handler->infNorm(vec_x, "cuda"); - norm_r = vector_handler->infNorm(vec_r, "cuda"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" << "\t Residual inf norm: " << norm_r <<"\n" << "\t Solution inf norm: " << norm_x <<"\n" @@ -147,14 +147,14 @@ int main(int argc, char *argv[]) std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; if (i == 1) { ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); + matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) { std::cout << "ERROR\n"; } @@ -174,18 +174,18 @@ int main(int argc, char *argv[]) std::cout<<"CUSOLVER RF solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - //matrix_handler->setValuesChanged(true, "cuda"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); - matrix_handler->matrixInfNorm(A, &norm_A, "cuda"); - norm_x = vector_handler->infNorm(vec_x, "cuda"); - norm_r = vector_handler->infNorm(vec_r, "cuda"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" << "\t Residual inf norm: " << norm_r <<"\n" << "\t Solution inf norm: " << norm_x <<"\n" @@ -193,9 +193,9 @@ int main(int argc, char *argv[]) std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; - matrix_handler->matrixInfNorm(A, &norm_A, "cuda"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); if(!std::isnan(norm_r) && !std::isinf(norm_r)) { diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index 4610be4a0..f37151215 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) real_type norm_b; if (i < 2){ KLU->setup(A); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); @@ -133,21 +133,21 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual : " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; if (i == 1) { ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); + matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) { std::cout << "ERROR\n"; } @@ -173,7 +173,7 @@ int main(int argc, char *argv[]) FGMRES->setupPreconditioner("LU", Rf); } //if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); - real_type norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); + real_type norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (before solve): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; @@ -181,17 +181,17 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); FGMRES->resetMatrix(A); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " << std::scientific << std::setprecision(16) << norm_b << "\n"; @@ -205,7 +205,7 @@ int main(int argc, char *argv[]) << " final nrm: " << FGMRES->getFinalResidualNorm()/norm_b << " iter: " << FGMRES->getNumIter() << "\n"; - norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); + norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (after IR): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index 66bf81d33..56ae49eff 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) real_type norm_b; if (i < 2){ KLU->setup(A); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); @@ -131,13 +131,13 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "hip"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "hip"))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; if (i == 1) { ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); @@ -161,22 +161,22 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); std::cout<<"ROCSOLVER RF solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - norm_b = vector_handler->dot(vec_r, vec_r, "hip"); + norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); - //matrix_handler->setValuesChanged(true, "hip"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); - real_type rnrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); + real_type rnrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) << rnrm/norm_b << "\n"; - matrix_handler->matrixInfNorm(A, &norm_A, "hip"); - norm_x = vector_handler->infNorm(vec_x, "hip"); - norm_r = vector_handler->infNorm(vec_r, "hip"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" << "\t Residual inf norm: " << norm_r <<"\n" << "\t Solution inf norm: " << norm_x <<"\n" diff --git a/examples/r_KLU_rocsolverrf.cpp b/examples/r_KLU_rocsolverrf.cpp index c5122d089..0d9b97a24 100644 --- a/examples/r_KLU_rocsolverrf.cpp +++ b/examples/r_KLU_rocsolverrf.cpp @@ -142,17 +142,17 @@ int main(int argc, char *argv[] ) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); + real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "hip"))/bnorm << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/bnorm << "\n"; - matrix_handler->matrixInfNorm(A, &norm_A, "hip"); - norm_x = vector_handler->infNorm(vec_x, "hip"); - norm_r = vector_handler->infNorm(vec_r, "hip"); + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A <<"\n" << "\t Residual inf norm: " << norm_r <<"\n" << "\t Solution inf norm: " << norm_x <<"\n" diff --git a/examples/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/r_KLU_rocsolverrf_redo_factorization.cpp index 54f28e8fe..e0bedc19d 100644 --- a/examples/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/r_KLU_rocsolverrf_redo_factorization.cpp @@ -144,11 +144,11 @@ int main(int argc, char *argv[] ) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << res_nrm/b_nrm << "\n"; @@ -167,10 +167,10 @@ int main(int argc, char *argv[] ) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"\t New residual norm: " << std::scientific << std::setprecision(16) diff --git a/examples/r_SysSolver.cpp b/examples/r_SysSolver.cpp index bc1703513..8ba8e05f7 100644 --- a/examples/r_SysSolver.cpp +++ b/examples/r_SysSolver.cpp @@ -134,13 +134,13 @@ int main(int argc, char *argv[]) } vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - matrix_handler->setValuesChanged(true, "cpu"); + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::HOST); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; } //now DELETE diff --git a/examples/r_randGMRES.cpp b/examples/r_randGMRES.cpp index a491ca2d1..c9ad01a5e 100644 --- a/examples/r_randGMRES.cpp +++ b/examples/r_randGMRES.cpp @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); //Now call direct solver real_type norm_b; - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); Rf->setup(A); FGMRES->setRestart(150); @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) FGMRES->setup(A); GS->setup(FGMRES->getKrand(), FGMRES->getRestart()); - //matrix_handler->setValuesChanged(true, "hip"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); FGMRES->setFlexible(1); @@ -103,7 +103,7 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); - norm_b = vector_handler->dot(vec_rhs, vec_rhs, "hip"); + norm_b = vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) diff --git a/examples/r_randGMRES_CUDA.cpp b/examples/r_randGMRES_CUDA.cpp index 9e4513957..3fc6814b1 100644 --- a/examples/r_randGMRES_CUDA.cpp +++ b/examples/r_randGMRES_CUDA.cpp @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); ReSolve::LinSolverDirectCuSparseILU0* Rf = new ReSolve::LinSolverDirectCuSparseILU0(workspace_CUDA); - ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler,ReSolve::LinSolverIterativeRandFGMRES::fwht, GS, "cuda"); + ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler, ReSolve::LinSolverIterativeRandFGMRES::fwht, GS, "cuda"); std::cout << std::endl << std::endl << std::endl; std::cout << "========================================================================================================================"<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); //Now call direct solver real_type norm_b; - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); Rf->setup(A, nullptr, nullptr, nullptr, nullptr, vec_rhs); FGMRES->setRestart(800); @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) FGMRES->setup(A); GS->setup(FGMRES->getKrand(), FGMRES->getRestart()); - //matrix_handler->setValuesChanged(true, "cuda"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); FGMRES->setFlexible(1); @@ -103,7 +103,7 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); - norm_b = vector_handler->dot(vec_rhs, vec_rhs, "cuda"); + norm_b = vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 8385d9d39..79a9fa567 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -111,11 +111,11 @@ namespace ReSolve } //this always happen on the GPU - int GramSchmidt::orthogonalize(index_type n, vector::Vector* V, real_type* H, index_type i, std::string memspace) + int GramSchmidt::orthogonalize(index_type n, vector::Vector* V, real_type* H, index_type i, memory::MemorySpace memspace) { using namespace constants; - if ((memspace == "cuda") || (memspace == "hip")) { // or hip + if (memspace == memory::DEVICE) { double t; double s; diff --git a/resolve/GramSchmidt.hpp b/resolve/GramSchmidt.hpp index 68ea8b09b..841d63af1 100644 --- a/resolve/GramSchmidt.hpp +++ b/resolve/GramSchmidt.hpp @@ -24,7 +24,7 @@ namespace ReSolve real_type* getL(); //only for low synch, returns null ptr otherwise int setup(index_type n, index_type restart); - int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i, std::string memspace); + int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i, memory::MemorySpace memspace); bool isSetupComplete(); private: diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 12e97a16b..901e285d6 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -13,16 +13,16 @@ namespace ReSolve LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } + this->matrix_handler_ = nullptr; this->vector_handler_ = nullptr; - // Defaults: - // tol_ = 1e-14; - // maxit_= 100; - // restart_ = 10; - // conv_cond_ = 0; - // flexible_ = true; - d_V_ = nullptr; d_Z_ = nullptr; } @@ -32,18 +32,18 @@ namespace ReSolve GramSchmidt* gs, std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } + this->matrix_handler_ = matrix_handler; this->vector_handler_ = vector_handler; this->GS_ = gs; - // Defaults: - // tol_ = 1e-14; - // maxit_= 100; - // restart_ = 10; - // conv_cond_ = 0; - // flexible_ = true; - d_V_ = nullptr; d_Z_ = nullptr; } @@ -57,7 +57,13 @@ namespace ReSolve GramSchmidt* gs, std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } this->matrix_handler_ = matrix_handler; this->vector_handler_ = vector_handler; this->GS_ = gs; diff --git a/resolve/LinSolverIterativeFGMRES.hpp b/resolve/LinSolverIterativeFGMRES.hpp index d018e6ec0..04dcac712 100644 --- a/resolve/LinSolverIterativeFGMRES.hpp +++ b/resolve/LinSolverIterativeFGMRES.hpp @@ -37,7 +37,7 @@ namespace ReSolve private: //remember matrix handler and vector handler are inherited. - std::string memspace_; + memory::MemorySpace memspace_; std::string orth_option_; vector_type* d_V_{nullptr}; diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 24d3790d4..6b7b3b1e6 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -15,7 +15,14 @@ namespace ReSolve LinSolverIterativeRandFGMRES::LinSolverIterativeRandFGMRES(std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } + this->matrix_handler_ = nullptr; this->vector_handler_ = nullptr; this->rand_method_ = cs; @@ -36,7 +43,14 @@ namespace ReSolve GramSchmidt* gs, std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } + this->matrix_handler_ = matrix_handler; this->vector_handler_ = vector_handler; this->rand_method_ = rand_method; @@ -62,7 +76,14 @@ namespace ReSolve GramSchmidt* gs, std::string memspace) { - memspace_ = memspace; + if (memspace == "cpu") { + memspace_ = memory::HOST; + } else if ((memspace == "cuda") || (memspace == "hip")) { + memspace_ = memory::DEVICE; + } else { + out::error() << "Unrecognized device " << memspace << "\n"; + } + this->matrix_handler_ = matrix_handler; this->vector_handler_ = vector_handler; this->rand_method_ = rand_method; diff --git a/resolve/LinSolverIterativeRandFGMRES.hpp b/resolve/LinSolverIterativeRandFGMRES.hpp index d695021a3..62601cb0a 100644 --- a/resolve/LinSolverIterativeRandFGMRES.hpp +++ b/resolve/LinSolverIterativeRandFGMRES.hpp @@ -60,7 +60,7 @@ namespace ReSolve private: //remember matrix handler and vector handler are inherited. - std::string memspace_; + memory::MemorySpace memspace_; real_type tol_; index_type maxit_; diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 59d160bbd..e95f2ec8a 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -69,10 +69,10 @@ namespace ReSolve #ifdef RESOLVE_USE_HIP SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, - std::string factor, - std::string refactor, - std::string solve, - std::string ir) + std::string factor, + std::string refactor, + std::string solve, + std::string ir) : workspaceHip_(workspaceHip), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -409,11 +409,15 @@ namespace ReSolve { using namespace ReSolve::constants; assert(rhs->getSize() == resVector_->getSize()); - real_type norm_b = 0.0; + real_type norm_b = 0.0; + real_type resnorm = 0.0; if (memspace_ == "cpu") { resVector_->update(rhs, memory::HOST, memory::HOST); - matrixHandler_->setValuesChanged(true, "cpu"); + matrixHandler_->setValuesChanged(true, memory::HOST); + norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); + matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUSONE, "csr", memory::HOST); + resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); #ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { if (isSolveOnDevice_) { @@ -421,7 +425,10 @@ namespace ReSolve } else { resVector_->update(rhs, memory::HOST, memory::DEVICE); } - matrixHandler_->setValuesChanged(true, "cuda"); + matrixHandler_->setValuesChanged(true, memory::DEVICE); + norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); + matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUSONE, "csr", memory::DEVICE); + resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); #endif #ifdef RESOLVE_USE_HIP } else if (memspace_ == "hip") { @@ -430,15 +437,15 @@ namespace ReSolve } else { resVector_->update(rhs, memory::HOST, memory::DEVICE); } - matrixHandler_->setValuesChanged(true, "hip"); + matrixHandler_->setValuesChanged(true, memory::DEVICE); + norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); + matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUSONE, "csr", memory::DEVICE); + resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); #endif } else { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } - norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memspace_)); - matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUSONE, "csr", memspace_); - real_type resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memspace_)); return resnorm/norm_b; } diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 40e258b53..b6e4f709f 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -92,20 +92,20 @@ namespace ReSolve { { cpuImpl_ = new MatrixHandlerCpu(); devImpl_ = new MatrixHandlerHip(new_workspace); - isCpuEnabled_ = true; + isCpuEnabled_ = true; isHipEnabled_ = true; } #endif - void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) + void MatrixHandler::setValuesChanged(bool isValuesChanged, memory::MemorySpace memspace) { - if (memspace == "cpu") { - cpuImpl_->setValuesChanged(isValuesChanged); - } else if (memspace == "cuda") { - devImpl_->setValuesChanged(isValuesChanged); - } else if (memspace == "hip") { - devImpl_->setValuesChanged(isValuesChanged); - } else { - out::error() << "Unsupported device " << memspace << "\n"; + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpuImpl_->setValuesChanged(isValuesChanged); + break; + case DEVICE: + devImpl_->setValuesChanged(isValuesChanged); + break; } } @@ -114,7 +114,7 @@ namespace ReSolve { * * Conversion takes place on CPU, and then CSR matrix is copied to `memspace`. */ - int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) + int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { //count nnzs first index_type nnz_unpacked = 0; @@ -249,17 +249,8 @@ namespace ReSolve { } #endif A_csr->setNnz(nnz_no_duplicates); - if (memspace == "cpu"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::HOST); - } else { - if (memspace == "cuda"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); - } else if (memspace == "hip"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); - } else { - //display error - } - } + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); + delete [] nnz_counts; delete [] tmp; delete [] nnz_shifts; @@ -289,48 +280,46 @@ namespace ReSolve { const real_type* alpha, const real_type* beta, std::string matrixFormat, - std::string memspace) + memory::MemorySpace memspace) { - if (memspace == "cuda" ) { - return devImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); - } else if (memspace == "cpu") { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); - } else if (memspace == "hip") { + break; + case DEVICE: return devImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); - } else { - out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; - return 1; + break; } + return 1; } - int MatrixHandler::matrixInfNorm(matrix::Sparse *A, real_type* norm, std::string memspace) { - - if (memspace == "cuda" ) { - return devImpl_->matrixInfNorm(A, norm); - } else if (memspace == "cpu") { - return cpuImpl_->matrixInfNorm(A, norm); - } else if (memspace == "hip") { - return devImpl_->matrixInfNorm(A, norm); - } else { - out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; - return 1; + int MatrixHandler::matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace) + { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + return cpuImpl_->matrixInfNorm(A, norm); + break; + case DEVICE: + return devImpl_->matrixInfNorm(A, norm); + break; } - + return 1; } - int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) + int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace) { - if (memspace == "cuda") { - return devImpl_->csc2csr(A_csc, A_csr); - } else if (memspace == "hip") { - return devImpl_->csc2csr(A_csc, A_csr); - } else if (memspace == "cpu") { - out::warning() << "Using untested csc2csr on CPU ..." << std::endl; - return cpuImpl_->csc2csr(A_csc, A_csr); - } else { - out::error() << "csc2csr not implemented for " << memspace << " device." << std::endl; - return -1; + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + return cpuImpl_->csc2csr(A_csc, A_csr); + break; + case DEVICE: + return devImpl_->csc2csr(A_csc, A_csr); + break; } + return 1; } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index bfdfeb318..dc26f44a2 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -52,8 +52,8 @@ namespace ReSolve { MatrixHandler(LinAlgWorkspaceHIP* workspace); ~MatrixHandler(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); - int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace); + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace); /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped int matvec(matrix::Sparse* A, @@ -62,9 +62,9 @@ namespace ReSolve { const real_type* alpha, const real_type* beta, std::string matrix_type, - std::string memspace); - int matrixInfNorm(matrix::Sparse *A, real_type* norm, std::string memspace); - void setValuesChanged(bool toWhat, std::string memspace); + memory::MemorySpace memspace); + int matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace); + void setValuesChanged(bool toWhat, memory::MemorySpace memspace); private: bool new_matrix_{true}; ///< if the structure changed, you need a new handler. diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index efc95d4f7..9dcabd249 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -90,20 +90,18 @@ namespace ReSolve { * @return dot product (real number) of _x_ and _y_ */ - real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, std::string memspace) + real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { - if (memspace == "cuda" ) { - return devImpl_->dot(x, y); - } else { - if (memspace == "hip") { - return devImpl_->dot(x, y); - } else if (memspace == "cpu") { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: return cpuImpl_->dot(x, y); - } else { - out::error() << "Not implemented (yet)" << std::endl; - return NAN; - } + break; + case DEVICE: + return devImpl_->dot(x, y); + break; } + return NAN; } /** @@ -114,18 +112,16 @@ namespace ReSolve { * @param memspace[in] string containg memspace (cpu or cuda or hip) * */ - void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace) + void VectorHandler::scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace) { - if (memspace == "cuda" ) { - devImpl_->scal(alpha, x); - } else if (memspace == "hip") { - devImpl_->scal(alpha, x); - } else { - if (memspace == "cpu") { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: cpuImpl_->scal(alpha, x); - } else { - out::error() << "Not implemented (yet)" << std::endl; - } + break; + case DEVICE: + devImpl_->scal(alpha, x); + break; } } @@ -138,21 +134,20 @@ namespace ReSolve { * @return infinity norm (real number) of _x_ * */ - real_type VectorHandler::infNorm(vector::Vector* x, std::string memspace) + real_type VectorHandler::infNorm(vector::Vector* x, memory::MemorySpace memspace) { - if (memspace == "cuda" ) { - return devImpl_->infNorm(x); - } else if (memspace == "hip") { - return devImpl_->infNorm(x); - } else { - if (memspace == "cpu") { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: return cpuImpl_->infNorm(x); - } else { - out::error() << "Not implemented (yet)" << std::endl; - return -1.0; // note inf norm cannot be negative! - } + break; + case DEVICE: + return devImpl_->infNorm(x); + break; } + return -1.0; } + /** * @brief axpy i.e, y = alpha*x+y where alpha is a constant * @@ -162,21 +157,17 @@ namespace ReSolve { * @param[in] memspace String containg memspace (cpu or cuda or hip) * */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace) + void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { //AXPY: y = alpha * x + y - if (memspace == "cuda" ) { - devImpl_->axpy(alpha, x, y); - } else { - if (memspace == "hip" ) { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpuImpl_->axpy(alpha, x, y); + break; + case DEVICE: devImpl_->axpy(alpha, x, y); - } else { - if (memspace == "cpu") { - cpuImpl_->axpy(alpha, x, y); - } else { - out::error() <<"Not implemented (yet)" << std::endl; - } - } + break; } } @@ -185,28 +176,36 @@ namespace ReSolve { * i.e., x = beta*x + alpha*V*y * * @param[in] Transpose - yes (T) or no (N) - * @param[in] n Number of rows in (non-transposed) matrix - * @param[in] k Number of columns in (non-transposed) - * @param[in] alpha Constant real number - * @param[in] beta Constant real number - * @param[in] V Multivector containing the matrix, organized columnwise - * @param[in] y Vector, k x 1 if N and n x 1 if T - * @param[in,out] x Vector, n x 1 if N and k x 1 if T - * @param[in] memspace cpu or cuda or hip (for now) + * @param[in] n - Number of rows in (non-transposed) matrix + * @param[in] k - Number of columns in (non-transposed) + * @param[in] alpha - Constant real number + * @param[in] beta - Constant real number + * @param[in] V - Multivector containing the matrix, organized columnwise + * @param[in] y - Vector, k x 1 if N and n x 1 if T + * @param[in,out] x - Vector, n x 1 if N and k x 1 if T + * @param[in] memspace - cpu or cuda or hip (for now) * * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace) + void VectorHandler::gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, + memory::MemorySpace memspace) { - if (memspace == "cuda") { - devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else if (memspace == "hip") { - devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else if (memspace == "cpu") { - cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else { - out::error() << "Not implemented (yet)" << std::endl; + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + break; + case DEVICE: + devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + break; } } @@ -222,17 +221,16 @@ namespace ReSolve { * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandler::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, std::string memspace) + void VectorHandler::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { - using namespace constants; - if (memspace == "cuda") { - devImpl_->massAxpy(size, alpha, k, x, y); - } else if (memspace == "hip") { - devImpl_->massAxpy(size, alpha, k, x, y); - } else if (memspace == "cpu") { - cpuImpl_->massAxpy(size, alpha, k, x, y); - } else { - out::error() << "Not implemented (yet)" << std::endl; + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpuImpl_->massAxpy(size, alpha, k, x, y); + break; + case DEVICE: + devImpl_->massAxpy(size, alpha, k, x, y); + break; } } @@ -240,26 +238,26 @@ namespace ReSolve { * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) * - * @param[in] size Number of elements in a single vector in V - * @param[in] V Multivector; k vectors size n x 1 each - * @param[in] k Number of vectors in V - * @param[in] x Multivector; 2 vectors size n x 1 each - * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace String containg memspace (cpu or cuda or hip) + * @param[in] size - Number of elements in a single vector in V + * @param[in] V - Multivector; k vectors size n x 1 each + * @param[in] k - Number of vectors in V + * @param[in] x - Multivector; 2 vectors size n x 1 each + * @param[out] res - Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace - String containg memspace (cpu or cuda or hip) * * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace) + void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace) { - if (memspace == "cuda") { - devImpl_->massDot2Vec(size, V, k, x, res); - } else if (memspace == "hip") { - devImpl_->massDot2Vec(size, V, k, x, res); - } else if (memspace == "cpu") { - cpuImpl_->massDot2Vec(size, V, k, x, res); - } else { - out::error() << "Not implemented (yet)" << std::endl; + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpuImpl_->massDot2Vec(size, V, k, x, res); + break; + case DEVICE: + devImpl_->massDot2Vec(size, V, k, x, res); + break; } } diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index c04e79261..2299e662d 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include namespace ReSolve { @@ -24,20 +25,20 @@ namespace ReSolve { //namespace vector { ~VectorHandler(); //y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace); + void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); //dot: x \cdot y - real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace); + real_type dot(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); //scal = alpha * x - void scal(const real_type* alpha, vector::Vector* x, std::string memspace); + void scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace); //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, std::string memspace); + void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise //Size = n - void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace); + void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, @@ -53,12 +54,12 @@ namespace ReSolve { //namespace vector { vector::Vector* V, vector::Vector* y, vector::Vector* x, - std::string memspace); + memory::MemorySpace memspace); /** infNorm: * Returns infinity norm of a vector (i.e., entry with max abs value) */ - real_type infNorm(vector::Vector* x, std::string memspace); + real_type infNorm(vector::Vector* x, memory::MemorySpace memspace); private: diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 40b7b1404..99c6c8b2d 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -99,36 +99,36 @@ int main(int argc, char *argv[]) vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::HOST)); - matrix_handler->setValuesChanged(true, "cpu"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cpu"); + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cpu")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::HOST)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::HOST)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::HOST); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::HOST)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - matrix_handler->setValuesChanged(true, "cpu"); + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::HOST)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::HOST); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::HOST)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::HOST); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - error_sum += matrix_handler->csc2csr(L_csc,L, "cuda"); - error_sum += matrix_handler->csc2csr(U_csc,U, "cuda"); + error_sum += matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + error_sum += matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) { std::cout << "ERROR!\n"; } @@ -186,26 +186,26 @@ int main(int argc, char *argv[]) error_sum += status; vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate the residual ||b-Ax|| - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - error_sum += matrix_handler->csc2csr(L_csc,L, "cuda"); - error_sum += matrix_handler->csc2csr(U_csc,U, "cuda"); + error_sum += matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); + error_sum += matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); if (L == nullptr) { printf("ERROR"); @@ -216,28 +216,28 @@ int main(int argc, char *argv[]) error_sum += status; vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate final residual - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "hip")); - matrix_handler->setValuesChanged(true, "hip"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::DEVICE)); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "hip")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate the residual ||b-Ax|| - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "hip")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate final residual - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<setToZero(ReSolve::memory::DEVICE); real_type norm_b; - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = Rf->setup(A); error_sum += status; @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) status = GS->setup(FGMRES->getKrand(), FGMRES->getRestart()); error_sum += status; - //matrix_handler->setValuesChanged(true, "cuda"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = FGMRES->resetMatrix(A); error_sum += status; @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) FGMRES->solve(vec_rhs, vec_x); - norm_b = vector_handler->dot(vec_rhs, vec_rhs, "cuda"); + norm_b = vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE); norm_b = std::sqrt(norm_b); real_type final_norm_first = FGMRES->getFinalResidualNorm(); std::cout << "Randomized FGMRES results (first run): \n" diff --git a/tests/functionality/testRandGMRES_Rocm.cpp b/tests/functionality/testRandGMRES_Rocm.cpp index 7ea91eb76..5aa23e944 100644 --- a/tests/functionality/testRandGMRES_Rocm.cpp +++ b/tests/functionality/testRandGMRES_Rocm.cpp @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) vec_x->setToZero(ReSolve::memory::DEVICE); real_type norm_b; - matrix_handler->setValuesChanged(true, "hip"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = Rf->setup(A); error_sum += status; @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) status = GS->setup(FGMRES->getKrand(), FGMRES->getRestart()); error_sum += status; - //matrix_handler->setValuesChanged(true, "hip"); + //matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = FGMRES->resetMatrix(A); error_sum += status; @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) FGMRES->solve(vec_rhs, vec_x); - norm_b = vector_handler->dot(vec_rhs, vec_rhs, "hip"); + norm_b = vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE); norm_b = std::sqrt(norm_b); real_type final_norm_first = FGMRES->getFinalResidualNorm(); std::cout << "Randomized FGMRES results (first run): \n" diff --git a/tests/functionality/testSysCuda.cpp b/tests/functionality/testSysCuda.cpp index 67068ac8b..6c296c5a0 100644 --- a/tests/functionality/testSysCuda.cpp +++ b/tests/functionality/testSysCuda.cpp @@ -111,38 +111,38 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler.setValuesChanged(true, "hip"); + matrix_handler.setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate the residual ||b-Ax|| - status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr",ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + real_type normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normXtrue = sqrt(vector_handler.dot(vec_x, vec_x, "hip")); - real_type normB1 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, "hip")); + real_type normXtrue = sqrt(vector_handler.dot(vec_x, vec_x, ReSolve::memory::DEVICE)); + real_type normB1 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true - vector_handler.axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler.axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::HOST); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + real_type normRmatrix1CPU = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler.setValuesChanged(true, "hip"); + matrix_handler.setValuesChanged(true, ReSolve::memory::DEVICE); //evaluate final residual - status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + real_type normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); //for testing only - control - real_type normB2 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, "hip")); + real_type normB2 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); //compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler.axpy(&MINUSONE, vec_x, vec_diff, "hip"); + vector_handler.axpy(&MINUSONE, vec_x, vec_diff, ReSolve::memory::DEVICE); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler.dot(vec_diff, vec_diff, "hip")); + real_type normDiffMatrix2 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); //compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", ReSolve::memory::DEVICE); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); std::cout<<"Results (second matrix): "<matrixInfNorm(A, &norm, memspace_); + handler->matrixInfNorm(A, &norm, ms); status *= (norm == 30.0); delete handler; @@ -70,8 +75,8 @@ class MatrixHandlerTests : TestBase real_type alpha = 2.0/30.0; real_type beta = 2.0; - handler->setValuesChanged(true, memspace_); - handler->matvec(A, &x, &y, &alpha, &beta, "csr", memspace_); + handler->setValuesChanged(true, ms); + handler->matvec(A, &x, &y, &alpha, &beta, "csr", ms); status *= verifyAnswer(y, 4.0, memspace_); diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 4837b57ba..676292008 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -109,13 +109,13 @@ namespace ReSolve { //set the first vector to all 1s, normalize V->setToConst(0, 1.0, ms); - real_type nrm = handler->dot(V, V, memspace_); + real_type nrm = handler->dot(V, V, ms); nrm = sqrt(nrm); nrm = 1.0 / nrm; - handler->scal(&nrm, V, memspace_); + handler->scal(&nrm, V, ms); - GS->orthogonalize(N, V, H, 0, memspace_ ); - GS->orthogonalize(N, V, H, 1, memspace_ ); + GS->orthogonalize(N, V, H, 0, ms ); + GS->orthogonalize(N, V, H, 1, ms ); status *= verifyAnswer(V, 3, handler, memspace_); @@ -166,7 +166,7 @@ namespace ReSolve { for (index_type j = 0; j < K; ++j) { a->update(x->getVectorData(i, ms), ms, memory::HOST); b->update(x->getVectorData(j, ms), ms, memory::HOST); - ip = handler->dot(a, b, "cpu"); + ip = handler->dot(a, b, memory::HOST); if ( (i != j) && (abs(ip) > 1e-14)) { status = false; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index c5e95170a..50093ab4a 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -55,7 +55,7 @@ namespace ReSolve { } x->update(data, memory::HOST, ms); - real_type ans = handler->infNorm(x, memspace_); + real_type ans = handler->infNorm(x, ms); bool st = true; if (ans != (real_type) (N - 1) * 0.1) { st = false; @@ -92,7 +92,7 @@ namespace ReSolve { real_type alpha = 0.5; //the result is a vector with y[i] = 2.5; - handler->axpy(&alpha, x, y, memspace_); + handler->axpy(&alpha, x, y, ms); status *= verifyAnswer(y, 2.5, memspace_); delete handler; @@ -124,7 +124,7 @@ namespace ReSolve { y->setToConst(4.0, ms); real_type ans; //the result is N - ans = handler->dot(x, y, memspace_); + ans = handler->dot(x, y, ms); bool st = true;; if (ans != (real_type) N) { @@ -161,7 +161,7 @@ namespace ReSolve { real_type alpha = 3.5; //the answer is x[i] = 4.375; - handler->scal(&alpha, x, memspace_); + handler->scal(&alpha, x, ms); status *= verifyAnswer(x, 4.375, memspace_); delete handler; @@ -204,7 +204,7 @@ namespace ReSolve { index_type r = K % 2; real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - handler->massAxpy(N, alpha, K, x, y, memspace_); + handler->massAxpy(N, alpha, K, x, y, ms); status *= verifyAnswer(y, 2.0 - res, memspace_); delete handler; @@ -236,7 +236,7 @@ namespace ReSolve { x->setToConst(1.0, ms); y->setToConst(-1.0, ms); - handler->massDot2Vec(N, x, K, y, res, memspace_); + handler->massDot2Vec(N, x, K, y, res, ms); status *= verifyAnswer(res, (-1.0) * (real_type) N, memspace_); @@ -280,9 +280,9 @@ namespace ReSolve { real_type alpha = -1.0; real_type beta = 1.0; - handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); + handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, ms); status *= verifyAnswer(xN, (real_type) (K) + 0.5, memspace_); - handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); + handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, ms); status *= verifyAnswer(xT, (real_type) (N) + 0.5, memspace_); return status.report(__func__); From 15ebaa7f0a65c7c4ade15816f3551749cffa6ba8 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 12 Dec 2023 15:18:45 -0500 Subject: [PATCH 3/5] Coding style fixes and minor cleanup in LinSolverIterativeRandFGMRES. --- resolve/LinSolverIterativeRandFGMRES.cpp | 5 +- resolve/LinSolverIterativeRandFGMRES.hpp | 162 ++++++++++++----------- 2 files changed, 85 insertions(+), 82 deletions(-) diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 6b7b3b1e6..641deb265 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -60,7 +60,7 @@ namespace ReSolve maxit_= 100; //default restart_ = 10; conv_cond_ = 0;//default - flexible_ = 1; + flexible_ = true; d_V_ = nullptr; d_Z_ = nullptr; @@ -93,7 +93,7 @@ namespace ReSolve maxit_= maxit; restart_ = restart; conv_cond_ = conv_cond; - flexible_ = 1; + flexible_ = true; d_V_ = nullptr; d_Z_ = nullptr; @@ -503,7 +503,6 @@ namespace ReSolve void LinSolverIterativeRandFGMRES::precV(vector_type* rhs, vector_type* x) { LU_solver_->solve(rhs, x); - // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); } } // namespace ReSolve diff --git a/resolve/LinSolverIterativeRandFGMRES.hpp b/resolve/LinSolverIterativeRandFGMRES.hpp index 62601cb0a..412528252 100644 --- a/resolve/LinSolverIterativeRandFGMRES.hpp +++ b/resolve/LinSolverIterativeRandFGMRES.hpp @@ -6,90 +6,94 @@ #include "GramSchmidt.hpp" #include "RandSketchingManager.hpp" -namespace ReSolve +namespace ReSolve { - + /** + * @brief Randomized (F)GMRES + * + * @note MatrixHandler and VectorHandler objects are inherited from + * LinSolver base class. + * + */ class LinSolverIterativeRandFGMRES : public LinSolverIterative { - using vector_type = vector::Vector; + private: + using vector_type = vector::Vector; public: - - enum SketchingMethod { cs = 0, // count sketch - fwht = 1}; // fast Walsh-Hadamard transform - - LinSolverIterativeRandFGMRES(std::string memspace = "cuda"); - - LinSolverIterativeRandFGMRES( MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs, - std::string memspace = "cuda"); - - LinSolverIterativeRandFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs, - std::string memspace = "cuda"); - ~LinSolverIterativeRandFGMRES(); - - int solve(vector_type* rhs, vector_type* x); - int setup(matrix::Sparse* A); - int resetMatrix(matrix::Sparse* new_A); - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver); - - real_type getTol(); - index_type getMaxit(); - index_type getRestart(); - index_type getConvCond(); - bool getFlexible(); - std::string getRandSketchingMethod(); - index_type getKrand(); - - void setTol(real_type new_tol); - void setMaxit(index_type new_maxit); - void setRestart(index_type new_restart); - void setConvCond(index_type new_conv_cond); - void setFlexible(bool new_flexible); - void getRandSketchingMethod(std::string rand_method); + enum SketchingMethod { cs = 0, // count sketch + fwht = 1}; // fast Walsh-Hadamard transform + + LinSolverIterativeRandFGMRES(std::string memspace = "cuda"); + + LinSolverIterativeRandFGMRES(MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs, + std::string memspace = "cuda"); + + LinSolverIterativeRandFGMRES(index_type restart, + real_type tol, + index_type maxit, + index_type conv_cond, + MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs, + std::string memspace = "cuda"); + + ~LinSolverIterativeRandFGMRES(); + + int solve(vector_type* rhs, vector_type* x); + int setup(matrix::Sparse* A); + int resetMatrix(matrix::Sparse* new_A); + int setupPreconditioner(std::string name, LinSolverDirect* LU_solver); + + real_type getTol(); + index_type getMaxit(); + index_type getRestart(); + index_type getConvCond(); + bool getFlexible(); + // std::string getRandSketchingMethod(); + index_type getKrand(); + + void setTol(real_type new_tol); + void setMaxit(index_type new_maxit); + void setRestart(index_type new_restart); + void setConvCond(index_type new_conv_cond); + void setFlexible(bool new_flexible); + // void setRandSketchingMethod(std::string rand_method); private: - //remember matrix handler and vector handler are inherited. - - memory::MemorySpace memspace_; - - real_type tol_; - index_type maxit_; - index_type restart_; - std::string orth_option_; - index_type conv_cond_; - bool flexible_{1}; // if can be run as "normal" GMRES if needed, set flexible_ to 0. Default is 1 of course. - - vector_type* d_V_{nullptr}; - vector_type* d_Z_{nullptr}; - // for performing Gram-Schmidt - vector_type* d_S_{nullptr}; - - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; - real_type* d_aux_{nullptr}; - - - GramSchmidt* GS_; - void precV(vector_type* rhs, vector_type* x); //multiply the vector by preconditioner - LinSolverDirect* LU_solver_; - index_type n_;// for simplicity - real_type one_over_k_{1.0}; - - index_type k_rand_{0}; // size of sketch space, we need to know it so we can allocate S! - MemoryHandler mem_; ///< Device memory manager object - RandSketchingManager* rand_manager_; - SketchingMethod rand_method_; + memory::MemorySpace memspace_; + + real_type tol_; + index_type maxit_; + index_type restart_; + std::string orth_option_; + index_type conv_cond_; + bool flexible_{true}; ///< if can be run as "normal" GMRES if needed, set flexible_ to `false`. Default is `true`, of course. + + vector_type* d_V_{nullptr}; + vector_type* d_Z_{nullptr}; + // for performing Gram-Schmidt + vector_type* d_S_{nullptr}; + + real_type* h_H_{nullptr}; + real_type* h_c_{nullptr}; + real_type* h_s_{nullptr}; + real_type* h_rs_{nullptr}; + real_type* d_aux_{nullptr}; + + GramSchmidt* GS_; + void precV(vector_type* rhs, vector_type* x); ///< multiply the vector by preconditioner + LinSolverDirect* LU_solver_; + index_type n_; + real_type one_over_k_{1.0}; + + index_type k_rand_{0}; ///< size of sketch space. We need to know it so we can allocate S! + MemoryHandler mem_; ///< Device memory manager object + RandSketchingManager* rand_manager_{nullptr}; + SketchingMethod rand_method_; }; -} +} // namespace ReSolve From be6a19c0dffd8b3f901f131516b4bf4687b0c2df Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 12 Dec 2023 16:01:40 -0500 Subject: [PATCH 4/5] Use char instead of std::string for transpose parameter. --- resolve/GramSchmidt.cpp | 8 +-- resolve/LinSolverIterativeRandFGMRES.cpp | 2 +- resolve/vector/VectorHandler.cpp | 2 +- resolve/vector/VectorHandler.hpp | 3 +- resolve/vector/VectorHandlerCpu.cpp | 4 +- resolve/vector/VectorHandlerCpu.hpp | 3 +- resolve/vector/VectorHandlerCuda.cpp | 62 ++++++++++++----------- resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 63 +++++++++++++----------- resolve/vector/VectorHandlerHip.hpp | 3 +- resolve/vector/VectorHandlerImpl.hpp | 3 +- tests/unit/vector/VectorHandlerTests.hpp | 4 +- 12 files changed, 81 insertions(+), 78 deletions(-) diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 79a9fa567..75c571e6c 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -148,9 +148,9 @@ namespace ReSolve case cgs2: vec_v_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); mem_.deviceSynchronize(); // copy H_col to aux, we will need it later @@ -160,11 +160,11 @@ namespace ReSolve mem_.deviceSynchronize(); //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); mem_.deviceSynchronize(); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); mem_.deviceSynchronize(); // copy H_col to H diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 641deb265..cb29f197a 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -298,7 +298,7 @@ namespace ReSolve //V(:, i+1) =w-V(:, 1:i)*d_H_col = V(:, i+1)-d_H_col * V(:,1:i); //checkCudaErrors( cublasDgemv(cublas_handle, CUBLAS_OP_N, n, i + 1, &minusone, d_V, n, d_Hcolumn, 1,&one , &d_V[n * (i + 1)], 1)); - vector_handler_->gemv("N", n_, i + 1, &MINUSONE, &ONE, d_V_, vec_z, vec_v, memspace_ ); + vector_handler_->gemv('N', n_, i + 1, &MINUSONE, &ONE, d_V_, vec_z, vec_v, memspace_ ); vec_z->setCurrentSize(n_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 9dcabd249..1ab134f3e 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -188,7 +188,7 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandler::gemv(std::string transpose, + void VectorHandler::gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 2299e662d..f5b6015b8 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -1,5 +1,4 @@ #pragma once -#include #include namespace ReSolve @@ -46,7 +45,7 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - void gemv(std::string transpose, + void gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index e4f69754c..15b416067 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -119,7 +119,7 @@ namespace ReSolve { * @brief gemv computes matrix-vector product where both matrix and vectors are dense. * i.e., x = beta*x + alpha*V*y * - * @param[in] Transpose - yes (T) or no (N) + * @param[in] Transpose - transposed = 'T' or not 'N' * @param[in] n Number of rows in (non-transposed) matrix * @param[in] k Number of columns in (non-transposed) * @param[in] alpha Constant real number @@ -131,7 +131,7 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCpu::gemv(std::string /* transpose */, + void VectorHandlerCpu::gemv(char /* transpose */, index_type /* n */, index_type /* k */, const real_type* /* alpha */, diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index c02a69646..e56aabd77 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -1,5 +1,4 @@ #pragma once -#include namespace ReSolve { @@ -45,7 +44,7 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(std::string transpose, + virtual void gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index e9f4fa8d7..bcf3f53be 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -140,7 +140,7 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCuda::gemv(std::string transpose, + void VectorHandlerCuda::gemv(char transpose, index_type n, index_type k, const real_type* alpha, @@ -151,34 +151,38 @@ namespace ReSolve { { LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - if (transpose == "T") { - - cublasDgemv(handle_cublas, - CUBLAS_OP_T, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - - } else { - cublasDgemv(handle_cublas, - CUBLAS_OP_N, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); + switch (transpose) { + case 'T': + cublasDgemv(handle_cublas, + CUBLAS_OP_T, + n, + k, + alpha, + V->getData(memory::DEVICE), + n, + y->getData(memory::DEVICE), + 1, + beta, + x->getData(memory::DEVICE), + 1); + return; + default: + cublasDgemv(handle_cublas, + CUBLAS_OP_N, + n, + k, + alpha, + V->getData(memory::DEVICE), + n, + y->getData(memory::DEVICE), + 1, + beta, + x->getData(memory::DEVICE), + 1); + if (transpose != 'N') { + out::warning() << "Unrecognized transpose option " << transpose + << " in gemv. Using non-transposed multivector.\n"; + } } } diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index e122d7b65..1c9eef81b 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -45,7 +45,7 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(std::string transpose, + virtual void gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 04da782cc..0ead2980d 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -137,7 +137,7 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerHip::gemv(std::string transpose, + void VectorHandlerHip::gemv(char transpose, index_type n, index_type k, const real_type* alpha, @@ -148,36 +148,39 @@ namespace ReSolve { { LinAlgWorkspaceHIP* workspaceHIP = workspace_; rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - if (transpose == "T") { - - rocblas_dgemv(handle_rocblas, - rocblas_operation_transpose, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - - } else { - rocblas_dgemv(handle_rocblas, - rocblas_operation_none, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); + switch (transpose) { + case 'T': + rocblas_dgemv(handle_rocblas, + rocblas_operation_transpose, + n, + k, + alpha, + V->getData(memory::DEVICE), + n, + y->getData(memory::DEVICE), + 1, + beta, + x->getData(memory::DEVICE), + 1); + return; + default: + rocblas_dgemv(handle_rocblas, + rocblas_operation_none, + n, + k, + alpha, + V->getData(memory::DEVICE), + n, + y->getData(memory::DEVICE), + 1, + beta, + x->getData(memory::DEVICE), + 1); + if (transpose != 'N') { + out::warning() << "Unrecognized transpose option " << transpose + << " in gemv. Using non-transposed multivector.\n"; + } } - } /** diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 3ad431d93..075ed70a3 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -1,5 +1,4 @@ #pragma once -#include namespace ReSolve { @@ -45,7 +44,7 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(std::string transpose, + virtual void gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index b6ce76c27..694efc14a 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -1,5 +1,4 @@ #pragma once -#include namespace ReSolve { @@ -47,7 +46,7 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(std::string transpose, + virtual void gemv(char transpose, index_type n, index_type k, const real_type* alpha, diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 50093ab4a..82b8b9bf6 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -280,9 +280,9 @@ namespace ReSolve { real_type alpha = -1.0; real_type beta = 1.0; - handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, ms); + handler->gemv('N', N, K, &alpha, &beta, V, yN, xN, ms); status *= verifyAnswer(xN, (real_type) (K) + 0.5, memspace_); - handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, ms); + handler->gemv('T', N, K, &alpha, &beta, V, yT, xT, ms); status *= verifyAnswer(xT, (real_type) (N) + 0.5, memspace_); return status.report(__func__); From 10ea360a5729c8914607e9a6f620d9e6524b69c2 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 12 Dec 2023 16:15:46 -0500 Subject: [PATCH 5/5] Clean up access to workspace pointers in handlers. --- resolve/matrix/MatrixHandlerCuda.cpp | 24 +++++++++++------------- resolve/matrix/MatrixHandlerHip.cpp | 20 +++++++++----------- resolve/vector/VectorHandlerCuda.cpp | 21 +++++++-------------- resolve/vector/VectorHandlerHip.cpp | 24 ++++++++---------------- 4 files changed, 35 insertions(+), 54 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index bfaf3658a..7f6746a73 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -42,18 +42,17 @@ namespace ReSolve { matrix::Csr* A = dynamic_cast(Ageneric); //result = alpha *A*x + beta * result cusparseStatus_t status; - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); + cusparseDnVecDescr_t vecx = workspace_->getVecX(); cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); - cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); + cusparseDnVecDescr_t vecAx = workspace_->getVecY(); cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); - cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); + cusparseSpMatDescr_t matA = workspace_->getSpmvMatrixDescriptor(); - void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); - cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); + void* buffer_spmv = workspace_->getSpmvBuffer(); + cusparseHandle_t handle_cusparse = workspace_->getCusparseHandle(); if (values_changed_) { status = cusparseCreateCsr(&matA, A->getNumRows(), @@ -69,7 +68,7 @@ namespace ReSolve { error_sum += status; values_changed_ = false; } - if (!workspaceCUDA->matvecSetup()) { + if (!workspace_->matvecSetup()) { //setup first, allocate, etc. size_t bufferSize = 0; @@ -86,10 +85,10 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); mem_.allocateBufferOnDevice(&buffer_spmv, bufferSize); - workspaceCUDA->setSpmvMatrixDescriptor(matA); - workspaceCUDA->setSpmvBuffer(buffer_spmv); + workspace_->setSpmvMatrixDescriptor(matA); + workspace_->setSpmvBuffer(buffer_spmv); - workspaceCUDA->matvecSetupDone(); + workspace_->matvecSetupDone(); } status = cusparseSpMV(handle_cusparse, @@ -159,7 +158,6 @@ namespace ReSolve { int MatrixHandlerCuda::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) { index_type error_sum = 0; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; A_csr->allocateMatrixData(memory::DEVICE); index_type n = A_csc->getNumRows(); @@ -167,7 +165,7 @@ namespace ReSolve { index_type nnz = A_csc->getNnz(); size_t bufferSize; void* d_work; - cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), + cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), n, m, nnz, @@ -184,7 +182,7 @@ namespace ReSolve { &bufferSize); error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), + status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), n, m, nnz, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index d97e13f67..44d9d8bec 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -41,14 +41,13 @@ namespace ReSolve { matrix::Csr* A = dynamic_cast(Ageneric); //result = alpha *A*x + beta * result rocsparse_status status; - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocsparse_handle handle_rocsparse = workspaceHIP->getRocsparseHandle(); + rocsparse_handle handle_rocsparse = workspace_->getRocsparseHandle(); - rocsparse_mat_info infoA = workspaceHIP->getSpmvMatrixInfo(); - rocsparse_mat_descr descrA = workspaceHIP->getSpmvMatrixDescriptor(); + rocsparse_mat_info infoA = workspace_->getSpmvMatrixInfo(); + rocsparse_mat_descr descrA = workspace_->getSpmvMatrixDescriptor(); - if (!workspaceHIP->matvecSetup()) { + if (!workspace_->matvecSetup()) { //setup first, allocate, etc. rocsparse_create_mat_descr(&(descrA)); rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero); @@ -69,9 +68,9 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); - workspaceHIP->setSpmvMatrixDescriptor(descrA); - workspaceHIP->setSpmvMatrixInfo(infoA); - workspaceHIP->matvecSetupDone(); + workspace_->setSpmvMatrixDescriptor(descrA); + workspace_->setSpmvMatrixInfo(infoA); + workspace_->matvecSetupDone(); } status = rocsparse_dcsrmv(handle_rocsparse, @@ -144,7 +143,6 @@ namespace ReSolve { int MatrixHandlerHip::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) { index_type error_sum = 0; - LinAlgWorkspaceHIP* workspaceHIP = (LinAlgWorkspaceHIP*) workspace_; rocsparse_status status; @@ -155,7 +153,7 @@ namespace ReSolve { size_t bufferSize; void* d_work; - status = rocsparse_csr2csc_buffer_size(workspaceHIP->getRocsparseHandle(), + status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), n, m, nnz, @@ -167,7 +165,7 @@ namespace ReSolve { error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = rocsparse_dcsr2csc(workspaceHIP->getRocsparseHandle(), + status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), n, m, nnz, diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index bcf3f53be..06319a94f 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -47,8 +47,7 @@ namespace ReSolve { real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); double nrm = 0.0; cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); if (st!=0) {printf("dot product crashed with code %d \n", st);} @@ -64,8 +63,7 @@ namespace ReSolve { */ void VectorHandlerCuda::scal(const real_type* alpha, vector::Vector* x) { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); if (st!=0) { ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; @@ -103,7 +101,7 @@ namespace ReSolve { } /** - * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * @brief axpy i.e, y = alpha*x + y where alpha is a constant * * @param[in] alpha The constant * @param[in] x The first vector @@ -112,9 +110,7 @@ namespace ReSolve { */ void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) { - //AXPY: y = alpha * x + y - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasDaxpy(handle_cublas, x->getSize(), alpha, @@ -149,8 +145,7 @@ namespace ReSolve { vector::Vector* y, vector::Vector* x) { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); switch (transpose) { case 'T': cublasDgemv(handle_cublas, @@ -203,8 +198,7 @@ namespace ReSolve { if (k < 200) { mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); } else { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasDgemm(handle_cublas, CUBLAS_OP_N, CUBLAS_OP_N, @@ -243,8 +237,7 @@ namespace ReSolve { if (k < 200) { mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); } else { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasDgemm(handle_cublas, CUBLAS_OP_T, CUBLAS_OP_N, diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 0ead2980d..69248cc26 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -45,9 +45,8 @@ namespace ReSolve { */ real_type VectorHandlerHip::dot(vector::Vector* x, vector::Vector* y) - { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + { + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); double nrm = 0.0; rocblas_status st= rocblas_ddot (handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); @@ -64,8 +63,7 @@ namespace ReSolve { */ void VectorHandlerHip::scal(const real_type* alpha, vector::Vector* x) { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); if (st!=0) { @@ -99,7 +97,7 @@ namespace ReSolve { } /** - * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * @brief axpy i.e, y = alpha*x + y where alpha is a constant * * @param[in] alpha The constant * @param[in] x The first vector @@ -108,9 +106,7 @@ namespace ReSolve { */ void VectorHandlerHip::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) { - //AXPY: y = alpha * x + y - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_daxpy(handle_rocblas, x->getSize(), alpha, @@ -146,8 +142,7 @@ namespace ReSolve { vector::Vector* y, vector::Vector* x) { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); switch (transpose) { case 'T': rocblas_dgemv(handle_rocblas, @@ -201,8 +196,7 @@ namespace ReSolve { mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); } else { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_dgemm(handle_rocblas, rocblas_operation_none, rocblas_operation_none, @@ -241,10 +235,8 @@ namespace ReSolve { if (k < 200) { mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); - } else { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_dgemm(handle_rocblas, rocblas_operation_transpose, rocblas_operation_none,