From 6340345ec4198ed4af76734fae86a4102454bb5b Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 12:08:43 -0500 Subject: [PATCH 01/23] Rename 'mass' vector handler methods. --- resolve/GramSchmidt.cpp | 8 ++++---- resolve/cuda/cudaKernels.cu | 4 ++-- resolve/hip/hipKernels.hip | 4 ++-- resolve/vector/VectorHandler.cpp | 12 ++++++------ resolve/vector/VectorHandler.hpp | 4 ++-- resolve/vector/VectorHandlerCpu.cpp | 4 ++-- resolve/vector/VectorHandlerCpu.hpp | 4 ++-- resolve/vector/VectorHandlerCuda.cpp | 4 ++-- resolve/vector/VectorHandlerCuda.hpp | 4 ++-- resolve/vector/VectorHandlerHip.cpp | 4 ++-- resolve/vector/VectorHandlerHip.hpp | 4 ++-- resolve/vector/VectorHandlerImpl.hpp | 4 ++-- tests/unit/vector/VectorHandlerTests.hpp | 6 +++--- tests/unit/vector/runVectorHandlerTests.cpp | 10 +++++----- 14 files changed, 38 insertions(+), 38 deletions(-) diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 533f9b02d..827922647 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -233,7 +233,7 @@ namespace ReSolve vec_w_->setData(V->getData(i + 1, memspace_), memspace_); vec_rv_->resize(i + 1); - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vector_handler_->dot2Multi(n, V, i + 1, vec_x_, vec_rv_, memspace_); vec_rv_->setDataUpdated(memspace_); if (memspace_ == memory::DEVICE) { @@ -260,7 +260,7 @@ namespace ReSolve } // for j vec_Hcolumn_->resize(i + 1); vec_Hcolumn_->copyFromExternal(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + vector_handler_->axpyMulti(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); // normalize (second synch) t = vector_handler_->dot(vec_w_, vec_w_, memspace_); @@ -290,7 +290,7 @@ namespace ReSolve vec_w_->setData(V->getData(i + 1, memspace_), memspace_); vec_rv_->resize(i + 1); - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vector_handler_->dot2Multi(n, V, i + 1, vec_x_, vec_rv_, memspace_); vec_rv_->setDataUpdated(memspace_); if (memspace_ == memory::DEVICE) { @@ -351,7 +351,7 @@ namespace ReSolve vec_Hcolumn_->resize(i + 1); vec_Hcolumn_->copyFromExternal(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + vector_handler_->axpyMulti(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); // normalize (second synch) t = vector_handler_->dot(vec_w_, vec_w_, memspace_); // set the last entry in Hessenberg matrix diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index bdd8a1f76..393422708 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -141,7 +141,7 @@ namespace ReSolve * @param[in] alpha - doble array, size [k x 1] */ template - __global__ void massAxpy3(index_type N, + __global__ void axpyMulti3(index_type N, index_type k, const real_type* x_data, real_type* y_data, @@ -308,7 +308,7 @@ namespace ReSolve */ void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha) { - kernels::massAxpy3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha); + kernels::axpyMulti3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha); } /** diff --git a/resolve/hip/hipKernels.hip b/resolve/hip/hipKernels.hip index fdf67bfcb..04fe0c063 100644 --- a/resolve/hip/hipKernels.hip +++ b/resolve/hip/hipKernels.hip @@ -131,7 +131,7 @@ namespace ReSolve { * @param[in] alpha - */ template - __global__ void massAxpy3_kernel(index_type N, + __global__ void axpyMulti3_kernel(index_type N, index_type k, const real_type* x_data, real_type* y_data, @@ -419,7 +419,7 @@ namespace ReSolve { */ void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha) { - hipLaunchKernelGGL(kernels::massAxpy3_kernel, + hipLaunchKernelGGL(kernels::axpyMulti3_kernel, dim3((n + 384 - 1) / 384), dim3(384), 0, diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 86e30ba4b..96909df32 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -233,16 +233,16 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandler::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) { case HOST: - cpuImpl_->massAxpy(size, alpha, k, x, y); + cpuImpl_->axpyMulti(size, alpha, k, x, y); break; case DEVICE: - devImpl_->massAxpy(size, alpha, k, x, y); + devImpl_->axpyMulti(size, alpha, k, x, y); break; } y->setDataUpdated(memspace); @@ -262,16 +262,16 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace) + void VectorHandler::dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) { case HOST: - cpuImpl_->massDot2Vec(size, V, k, x, res); + cpuImpl_->dot2Multi(size, V, k, x, res); break; case DEVICE: - devImpl_->massDot2Vec(size, V, k, x, res); + devImpl_->dot2Multi(size, V, k, x, res); break; } res->setDataUpdated(memspace); diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index cbea0c28e..cb95af72b 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -37,11 +37,11 @@ namespace ReSolve void scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); + void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index de7ff3869..d143ec0da 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -205,7 +205,7 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCpu::massAxpy(index_type size, + void VectorHandlerCpu::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, @@ -242,7 +242,7 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCpu::massDot2Vec(index_type size, + void VectorHandlerCpu::dot2Multi(index_type size, vector::Vector* V, index_type q, vector::Vector* x, diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index bf3c2ba83..2182d7fb1 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -35,11 +35,11 @@ namespace ReSolve virtual real_type infNorm(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index e60cad347..534ddb79d 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -203,7 +203,7 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCuda::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerCuda::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) { using namespace constants; if (k < 200) @@ -244,7 +244,7 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCuda::massDot2Vec(index_type size, + void VectorHandlerCuda::dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 5a5b500c7..948269fcf 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -36,11 +36,11 @@ namespace ReSolve virtual real_type infNorm(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index f3930d8ac..5a98c8d9f 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -198,7 +198,7 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerHip::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerHip::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) { using namespace constants; if (k < 200) @@ -239,7 +239,7 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerHip::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + void VectorHandlerHip::dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) { using namespace constants; diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index a1601dddb..f4d6a409f 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -36,11 +36,11 @@ namespace ReSolve virtual real_type infNorm(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 9b6c0a073..c0392adc1 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -36,11 +36,11 @@ namespace ReSolve virtual real_type infNorm(vector::Vector* x) = 0; // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; // Scale a vector by a diagonal matrix virtual int scale(vector::Vector* diag, vector::Vector* vec) = 0; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 04444a815..3bc272071 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -147,7 +147,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome massAxpy(index_type N, index_type K) + TestOutcome axpyMulti(index_type N, index_type K) { TestStatus status; @@ -180,7 +180,7 @@ namespace ReSolve index_type r = K % 2; real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - handler_.massAxpy(N, &alpha, K, &x, &y, memspace_); + handler_.axpyMulti(N, &alpha, K, &x, &y, memspace_); status *= verifyAnswer(y, 2.0 - res); return status.report(__func__); @@ -199,7 +199,7 @@ namespace ReSolve x.setToConst(1.0, memspace_); y.setToConst(-1.0, memspace_); - handler_.massDot2Vec(N, &x, K, &y, &res, memspace_); + handler_.dot2Multi(N, &x, K, &y, &res, memspace_); status *= verifyAnswer(res, (-1.0) * (real_type) N); diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 74fae7e41..cb86fe1ff 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -22,7 +22,7 @@ int main(int, char**) result += test.scal(50); result += test.infNorm(50); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); + result += test.axpyMulti(100, 10); result += test.massDot(100, 10); result += test.scale(100); @@ -42,8 +42,8 @@ int main(int, char**) result += test.axpy(5000); result += test.scal(5000); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); - result += test.massAxpy(1000, 30); + result += test.axpyMulti(100, 10); + result += test.axpyMulti(1000, 30); result += test.massDot(100, 10); result += test.massDot(1000, 30); result += test.infNorm(1000); @@ -66,8 +66,8 @@ int main(int, char**) result += test.axpy(5000); result += test.scal(5000); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); - result += test.massAxpy(1000, 300); + result += test.axpyMulti(100, 10); + result += test.axpyMulti(1000, 300); result += test.massDot(100, 10); result += test.massDot(1000, 30); result += test.infNorm(1000); From 19e0286abdf504d9ba997ef6d3fda3ffb80b4c9a Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 12:13:05 -0500 Subject: [PATCH 02/23] Rename scale --> scal to match BLAS naming. --- resolve/vector/VectorHandler.cpp | 6 +++--- resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 2 +- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 2 +- resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 2 +- resolve/vector/VectorHandlerHip.hpp | 2 +- resolve/vector/VectorHandlerImpl.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 96909df32..50ae8cb16 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -289,7 +289,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::scale(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) + int VectorHandler::scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) { assert(diag->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); @@ -298,10 +298,10 @@ namespace ReSolve switch (memspace) { case HOST: - return cpuImpl_->scale(diag, vec); + return cpuImpl_->scal(diag, vec); break; case DEVICE: - return devImpl_->scale(diag, vec); + return devImpl_->scal(diag, vec); break; } return 1; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index cb95af72b..2d0ccc8bb 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -59,7 +59,7 @@ namespace ReSolve vector::Vector* x, memory::MemorySpace memspace); - int scale(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); + int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); /** infNorm: * Returns infinity norm of a vector (i.e., entry with max abs value) diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index d143ec0da..07a0607cc 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -284,7 +284,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::scale(vector::Vector* diag, vector::Vector* vec) + int VectorHandlerCpu::scal(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::HOST); real_type* vec_data = vec->getData(memory::HOST); diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 2182d7fb1..e667cae72 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -56,7 +56,7 @@ namespace ReSolve vector::Vector* y, vector::Vector* x); - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual int scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceCpu* workspace_; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 534ddb79d..7252eae87 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -294,7 +294,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::scale(vector::Vector* diag, vector::Vector* vec) + int VectorHandlerCuda::scal(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 948269fcf..2e6e9ea8b 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -65,7 +65,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual int scal(vector::Vector* diag, vector::Vector* vec); private: MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 5a98c8d9f..9ad6db76b 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -280,7 +280,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::scale(vector::Vector* diag, vector::Vector* vec) + int VectorHandlerHip::scal(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index f4d6a409f..7c43b67b4 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -65,7 +65,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual int scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceHIP* workspace_; diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index c0392adc1..26e28d2a0 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -43,7 +43,7 @@ namespace ReSolve virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; // Scale a vector by a diagonal matrix - virtual int scale(vector::Vector* diag, vector::Vector* vec) = 0; + virtual int scal(vector::Vector* diag, vector::Vector* vec) = 0; /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 3bc272071..eebfb662d 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -259,7 +259,7 @@ namespace ReSolve } diag.copyFromExternal(diag_data.get(), memory::HOST, memspace_); - handler_.scale(&diag, &vec, memspace_); + handler_.scal(&diag, &vec, memspace_); if (memspace_ == memory::DEVICE) { From e8a13a75b493c3e6e6a6654b3a1ea61240da6d9e Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 12:16:20 -0500 Subject: [PATCH 03/23] Change infNorm --> iamax to match BLAS naming. --- examples/ExampleHelper.hpp | 8 ++++---- resolve/SystemSolver.cpp | 4 ++-- resolve/vector/VectorHandler.cpp | 6 +++--- resolve/vector/VectorHandler.hpp | 4 ++-- resolve/vector/VectorHandlerCpu.cpp | 2 +- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 2 +- resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 2 +- resolve/vector/VectorHandlerHip.hpp | 2 +- resolve/vector/VectorHandlerImpl.hpp | 4 ++-- tests/functionality/TestHelper.hpp | 4 ++-- tests/functionality/testSysGLU.cpp | 8 ++++---- tests/unit/vector/VectorHandlerTests.hpp | 4 ++-- tests/unit/vector/runVectorHandlerTests.cpp | 6 +++--- 15 files changed, 30 insertions(+), 30 deletions(-) diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index cd78f7c43..238100fe5 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -226,8 +226,8 @@ namespace ReSolve // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type inf_norm_x = vh_.iamax(x_, memspace_); + real_type inf_norm_res = vh_.iamax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -318,8 +318,8 @@ namespace ReSolve // Compute norm of scaled residuals mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); - inf_norm_x_ = vh_.infNorm(x_, memspace_); - inf_norm_res_ = vh_.infNorm(res_, memspace_); + inf_norm_x_ = vh_.iamax(x_, memspace_); + inf_norm_res_ = vh_.iamax(res_, memspace_); nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); } diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index b4df7ae6a..17773fb20 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -876,8 +876,8 @@ namespace ReSolve } matrixHandler_->setValuesChanged(true, ms); matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUS_ONE, ms); - resnorm = vectorHandler_->infNorm(resVector_, ms); - norm_x = vectorHandler_->infNorm(x, ms); + resnorm = vectorHandler_->iamax(resVector_, ms); + norm_x = vectorHandler_->iamax(x, ms); matrixHandler_->matrixInfNorm(A_, &norm_A, ms); return resnorm / (norm_x * norm_A); } diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 50ae8cb16..877ca5ce2 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -141,16 +141,16 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandler::infNorm(vector::Vector* x, memory::MemorySpace memspace) + real_type VectorHandler::iamax(vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->infNorm(x); + return cpuImpl_->iamax(x); break; case DEVICE: - return devImpl_->infNorm(x); + return devImpl_->iamax(x); break; } return -1.0; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 2d0ccc8bb..9ecc50102 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -61,10 +61,10 @@ namespace ReSolve int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); - /** infNorm: + /** iamax: * Returns infinity norm of a vector (i.e., entry with max abs value) */ - real_type infNorm(vector::Vector* x, memory::MemorySpace memspace); + real_type iamax(vector::Vector* x, memory::MemorySpace memspace); bool getIsCudaEnabled() const; bool getIsHipEnabled() const; diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 07a0607cc..8f5ca4781 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -86,7 +86,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCpu::infNorm(vector::Vector* x) + real_type VectorHandlerCpu::iamax(vector::Vector* x) { real_type* x_data = x->getData(memory::HOST); real_type vecmax = std::abs(x_data[0]); diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index e667cae72..bb67730bb 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -32,7 +32,7 @@ namespace ReSolve virtual void scal(const real_type* alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type iamax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 7252eae87..7e4e394e5 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -85,7 +85,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCuda::infNorm(vector::Vector* x) + real_type VectorHandlerCuda::iamax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 2e6e9ea8b..4201aad76 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -33,7 +33,7 @@ namespace ReSolve virtual void scal(const real_type* alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type iamax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 9ad6db76b..1ead333d3 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -86,7 +86,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerHip::infNorm(vector::Vector* x) + real_type VectorHandlerHip::iamax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 7c43b67b4..298b22f94 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -33,7 +33,7 @@ namespace ReSolve virtual void scal(const real_type* alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type iamax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 26e28d2a0..af892ff36 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -32,8 +32,8 @@ namespace ReSolve // scal = alpha * x virtual void scal(const real_type* alpha, vector::Vector* x) = 0; - // infNorm = ||x||_\infty - virtual real_type infNorm(vector::Vector* x) = 0; + // iamax = ||x||_\infty + virtual real_type iamax(vector::Vector* x) = 0; // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; diff --git a/tests/functionality/TestHelper.hpp b/tests/functionality/TestHelper.hpp index d7542a257..04b19d9e0 100644 --- a/tests/functionality/TestHelper.hpp +++ b/tests/functionality/TestHelper.hpp @@ -305,8 +305,8 @@ class TestHelper // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type inf_norm_x = vh_.iamax(x_, memspace_); + real_type inf_norm_res = vh_.iamax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index 543726fd8..488997636 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -153,8 +153,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals real_type inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - real_type inf_norm_x = vector_handler.infNorm(vec_x, ReSolve::memory::DEVICE); - real_type inf_norm_r = vector_handler.infNorm(vec_r, ReSolve::memory::DEVICE); + real_type inf_norm_x = vector_handler.iamax(vec_x, ReSolve::memory::DEVICE); + real_type inf_norm_r = vector_handler.iamax(vec_r, ReSolve::memory::DEVICE); real_type nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); real_type nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -268,8 +268,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals for the second system inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - inf_norm_x = vector_handler.infNorm(vec_x, ReSolve::memory::DEVICE); - inf_norm_r = vector_handler.infNorm(vec_r, ReSolve::memory::DEVICE); + inf_norm_x = vector_handler.iamax(vec_x, ReSolve::memory::DEVICE); + inf_norm_r = vector_handler.iamax(vec_r, ReSolve::memory::DEVICE); nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); error = std::abs(nsr_system - nsr_norm) / nsr_norm; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index eebfb662d..47ab278a0 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -49,7 +49,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome infNorm(index_type N) + TestOutcome iamax(index_type N) { TestStatus status; status = true; @@ -63,7 +63,7 @@ namespace ReSolve } x.copyFromExternal(data, memory::HOST, memspace_); - real_type result = handler_.infNorm(&x, memspace_); + real_type result = handler_.iamax(&x, memspace_); real_type answer = static_cast(N - 1) * 0.1; if (!isEqual(result, answer)) diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index cb86fe1ff..9f3091820 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -20,7 +20,7 @@ int main(int, char**) result += test.dot(50); result += test.axpy(50); result += test.scal(50); - result += test.infNorm(50); + result += test.iamax(50); result += test.gemv(5000, 10); result += test.axpyMulti(100, 10); result += test.massDot(100, 10); @@ -46,7 +46,7 @@ int main(int, char**) result += test.axpyMulti(1000, 30); result += test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.infNorm(1000); + result += test.iamax(1000); result += test.scale(1000); std::cout << "\n"; @@ -70,7 +70,7 @@ int main(int, char**) result += test.axpyMulti(1000, 300); result += test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.infNorm(1000); + result += test.iamax(1000); result += test.scale(1000); std::cout << "\n"; From ea5521375d7df81c8a4ff4508a1e72e09e53809d Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 13:04:38 -0500 Subject: [PATCH 04/23] Pass scalars by value in VectorHandler. --- resolve/GramSchmidt.cpp | 24 +++++++-------- resolve/LinSolverIterativeFGMRES.cpp | 8 ++--- resolve/LinSolverIterativeRandFGMRES.cpp | 20 ++++++------ resolve/vector/VectorHandler.cpp | 20 ++++++------ resolve/vector/VectorHandler.hpp | 20 ++++++------ resolve/vector/VectorHandlerCpu.cpp | 32 +++++++++---------- resolve/vector/VectorHandlerCpu.hpp | 20 ++++++------ resolve/vector/VectorHandlerCuda.cpp | 28 ++++++++--------- resolve/vector/VectorHandlerCuda.hpp | 20 ++++++------ resolve/vector/VectorHandlerHip.cpp | 39 +++++++++++++----------- resolve/vector/VectorHandlerHip.hpp | 20 ++++++------ resolve/vector/VectorHandlerImpl.hpp | 20 ++++++------ tests/functionality/TestHelper.hpp | 2 +- tests/functionality/testLUSOL.cpp | 4 +-- tests/functionality/testSysGLU.cpp | 7 ++++- tests/unit/vector/GramSchmidtTests.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 8 ++--- 17 files changed, 152 insertions(+), 142 deletions(-) diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 827922647..56b973230 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -158,7 +158,7 @@ namespace ReSolve t = vector_handler_->dot(vec_v_, vec_w_, memspace_); H[idxmap(i, j, num_vecs_ + 1)] = t; t *= -1.0; - vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); + vector_handler_->axpy(t, vec_v_, vec_w_, memspace_); } t = 0.0; t = vector_handler_->dot(vec_w_, vec_w_, memspace_); @@ -168,7 +168,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); } else { @@ -179,9 +179,9 @@ namespace ReSolve case CGS2: vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to aux, we will need it later @@ -191,11 +191,11 @@ namespace ReSolve mem_.deviceSynchronize(); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); mem_.deviceSynchronize(); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H @@ -218,7 +218,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_v_, memspace_); + vector_handler_->scal(t, vec_v_, memspace_); } else { @@ -270,7 +270,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); for (int ii = 0; ii <= i; ++ii) { vec_v_->setData(V->getData(ii, memspace_), memspace_); @@ -360,7 +360,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); } else { @@ -373,9 +373,9 @@ namespace ReSolve case CGS1: vec_v_->setData(V->getData(i + 1, memspace_), memspace_); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H @@ -391,7 +391,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_v_, memspace_); + vector_handler_->scal(t, vec_v_, memspace_); } else { diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 67ee67a7a..67efc53e3 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -176,7 +176,7 @@ namespace ReSolve // normalize first vector t = 1.0 / rnorm; - vector_handler_->scal(&t, vec_V_, memspace_); + vector_handler_->scal(t, vec_V_, memspace_); // initialize norm history h_rs_[0] = rnorm; i = -1; @@ -272,7 +272,7 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_z, x, memspace_); } } else @@ -282,14 +282,14 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x - vector_handler_->axpy(&ONE, &vec_v, x, memspace_); + vector_handler_->axpy(ONE, &vec_v, x, memspace_); } /* test solution */ diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index bc03bf4f0..9978bbdc7 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -169,7 +169,7 @@ namespace ReSolve if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -219,8 +219,8 @@ namespace ReSolve // normalize first vector t = 1.0 / rnorm; - vector_handler_->scal(&t, vec_V_, memspace_); - vector_handler_->scal(&t, vec_S_, memspace_); + vector_handler_->scal(t, vec_V_, memspace_); + vector_handler_->scal(t, vec_S_, memspace_); mem_.deviceSynchronize(); @@ -259,7 +259,7 @@ namespace ReSolve sketching_handler_->Theta(&vec_v, &vec_s); if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); GS_->orthogonalize(k_rand_, vec_S_, h_H_, i); @@ -269,10 +269,10 @@ namespace ReSolve vec_aux_->copyFromExternal(&h_H_[i * (restart_ + 1)], memory::HOST, memspace_); // V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i); - vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_); + vector_handler_->gemv('N', n_, i + 1, MINUS_ONE, ONE, vec_V_, vec_aux_, &vec_v, memspace_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; - vector_handler_->scal(&t, &vec_v, memspace_); + vector_handler_->scal(t, &vec_v, memspace_); mem_.deviceSynchronize(); vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); @@ -340,7 +340,7 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_z, x, memspace_); } } else @@ -350,14 +350,14 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x - vector_handler_->axpy(&ONE, &vec_v, x, memspace_); + vector_handler_->axpy(ONE, &vec_v, x, memspace_); } /* test solution */ @@ -382,7 +382,7 @@ namespace ReSolve sketching_handler_->Theta(&vec_v, &vec_s); if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); rnorm = vector_handler_->dot(vec_S_, vec_S_, memspace_); diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 877ca5ce2..ebb382fe6 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -117,7 +117,7 @@ namespace ReSolve * @param memspace[in] string containg memspace (cpu or cuda or hip) * */ - void VectorHandler::scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace) + void VectorHandler::scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) @@ -165,7 +165,7 @@ namespace ReSolve * @param[in] memspace String containg memspace (cpu or cuda or hip) * */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { // AXPY: y = alpha * x + y using namespace ReSolve::memory; @@ -198,14 +198,14 @@ namespace ReSolve * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandler::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x, + void VectorHandler::gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 9ecc50102..74cd3a12c 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -28,13 +28,13 @@ namespace ReSolve ~VectorHandler(); // y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); // dot: x \cdot y real_type dot(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); // scal = alpha * x - void scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace); + void scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); @@ -49,14 +49,14 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x, + void gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, memory::MemorySpace memspace); int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 8f5ca4781..bcd901328 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -68,13 +68,13 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerCpu::scal(const real_type alpha, vector::Vector* x) { real_type* x_data = x->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i) { - x_data[i] *= (*alpha); + x_data[i] *= alpha; } } @@ -110,14 +110,14 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerCpu::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y) { // AXPY: y = alpha * x + y real_type* x_data = x->getData(memory::HOST); real_type* y_data = y->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i) { - y_data[i] = (*alpha) * x_data[i] + y_data[i]; + y_data[i] = alpha * x_data[i] + y_data[i]; } } @@ -137,14 +137,14 @@ namespace ReSolve * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCpu::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerCpu::gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { // x = beta*x + alpha*V*y OR x = beta*x + alpha*V^Ty real_type* V_data = V->getData(memory::HOST); @@ -157,11 +157,11 @@ namespace ReSolve case 'T': for (i = 0; i < k; ++i) { - sum = (*beta) * x_data[i]; + sum = beta * x_data[i]; real_type c = 0.0; for (j = 0; j < n; ++j) { - real_type y = ((*alpha) * V_data[i * n + j] * y_data[j]) - c; + real_type y = (alpha * V_data[i * n + j] * y_data[j]) - c; real_type t = sum + y; c = (t - sum) - y; sum = t; @@ -173,11 +173,11 @@ namespace ReSolve default: for (i = 0; i < n; ++i) { - sum = (*beta) * x_data[i]; + sum = beta * x_data[i]; real_type c = 0.0; for (j = 0; j < k; ++j) { - real_type y = ((*alpha) * V_data[n * j + i] * y_data[j]) - c; + real_type y = (alpha * V_data[n * j + i] * y_data[j]) - c; real_type t = sum + y; c = (t - sum) - y; sum = t; diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index bb67730bb..bb66f3a51 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -23,13 +23,13 @@ namespace ReSolve virtual ~VectorHandlerCpu(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm virtual real_type iamax(vector::Vector* x); @@ -47,14 +47,14 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); virtual int scal(vector::Vector* diag, vector::Vector* vec); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 7e4e394e5..8e4c11461 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -117,12 +117,12 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerCuda::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasDaxpy(handle_cublas, x->getSize(), - alpha, + &alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), @@ -145,14 +145,14 @@ namespace ReSolve * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCuda::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerCuda::gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); switch (transpose) @@ -162,12 +162,12 @@ namespace ReSolve CUBLAS_OP_T, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); return; @@ -176,12 +176,12 @@ namespace ReSolve CUBLAS_OP_N, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); if (transpose != 'N') diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 4201aad76..078206204 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -24,13 +24,13 @@ namespace ReSolve virtual ~VectorHandlerCuda(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, /* const */vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm virtual real_type iamax(vector::Vector* x); @@ -48,14 +48,14 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); /** * @brief scale: scales a vector by a diagonal matrix diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 1ead333d3..0199521e2 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -67,14 +67,19 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerHip::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerHip::scal(const real_type alpha, vector::Vector* x) { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); - rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + + rocblas_status st = rocblas_dscal(handle_rocblas, + x->getSize(), + &alpha, + x->getData(memory::DEVICE), + 1); if (st != 0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; + ReSolve::io::Logger::error() << "scal returned error code " << st << "\n"; } } @@ -112,12 +117,12 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerHip::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerHip::axpy(const real_type alpha, vector::Vector* x, vector::Vector* y) { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_daxpy(handle_rocblas, x->getSize(), - alpha, + &alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), @@ -140,14 +145,14 @@ namespace ReSolve * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerHip::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerHip::gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); switch (transpose) @@ -157,12 +162,12 @@ namespace ReSolve rocblas_operation_transpose, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); return; @@ -171,12 +176,12 @@ namespace ReSolve rocblas_operation_none, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); if (transpose != 'N') diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 298b22f94..1bbc7ec07 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -24,13 +24,13 @@ namespace ReSolve virtual ~VectorHandlerHip(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm virtual real_type iamax(vector::Vector* x); @@ -48,14 +48,14 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); /** * @brief scale: scales a vector by a diagonal matrix diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index af892ff36..e2f1c818a 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -24,13 +24,13 @@ namespace ReSolve } // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) = 0; + virtual void axpy(const real_type alpha, vector::Vector* x, vector::Vector* y) = 0; // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y) = 0; // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x) = 0; + virtual void scal(const real_type alpha, vector::Vector* x) = 0; // iamax = ||x||_\infty virtual real_type iamax(vector::Vector* x) = 0; @@ -51,14 +51,14 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) = 0; + virtual void gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) = 0; }; } // namespace ReSolve diff --git a/tests/functionality/TestHelper.hpp b/tests/functionality/TestHelper.hpp index 04b19d9e0..7dbf4dcf8 100644 --- a/tests/functionality/TestHelper.hpp +++ b/tests/functionality/TestHelper.hpp @@ -469,7 +469,7 @@ class TestHelper ReSolve::memory::MemorySpace memspace) { using namespace ReSolve::constants; - vh_.axpy(&MINUS_ONE, &x_true, &x, memspace); // x := -x_true + x + vh_.axpy(MINUS_ONE, &x_true, &x, memspace); // x := -x_true + x return norm2(x, memspace); } diff --git a/tests/functionality/testLUSOL.cpp b/tests/functionality/testLUSOL.cpp index 64e2f19b6..3ab659808 100644 --- a/tests/functionality/testLUSOL.cpp +++ b/tests/functionality/testLUSOL.cpp @@ -131,7 +131,7 @@ int main(int argc, char* argv[]) real_type normB = sqrt(vector_handler.dot(&vec_rhs, &vec_rhs, ReSolve::memory::HOST)); // Compute vec_diff := vec_diff - vec_x - vector_handler.axpy(&MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); + vector_handler.axpy(MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); // Compute norm of vec_diff real_type normDiffMatrix = sqrt(vector_handler.dot(&vec_diff, &vec_diff, ReSolve::memory::HOST)); @@ -234,7 +234,7 @@ int main(int argc, char* argv[]) normB = sqrt(vector_handler.dot(&vec_rhs, &vec_rhs, ReSolve::memory::HOST)); // Compute vec_diff := vec_diff - vec_x - vector_handler.axpy(&MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); + vector_handler.axpy(MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); // Compute norm of vec_diff normDiffMatrix = sqrt(vector_handler.dot(&vec_diff, &vec_diff, ReSolve::memory::HOST)); diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index 488997636..609ee66e8 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -184,8 +184,13 @@ int main(int argc, char* argv[]) vec_test->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); // compute ||x_diff|| = ||x - x_true|| norm +<<<<<<< HEAD vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler.axpy(&MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); +======= + vec_diff->copyDataFrom(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); +>>>>>>> 6a3875d7 (Pass scalars by value in VectorHandler.) real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); // Compute residual norm ON THE GPU using REFERENCE solution @@ -288,7 +293,7 @@ int main(int argc, char* argv[]) // compute ||x_diff|| = ||x - x_true|| norm vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler.axpy(&MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); + vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); real_type normDiffMatrix2 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); // compute the residual using exact solution diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 1eaeec1e3..9f71eede0 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -125,7 +125,7 @@ namespace ReSolve real_type nrm = handler_.dot(&V, &V, memspace_); nrm = sqrt(nrm); nrm = 1.0 / nrm; - handler_.scal(&nrm, &V, memspace_); + handler_.scal(nrm, &V, memspace_); // Orthogonalize system and verify result GS.orthogonalize(N, &V, H, 0); diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 47ab278a0..5a9768ea4 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -93,7 +93,7 @@ namespace ReSolve real_type alpha = 0.5; // the result is a vector with y[i] = 2.5 forall i; - handler_.axpy(&alpha, &x, &y, memspace_); + handler_.axpy(alpha, &x, &y, memspace_); status *= verifyAnswer(y, 2.5); return status.report(__func__); @@ -141,7 +141,7 @@ namespace ReSolve // the answer is x[i] = 4.375; real_type answer = 4.375; - handler_.scal(&alpha, &x, memspace_); + handler_.scal(alpha, &x, memspace_); status *= verifyAnswer(x, answer); return status.report(__func__); @@ -230,9 +230,9 @@ namespace ReSolve real_type alpha = -1.0; real_type beta = 1.0; - handler_.gemv('N', N, K, &alpha, &beta, &V, &yN, &xN, memspace_); + handler_.gemv('N', N, K, alpha, beta, &V, &yN, &xN, memspace_); status *= verifyAnswer(xN, static_cast(K) + 0.5); - handler_.gemv('T', N, K, &alpha, &beta, &V, &yT, &xT, memspace_); + handler_.gemv('T', N, K, alpha, beta, &V, &yT, &xT, memspace_); status *= verifyAnswer(xT, static_cast(N) + 0.5); return status.report(__func__); From 9ea61144d1f24352b1b8dfb19f91854de364b59b Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 14:15:55 -0500 Subject: [PATCH 05/23] Check (multi)vector sizes in gemv. --- resolve/vector/VectorHandler.cpp | 10 ++++++++++ resolve/vector/VectorHandler.hpp | 13 +++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index ebb382fe6..521ac6a25 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -209,6 +209,16 @@ namespace ReSolve memory::MemorySpace memspace) { using namespace ReSolve::memory; + + // TODO: Remove n as the argument becuase it must always be n = V->getSize() + assert(n == V->getSize() && "gemv: n does not match the number of rows in V"); + + assert((transpose == 'T' && V->getSize() == y->getSize()) && + "gemv: size mismatch! size of V^T does not match size of y"); + + assert((transpose == 'N' && V->getSize() == x->getSize()) && + "gemv: size mismatch! size of V does not match size of x"); + switch (memspace) { case HOST: diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 74cd3a12c..c949c1568 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -43,15 +43,20 @@ namespace ReSolve // Size = n void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + /** + * @brief Dense matrix-vector product. + * + * In Re::Solve applications, gemv is used to compute dot products of + * multivectors. + * + * if `transpose = N` (no), `x := beta*x + alpha*V*y`, * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * if `transpose = T` (yes), `x := beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ void gemv(char transpose, index_type n, - index_type k, + index_type k, // how many vectors from multivector V to use const real_type alpha, const real_type beta, vector::Vector* V, From e789965cf2a0e5117ca9caae68edc4f9a1a2e7f6 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 15:23:30 -0500 Subject: [PATCH 06/23] Update comments in VectorHandler class. --- resolve/vector/VectorHandler.cpp | 85 +++++++++++++++++++++----------- resolve/vector/VectorHandler.hpp | 74 +++++++++++++-------------- 2 files changed, 94 insertions(+), 65 deletions(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 521ac6a25..0b3af3337 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -165,7 +165,10 @@ namespace ReSolve * @param[in] memspace String containg memspace (cpu or cuda or hip) * */ - void VectorHandler::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpy(const real_type alpha, + /* const */ vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace) { // AXPY: y = alpha * x + y using namespace ReSolve::memory; @@ -182,8 +185,16 @@ namespace ReSolve } /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. - * i.e., x = beta*x + alpha*V*y + * @brief gemv computes dense matrix-vector product. + * + * In Re::Solve applications, gemv is used to compute dot products of + * multivectors. + * + * If `transpose = N` (no), `x := beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * If `transpose = T` (yes), `x := beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + * * * @param[in] Transpose - yes (T) or no (N) * @param[in] n - Number of rows in (non-transposed) matrix @@ -198,14 +209,14 @@ namespace ReSolve * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandler::gemv(char transpose, - index_type n, - index_type k, - const real_type alpha, - const real_type beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x, + void VectorHandler::gemv(char transpose, + index_type n, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; @@ -213,11 +224,9 @@ namespace ReSolve // TODO: Remove n as the argument becuase it must always be n = V->getSize() assert(n == V->getSize() && "gemv: n does not match the number of rows in V"); - assert((transpose == 'T' && V->getSize() == y->getSize()) && - "gemv: size mismatch! size of V^T does not match size of y"); - - assert((transpose == 'N' && V->getSize() == x->getSize()) && - "gemv: size mismatch! size of V does not match size of x"); + // TODO: These assertions should hold but they do not. Investigate more. + // assert((transpose == 'T' && V->getSize() == y->getSize()) && "gemv: size mismatch! size of V^T does not match size of y"); + // assert(((transpose == 'N') && (V->getSize() == x->getSize())) && "gemv: size mismatch! size of V does not match size of x"); switch (memspace) { @@ -232,19 +241,27 @@ namespace ReSolve } /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * @brief Multivector axpy: y: = y + \sum_i alpha_i x_i * * @param[in] size number of elements in y * @param[in] alpha vector size k x 1 - * @param[in] x (multi)vector size size x k + * @param[in] x (multi)vector [size x k] * @param[in,out] y vector size size x 1 (this is where the result is stored) * @param[in] memspace string containg memspace (cpu or cuda or hip) * * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandler::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace) { + assert(y->getSize() == x->getSize() && "Sizes of x and y must match!\n"); + assert(alpha->getSize() == k && "Size of alpha must match k!\n"); + using namespace ReSolve::memory; switch (memspace) { @@ -259,21 +276,32 @@ namespace ReSolve } /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) - * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * @brief Multivector dot product, i.e V^T x + * + * Computes V^T x with k vectors from multivector V. Result is storred + * in `res`. * * @param[in] size - Number of elements in a single vector in V - * @param[in] V - Multivector; k vectors size n x 1 each - * @param[in] k - Number of vectors in V - * @param[in] x - Multivector; 2 vectors size n x 1 each - * @param[out] res - Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace - String containg memspace (cpu or cuda or hip) + * @param[in] V - Multivector; k vectors of size n x 1 each + * @param[in] k - Number of vectors in V to use + * @param[in] x - Multivector; 2 vectors of size n x 1 each + * @param[out] res - Multivector; 2 vectors size k x 1 each + * @param[in] memspace - String containg memspace (cpu, cuda or hip) * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * @pre _size_ > 0, _k_ > 0, size = x->getSize(). + * @pre _res_ needs to be allocated to k x 2 size. * */ - void VectorHandler::dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace) + void VectorHandler::dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res, + memory::MemorySpace memspace) { + assert(x->getSize() == V->getSize() && "Sizes of V and x do not match!\n"); + assert(res->getSize() == k && "Size of `res` must match k!\n"); + using namespace ReSolve::memory; switch (memspace) { @@ -304,6 +332,7 @@ namespace ReSolve assert(diag->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); assert(vec->getData(memspace) != nullptr && "Vector data is null!\n"); + using namespace ReSolve::memory; switch (memspace) { diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index c949c1568..171b95360 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -16,8 +16,7 @@ namespace ReSolve } // namespace ReSolve namespace ReSolve -{ // namespace vector { - +{ class VectorHandler { public: @@ -27,48 +26,49 @@ namespace ReSolve VectorHandler(LinAlgWorkspaceHIP* new_workspace); ~VectorHandler(); - // y = alpha x + y - void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + // y := alpha x + y + void axpy(const real_type alpha, + /* const */ vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace); - // dot: x \cdot y + // Dot product of two vectors real_type dot(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); - // scal = alpha * x + // Scale vector by scalar void scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace); - // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); - - // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - // Size = n - void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); + // Scale vector by diagonal matrix represented as a vector (i.e., vec = diag*vec) + int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); - /** - * @brief Dense matrix-vector product. - * - * In Re::Solve applications, gemv is used to compute dot products of - * multivectors. - * - * if `transpose = N` (no), `x := beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x := beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. - */ - void gemv(char transpose, - index_type n, - index_type k, // how many vectors from multivector V to use - const real_type alpha, - const real_type beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x, + // axpy for multivectors + void axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace); + + // Dot product of two vectors with a multivector V + void dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res, + memory::MemorySpace memspace); + + // Dense matrix-vector product. + void gemv(char transpose, + index_type n, + index_type k, // number of vectors from multivector V to use + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, memory::MemorySpace memspace); - int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); - - /** iamax: - * Returns infinity norm of a vector (i.e., entry with max abs value) - */ + // Vector infinity norm real_type iamax(vector::Vector* x, memory::MemorySpace memspace); bool getIsCudaEnabled() const; @@ -81,6 +81,6 @@ namespace ReSolve bool isCpuEnabled_{false}; bool isCudaEnabled_{false}; bool isHipEnabled_{false}; - }; + }; // class VectorHandler } // namespace ReSolve From e7366a75912ed84522f19f936c57cbf7b993ba02 Mon Sep 17 00:00:00 2001 From: pelesh Date: Wed, 25 Feb 2026 20:32:53 +0000 Subject: [PATCH 07/23] Apply pre-commmit fixes --- resolve/cuda/cudaKernels.cu | 8 ++++---- resolve/vector/VectorHandler.cpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 16 ++++++++-------- resolve/vector/VectorHandlerCuda.cpp | 8 ++++---- resolve/vector/VectorHandlerCuda.hpp | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index 393422708..598850ab2 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -142,10 +142,10 @@ namespace ReSolve */ template __global__ void axpyMulti3(index_type N, - index_type k, - const real_type* x_data, - real_type* y_data, - const real_type* alpha) + index_type k, + const real_type* x_data, + real_type* y_data, + const real_type* alpha) { index_type i = blockIdx.x * blockDim.x + threadIdx.x; index_type t = threadIdx.x; diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 0b3af3337..498512001 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -277,7 +277,7 @@ namespace ReSolve /** * @brief Multivector dot product, i.e V^T x - * + * * Computes V^T x with k vectors from multivector V. Result is storred * in `res`. * diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index bcd901328..515dd2ea0 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -206,10 +206,10 @@ namespace ReSolve * */ void VectorHandlerCpu::axpyMulti(index_type size, - vector::Vector* alpha, - index_type k, - vector::Vector* x, - vector::Vector* y) + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { real_type* alpha_data = alpha->getData(memory::HOST); @@ -243,10 +243,10 @@ namespace ReSolve * */ void VectorHandlerCpu::dot2Multi(index_type size, - vector::Vector* V, - index_type q, - vector::Vector* x, - vector::Vector* res) + vector::Vector* V, + index_type q, + vector::Vector* x, + vector::Vector* res) { real_type* res_data = res->getData(memory::HOST); real_type* x_data = x->getData(memory::HOST); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 8e4c11461..e3af4377a 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -245,10 +245,10 @@ namespace ReSolve * */ void VectorHandlerCuda::dot2Multi(index_type size, - vector::Vector* V, - index_type k, - vector::Vector* x, - vector::Vector* res) + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res) { using namespace constants; diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 078206204..e0b6c439d 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -24,7 +24,7 @@ namespace ReSolve virtual ~VectorHandlerCuda(); // y = alpha x + y - virtual void axpy(const real_type alpha, /* const */vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); From 37077f5c22016019d917220b54ad99dd07d46ed4 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 16:24:14 -0500 Subject: [PATCH 08/23] Remove unused code. --- examples/randGmres.cpp | 1 - resolve/CMakeLists.txt | 1 - resolve/LinSolverDirectSerialILU0.cpp | 386 -------------------------- resolve/LinSolverDirectSerialILU0.hpp | 59 ---- resolve/RefactorizationSolver.cpp | 8 - resolve/RefactorizationSolver.hpp | 36 --- resolve/SystemSolver.cpp | 3 +- 7 files changed, 1 insertion(+), 493 deletions(-) delete mode 100644 resolve/LinSolverDirectSerialILU0.cpp delete mode 100644 resolve/LinSolverDirectSerialILU0.hpp delete mode 100644 resolve/RefactorizationSolver.cpp delete mode 100644 resolve/RefactorizationSolver.hpp diff --git a/examples/randGmres.cpp b/examples/randGmres.cpp index 59c7e5bdb..f8ec86a54 100644 --- a/examples/randGmres.cpp +++ b/examples/randGmres.cpp @@ -5,7 +5,6 @@ #include "ExampleHelper.hpp" #include #include -#include #include #include #include diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 07dbf7c28..628963c8b 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -18,7 +18,6 @@ set(ReSolve_SRC LinSolverIterativeFGMRES.cpp LinSolverDirectCpuILU0.cpp LinSolverIterativeRandFGMRES.cpp - LinSolverDirectSerialILU0.cpp SystemSolver.cpp Preconditioner.cpp PreconditionerLU.cpp diff --git a/resolve/LinSolverDirectSerialILU0.cpp b/resolve/LinSolverDirectSerialILU0.cpp deleted file mode 100644 index 0785c38ed..000000000 --- a/resolve/LinSolverDirectSerialILU0.cpp +++ /dev/null @@ -1,386 +0,0 @@ -#include "LinSolverDirectSerialILU0.hpp" - -#include - -#include -#include -#include - -namespace ReSolve -{ - using out = io::Logger; - - LinSolverDirectSerialILU0::LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace) - { - workspace_ = workspace; - } - - LinSolverDirectSerialILU0::~LinSolverDirectSerialILU0() - { - if (owns_factors_) - { - delete L_; - delete U_; - L_ = nullptr; - U_ = nullptr; - } - delete[] h_aux1_; - delete[] h_ILU_vals_; - } - - int LinSolverDirectSerialILU0::setup(matrix::Sparse* A, - matrix::Sparse*, - matrix::Sparse*, - index_type*, - index_type*, - vector_type*) - { - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - index_type nnz = A_->getNnz(); - - h_ILU_vals_ = new real_type[nnz]; - h_aux1_ = new real_type[n]; - - index_type zero_pivot = 0; // no zero pivot - - // copy A values to a buffer first - for (index_type i = 0; i < nnz; ++i) - { - h_ILU_vals_[i] = A_->getValues(ReSolve::memory::HOST)[i]; - } - - // allocate helper variables that make this code fast - index_type* u_ptr = new index_type[n]; - index_type* ja_mapper = new index_type[n]; - - // aux scalars for indexing etc - index_type k, j, jw, j1, j2; - - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) - { - ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = j; - } - // IJK ILU - j = j1; - while (j < j2) - { - k = A_->getColData(ReSolve::memory::HOST)[j]; - if (k < i) - { - h_ILU_vals_[j] /= h_ILU_vals_[u_ptr[k]]; - for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) - { - jw = ja_mapper[A_->getColData(ReSolve::memory::HOST)[jj]]; - if (jw != 0) - { - h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; - } - } - } - else - { - break; - } - j++; - } - u_ptr[i] = j; - if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) - { - zero_pivot = -1; // zero pivot is in place (i,i) on the diagonal - return zero_pivot; - } - // reset mapper - for (index_type j = j1; j < j2; ++j) - { - ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = 0; - } - } - - // clean up - delete[] ja_mapper; - delete[] u_ptr; - - // split into L and U! - index_type nnzL = 0, nnzU = 0; - // the diagonal values GO TO U, L has 1s on the diagonal - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) - { - if (A->getColData(ReSolve::memory::HOST)[j] == i) - { - // diagonal, add to both - nnzL++; - nnzU++; - } - if (A->getColData(ReSolve::memory::HOST)[j] > i) - { - // upper part - nnzU++; - } - if (A->getColData(ReSolve::memory::HOST)[j] < i) - { - // lower part - nnzL++; - } - } - } - // TODO: What is the purpose of nnzL and nnzU if they are not used after this? - // allocate L and U - - L_ = new matrix::Csr(n, n, nnzL, false, true); - U_ = new matrix::Csr(n, n, nnzU, false, true); - owns_factors_ = true; - - L_->allocateMatrixData(ReSolve::memory::HOST); - U_->allocateMatrixData(ReSolve::memory::HOST); - index_type lit = 0, uit = 0, kL, kU; - L_->getRowData(ReSolve::memory::HOST)[0] = 0; - U_->getRowData(ReSolve::memory::HOST)[0] = 0; - - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - kL = 0; - kU = 0; - for (index_type j = j1; j < j2; ++j) - { - - if (A->getColData(ReSolve::memory::HOST)[j] == i) - { - // diagonal, add to both - - L_->getValues(ReSolve::memory::HOST)[lit] = 1.0; - U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; - - L_->getColData(ReSolve::memory::HOST)[lit] = i; - U_->getColData(ReSolve::memory::HOST)[uit] = i; - - lit++; - uit++; - kL++; - kU++; - } - - if (A->getColData(ReSolve::memory::HOST)[j] > i) - { - // upper part - - U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; - U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; - ; - - uit++; - kU++; - } - - if (A->getColData(ReSolve::memory::HOST)[j] < i) - { - // lower part - L_->getValues(ReSolve::memory::HOST)[lit] = h_ILU_vals_[j]; - L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; - - lit++; - kL++; - } - } - // update row pointers (offsets) - L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; - U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; - } - - return zero_pivot; - } - - int LinSolverDirectSerialILU0::reset(matrix::Sparse* A) - { - return this->setup(A); - } - - // solution is returned in RHS - int LinSolverDirectSerialILU0::solve(vector_type* rhs) - { - int error_sum = 0; - // printf("solve t 1\n"); - // h_aux1 = L^{-1} rhs - for (index_type i = 0; i < L_->getNumRows(); ++i) - { - h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) - { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; - } - h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; - } - - // rhs = U^{-1} h_aux1 - - for (index_type i = A_->getNumRows() - 1; i >= 0; --i) - { - rhs->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) - { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - rhs->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * rhs->getData(ReSolve::memory::HOST)[col]; - } - rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry - } - - return error_sum; - } - - int LinSolverDirectSerialILU0::solve(vector_type* rhs, vector_type* x) - { - // printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); - int error_sum = 0; - // h_aux1 = L^{-1} rhs - // for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); - for (index_type i = 0; i < L_->getNumRows(); ++i) - { - h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) - { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; - } - h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; - } - - // for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, h_aux1_[ii]); - // x = U^{-1} h_aux1 - - for (index_type i = U_->getNumRows() - 1; i >= 0; --i) - { - x->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) - { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; - } - x->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry - } - // for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); - return error_sum; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to set. - * @return int Error code. - */ - int LinSolverDirectSerialILU0::setCliParam(const std::string id, const std::string /* value */) - { - switch (getParamId(id)) - { - default: - std::cout << "Setting parameter failed!\n"; - } - return 0; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return std::string Value of the string parameter to return. - */ - std::string LinSolverDirectSerialILU0::getCliParamString(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; - } - return ""; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return int Value of the int parameter to return. - */ - index_type LinSolverDirectSerialILU0::getCliParamInt(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; - } - return -1; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return real_type Value of the real_type parameter to return. - */ - real_type LinSolverDirectSerialILU0::getCliParamReal(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; - } - return std::numeric_limits::quiet_NaN(); - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return bool Value of the bool parameter to return. - */ - bool LinSolverDirectSerialILU0::getCliParamBool(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; - } - return false; - } - - int LinSolverDirectSerialILU0::printCliParam(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; - } - return 0; - } - -} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.hpp b/resolve/LinSolverDirectSerialILU0.hpp deleted file mode 100644 index f69aa2d93..000000000 --- a/resolve/LinSolverDirectSerialILU0.hpp +++ /dev/null @@ -1,59 +0,0 @@ - -#pragma once - -#include "Common.hpp" -#include -#include -#include - -namespace ReSolve -{ - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } - - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } - - class LinSolverDirectSerialILU0 : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); - ~LinSolverDirectSerialILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - int reset(matrix::Sparse* A) override; - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - // MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool owns_factors_{false}; ///< If the class owns L and U factors - - real_type* h_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep - // the values of ILU decomposition. - real_type* h_ILU_vals_{nullptr}; - }; -} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.cpp b/resolve/RefactorizationSolver.cpp deleted file mode 100644 index 0b1fab8f5..000000000 --- a/resolve/RefactorizationSolver.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "RefactorizationSolver.hpp" - -namespace ReSolve -{ - RefactorizationSolver::RefactorizationSolver() - { - } -} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.hpp b/resolve/RefactorizationSolver.hpp deleted file mode 100644 index 482cb74cc..000000000 --- a/resolve/RefactorizationSolver.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include - -namespace ReSolve -{ - RefactorizationSolver - { - using vector_type = vector::Vector; - - public: - RefactorizationSolver(); - ~RefactorizationSolver(); - int setup(std::string first_solver, - std::string refact_solver_, - std::string use_ir_); - - int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); - - int solve(matrix::Sparse * A, vector_type * vec_rhs, vector_type * vec_x); - - private: - std::string first_solver_name_; - std::string refact_solver_name_; - std::string use_ir_; - // IR parameters - real_type ir_tol_; - index_type ir_maxit_; - index_type ir_gs_; - - LinSolverDirect* first_solver_; - LinSolverDirect* refact_solver_; - LinSolverIterative* ir_solver_; - bool factorization_exists_; - }; -} // namespace ReSolve diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 17773fb20..9c380b1a9 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -319,7 +318,7 @@ namespace ReSolve { if (memspace_ == "cpu") { - preconditionSolver_ = new LinSolverDirectSerialILU0(workspaceCpu_); + preconditionSolver_ = new LinSolverDirectCpuILU0(workspaceCpu_); preconditioner_ = new PreconditionerLU(preconditionSolver_); #ifdef RESOLVE_USE_CUDA } From 81ad4b19b85a7fe81f2c411143dfed305c783d7f Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 16:28:18 -0500 Subject: [PATCH 09/23] [skip ci] Update CHANGELOG. --- .github/pull_request_template.md | 3 +-- CHANGELOG.md | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5fbff7ac9..d133ced0d 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,8 +26,7 @@ - [ ] CPU backend - [ ] CUDA backend - [ ] HIP backend -- [ ] I have manually run the non-experimental examples and verified that residuals are close to machine precision. (In your build directory run: -`./examples/.exe -h` to get instructions how to run examples). Code tested on: +- [ ] I have manually run the non-experimental examples and verified that residuals are close to machine precision. (In your build directory run: `./examples/.exe -h` to get instructions how to run examples). Code tested on: - [ ] CPU backend - [ ] CUDA backend - [ ] HIP backend diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aac751e2..e5e08eeec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ - Reworked templates to include example tests. -- Removed unnecessary full facotorization in the examples and made the input 1 based. +- Removed unnecessary full facotorization in the examples. + +- Update method names in `VectorHandler` to match BLAS naming. - Added `cons` counterparts to `Vector::getData` methods. From 9229a9068b2a2b6a49f81dcb4df441b0eccc3f3c Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 21:29:09 -0500 Subject: [PATCH 10/23] Change iamax --> amax. --- examples/ExampleHelper.hpp | 8 ++++---- resolve/SystemSolver.cpp | 4 ++-- resolve/vector/VectorHandler.cpp | 6 +++--- resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 2 +- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 2 +- resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 2 +- resolve/vector/VectorHandlerHip.hpp | 2 +- resolve/vector/VectorHandlerImpl.hpp | 4 ++-- tests/functionality/TestHelper.hpp | 4 ++-- tests/functionality/testSysGLU.cpp | 8 ++++---- tests/unit/vector/VectorHandlerTests.hpp | 4 ++-- tests/unit/vector/runVectorHandlerTests.cpp | 6 +++--- 15 files changed, 29 insertions(+), 29 deletions(-) diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index 238100fe5..3d569371c 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -226,8 +226,8 @@ namespace ReSolve // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.iamax(x_, memspace_); - real_type inf_norm_res = vh_.iamax(res_, memspace_); + real_type inf_norm_x = vh_.amax(x_, memspace_); + real_type inf_norm_res = vh_.amax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -318,8 +318,8 @@ namespace ReSolve // Compute norm of scaled residuals mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); - inf_norm_x_ = vh_.iamax(x_, memspace_); - inf_norm_res_ = vh_.iamax(res_, memspace_); + inf_norm_x_ = vh_.amax(x_, memspace_); + inf_norm_res_ = vh_.amax(res_, memspace_); nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); } diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 9c380b1a9..f7a4acf88 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -875,8 +875,8 @@ namespace ReSolve } matrixHandler_->setValuesChanged(true, ms); matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUS_ONE, ms); - resnorm = vectorHandler_->iamax(resVector_, ms); - norm_x = vectorHandler_->iamax(x, ms); + resnorm = vectorHandler_->amax(resVector_, ms); + norm_x = vectorHandler_->amax(x, ms); matrixHandler_->matrixInfNorm(A_, &norm_A, ms); return resnorm / (norm_x * norm_A); } diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 498512001..4beeb83ff 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -141,16 +141,16 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandler::iamax(vector::Vector* x, memory::MemorySpace memspace) + real_type VectorHandler::amax(vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->iamax(x); + return cpuImpl_->amax(x); break; case DEVICE: - return devImpl_->iamax(x); + return devImpl_->amax(x); break; } return -1.0; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 171b95360..5cd62315c 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -69,7 +69,7 @@ namespace ReSolve memory::MemorySpace memspace); // Vector infinity norm - real_type iamax(vector::Vector* x, memory::MemorySpace memspace); + real_type amax(vector::Vector* x, memory::MemorySpace memspace); bool getIsCudaEnabled() const; bool getIsHipEnabled() const; diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 515dd2ea0..d05a608bb 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -86,7 +86,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCpu::iamax(vector::Vector* x) + real_type VectorHandlerCpu::amax(vector::Vector* x) { real_type* x_data = x->getData(memory::HOST); real_type vecmax = std::abs(x_data[0]); diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index bb66f3a51..337bf5fd0 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -32,7 +32,7 @@ namespace ReSolve virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type iamax(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index e3af4377a..f54fe3ff4 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -85,7 +85,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCuda::iamax(vector::Vector* x) + real_type VectorHandlerCuda::amax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index e0b6c439d..9dcaebf1d 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -33,7 +33,7 @@ namespace ReSolve virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type iamax(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 0199521e2..d94347a31 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -91,7 +91,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerHip::iamax(vector::Vector* x) + real_type VectorHandlerHip::amax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 1bbc7ec07..7b5437871 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -33,7 +33,7 @@ namespace ReSolve virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type iamax(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index e2f1c818a..eaf123fbf 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -32,8 +32,8 @@ namespace ReSolve // scal = alpha * x virtual void scal(const real_type alpha, vector::Vector* x) = 0; - // iamax = ||x||_\infty - virtual real_type iamax(vector::Vector* x) = 0; + // amax = ||x||_\infty + virtual real_type amax(vector::Vector* x) = 0; // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; diff --git a/tests/functionality/TestHelper.hpp b/tests/functionality/TestHelper.hpp index 7dbf4dcf8..0b7bf6e9d 100644 --- a/tests/functionality/TestHelper.hpp +++ b/tests/functionality/TestHelper.hpp @@ -305,8 +305,8 @@ class TestHelper // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.iamax(x_, memspace_); - real_type inf_norm_res = vh_.iamax(res_, memspace_); + real_type inf_norm_x = vh_.amax(x_, memspace_); + real_type inf_norm_res = vh_.amax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index 609ee66e8..e703c6b46 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -153,8 +153,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals real_type inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - real_type inf_norm_x = vector_handler.iamax(vec_x, ReSolve::memory::DEVICE); - real_type inf_norm_r = vector_handler.iamax(vec_r, ReSolve::memory::DEVICE); + real_type inf_norm_x = vector_handler.amax(vec_x, ReSolve::memory::DEVICE); + real_type inf_norm_r = vector_handler.amax(vec_r, ReSolve::memory::DEVICE); real_type nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); real_type nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -273,8 +273,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals for the second system inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - inf_norm_x = vector_handler.iamax(vec_x, ReSolve::memory::DEVICE); - inf_norm_r = vector_handler.iamax(vec_r, ReSolve::memory::DEVICE); + inf_norm_x = vector_handler.amax(vec_x, ReSolve::memory::DEVICE); + inf_norm_r = vector_handler.amax(vec_r, ReSolve::memory::DEVICE); nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); error = std::abs(nsr_system - nsr_norm) / nsr_norm; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 5a9768ea4..07d51e825 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -49,7 +49,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome iamax(index_type N) + TestOutcome amax(index_type N) { TestStatus status; status = true; @@ -63,7 +63,7 @@ namespace ReSolve } x.copyFromExternal(data, memory::HOST, memspace_); - real_type result = handler_.iamax(&x, memspace_); + real_type result = handler_.amax(&x, memspace_); real_type answer = static_cast(N - 1) * 0.1; if (!isEqual(result, answer)) diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 9f3091820..0d4e02983 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -20,7 +20,7 @@ int main(int, char**) result += test.dot(50); result += test.axpy(50); result += test.scal(50); - result += test.iamax(50); + result += test.amax(50); result += test.gemv(5000, 10); result += test.axpyMulti(100, 10); result += test.massDot(100, 10); @@ -46,7 +46,7 @@ int main(int, char**) result += test.axpyMulti(1000, 30); result += test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.iamax(1000); + result += test.amax(1000); result += test.scale(1000); std::cout << "\n"; @@ -70,7 +70,7 @@ int main(int, char**) result += test.axpyMulti(1000, 300); result += test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.iamax(1000); + result += test.amax(1000); result += test.scale(1000); std::cout << "\n"; From 5e71c8fdcf30efe999cc7541ed70fd9a0e733231 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 25 Feb 2026 22:23:38 -0500 Subject: [PATCH 11/23] Vector handler methods set update flags correctly. --- resolve/vector/VectorHandlerCpu.cpp | 39 ++++++++++++++---------- resolve/vector/VectorHandlerCuda.cpp | 32 ++++++++++++++++---- resolve/vector/VectorHandlerHip.cpp | 45 ++++++++++++++++++++++------ 3 files changed, 85 insertions(+), 31 deletions(-) diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index d05a608bb..1cd6cc8c2 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -45,10 +45,10 @@ namespace ReSolve real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) { - real_type* x_data = x->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - real_type sum = 0.0; - real_type c = 0.0; + const real_type* x_data = x->getData(memory::HOST); + const real_type* y_data = y->getData(memory::HOST); + real_type sum = 0.0; + real_type c = 0.0; // real_type t, y; for (int i = 0; i < x->getSize(); ++i) { @@ -76,6 +76,7 @@ namespace ReSolve { x_data[i] *= alpha; } + x->setDataUpdated(memory::HOST); } /** @@ -88,9 +89,10 @@ namespace ReSolve */ real_type VectorHandlerCpu::amax(vector::Vector* x) { - real_type* x_data = x->getData(memory::HOST); - real_type vecmax = std::abs(x_data[0]); - real_type v; + const real_type* x_data = x->getData(memory::HOST); + + real_type vecmax = std::abs(x_data[0]); + real_type v; for (int i = 1; i < x->getSize(); ++i) { v = std::abs(x_data[i]); @@ -119,6 +121,7 @@ namespace ReSolve { y_data[i] = alpha * x_data[i] + y_data[i]; } + y->setDataUpdated(memory::HOST); } /** @@ -147,9 +150,10 @@ namespace ReSolve vector::Vector* x) { // x = beta*x + alpha*V*y OR x = beta*x + alpha*V^Ty - real_type* V_data = V->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - real_type* x_data = x->getData(memory::HOST); + const real_type* V_data = V->getData(memory::HOST); + const real_type* y_data = y->getData(memory::HOST); + real_type* x_data = x->getData(memory::HOST); + index_type i, j; real_type sum; switch (transpose) @@ -192,6 +196,7 @@ namespace ReSolve } break; } // switch + x->setDataUpdated(memory::HOST); } /** @@ -227,6 +232,7 @@ namespace ReSolve } y_data[i] = y_data[i] - sum; } + y->setDataUpdated(memory::HOST); } /** @@ -248,9 +254,9 @@ namespace ReSolve vector::Vector* x, vector::Vector* res) { - real_type* res_data = res->getData(memory::HOST); - real_type* x_data = x->getData(memory::HOST); - real_type* V_data = V->getData(memory::HOST); + real_type* res_data = res->getData(memory::HOST); + const real_type* x_data = x->getData(memory::HOST); + const real_type* V_data = V->getData(memory::HOST); real_type c0 = 0.0; real_type cq = 0.0; @@ -274,6 +280,7 @@ namespace ReSolve res_data[i + q] = tq; } } + res->setDataUpdated(memory::HOST); } /** @@ -286,9 +293,9 @@ namespace ReSolve */ int VectorHandlerCpu::scal(vector::Vector* diag, vector::Vector* vec) { - real_type* diag_data = diag->getData(memory::HOST); - real_type* vec_data = vec->getData(memory::HOST); - index_type n = vec->getSize(); + const real_type* diag_data = diag->getData(memory::HOST); + real_type* vec_data = vec->getData(memory::HOST); + index_type n = vec->getSize(); for (index_type i = 0; i < n; ++i) { diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index f54fe3ff4..90f46e780 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -51,8 +51,15 @@ namespace ReSolve real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); - double nrm = 0.0; - cublasStatus_t st = cublasDdot(handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + + double nrm{0.0}; + cublasStatus_t st = cublasDdot(handle_cublas, + x->getSize(), + x->getData(memory::DEVICE), + 1, + y->getData(memory::DEVICE), + 1, + &nrm); if (st != 0) { out::error() << "Dot product failed with error code " << st << "\n"; @@ -73,8 +80,9 @@ namespace ReSolve cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); if (st != 0) { - out::error() << "scal crashed with code " << st << "\n"; + out::error() << "scal returned error code " << st << "\n"; } + x->setDataUpdated(memory::DEVICE); } /** @@ -95,7 +103,7 @@ namespace ReSolve workspace_->setNormBuffer(buffer); workspace_->setNormBufferState(true); } - real_type norm; + real_type norm{0.0}; // TODO: Shouldn't the return type be cusolverStatus_t ? int status = cusolverSpDnrminf(workspace_->getCusolverSpHandle(), x->getSize(), @@ -127,6 +135,7 @@ namespace ReSolve 1, y->getData(memory::DEVICE), 1); + y->setDataUpdated(memory::DEVICE); } /** @@ -190,6 +199,7 @@ namespace ReSolve << " in gemv. Using non-transposed multivector.\n"; } } + x->setDataUpdated(memory::DEVICE); } /** @@ -203,12 +213,20 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCuda::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerCuda::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { using namespace constants; if (k < 200) { - cuda::mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + cuda::mass_axpy(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -228,6 +246,7 @@ namespace ReSolve y->getData(memory::DEVICE), // c size); // ldc } + y->setDataUpdated(memory::DEVICE); } /** @@ -279,6 +298,7 @@ namespace ReSolve res->getData(memory::DEVICE), // c k); // ldc } + res->setDataUpdated(memory::DEVICE); } /** diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index d94347a31..dbe5a0cb4 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -51,11 +51,17 @@ namespace ReSolve { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); double nrm = 0.0; - rocblas_status st = rocblas_ddot(handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + rocblas_status st = rocblas_ddot(handle_rocblas, + x->getSize(), + x->getData(memory::DEVICE), + 1, + y->getData(memory::DEVICE), + 1, + &nrm); if (st != 0) { - printf("dot product crashed with code %d \n", st); + out::error() << "dot product returned error code " << st << "\n"; } return nrm; } @@ -76,11 +82,11 @@ namespace ReSolve &alpha, x->getData(memory::DEVICE), 1); - if (st != 0) { - ReSolve::io::Logger::error() << "scal returned error code " << st << "\n"; + out::error() << "scal returned error code " << st << "\n"; } + x->setDataUpdated(memory::DEVICE); } /** @@ -101,7 +107,7 @@ namespace ReSolve workspace_->setNormBuffer(buffer); workspace_->setNormBufferState(true); } - real_type norm; + real_type norm{0.0}; hip::vector_inf_norm(x->getSize(), x->getData(memory::DEVICE), workspace_->getNormBuffer(), @@ -127,6 +133,7 @@ namespace ReSolve 1, y->getData(memory::DEVICE), 1); + y->setDataUpdated(memory::DEVICE); } /** @@ -190,6 +197,7 @@ namespace ReSolve << " in gemv. Using non-transposed multivector.\n"; } } + x->setDataUpdated(memory::DEVICE); } /** @@ -203,12 +211,20 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerHip::axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerHip::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { using namespace constants; if (k < 200) { - hip::mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + hip::mass_axpy(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -228,6 +244,7 @@ namespace ReSolve y->getData(memory::DEVICE), // c size); // ldc } + y->setDataUpdated(memory::DEVICE); } /** @@ -244,13 +261,22 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerHip::dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + void VectorHandlerHip::dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res) { using namespace constants; if (k < 200) { - hip::mass_inner_product_two_vectors(size, k, x->getData(0, memory::DEVICE), x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); + hip::mass_inner_product_two_vectors(size, + k, + x->getData(0, memory::DEVICE), + x->getData(1, memory::DEVICE), + V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); } else { @@ -270,6 +296,7 @@ namespace ReSolve res->getData(memory::DEVICE), // c k); // ldc } + res->setDataUpdated(memory::DEVICE); } /** From 7b14ca152e6bb89b3e545f1794727c9de70b1a5a Mon Sep 17 00:00:00 2001 From: Shaked Regev <35384901+shakedregev@users.noreply.github.com> Date: Thu, 26 Feb 2026 14:32:59 -0500 Subject: [PATCH 12/23] Apply suggestion from @shakedregev Remove ambiguity with Oxford comma --- resolve/vector/VectorHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 4beeb83ff..26d885e74 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -286,7 +286,7 @@ namespace ReSolve * @param[in] k - Number of vectors in V to use * @param[in] x - Multivector; 2 vectors of size n x 1 each * @param[out] res - Multivector; 2 vectors size k x 1 each - * @param[in] memspace - String containg memspace (cpu, cuda or hip) + * @param[in] memspace - String containg memspace (cpu, cuda, or hip) * * @pre _size_ > 0, _k_ > 0, size = x->getSize(). * @pre _res_ needs to be allocated to k x 2 size. From a68f558735f33edbca674d2b5f7abe055ac69004 Mon Sep 17 00:00:00 2001 From: pelesh Date: Thu, 26 Feb 2026 21:51:03 +0000 Subject: [PATCH 13/23] Fix scal function signature in CUDA implementation. --- resolve/vector/VectorHandlerCuda.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 90f46e780..4456e41f3 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -74,10 +74,14 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerCuda::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerCuda::scal(const real_type alpha, vector::Vector* x) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + cublasStatus_t st = cublasDscal(handle_cublas, + x->getSize(), + &alpha, + x->getData(memory::DEVICE), + 1); if (st != 0) { out::error() << "scal returned error code " << st << "\n"; From 9a9691a4fe36a4f6c9869364716735e183cab630 Mon Sep 17 00:00:00 2001 From: pelesh Date: Thu, 26 Feb 2026 22:14:52 +0000 Subject: [PATCH 14/23] Harmonize CUDA and HIP kernel names with calling functions. --- resolve/cuda/cudaKernels.cu | 4 ++-- resolve/cuda/cudaKernels.h | 14 +++++++------- resolve/hip/hipKernels.h | 14 +++++++------- resolve/hip/hipKernels.hip | 14 +++++++------- resolve/vector/VectorHandlerCuda.cpp | 22 +++++++++++----------- resolve/vector/VectorHandlerHip.cpp | 22 +++++++++++----------- 6 files changed, 45 insertions(+), 45 deletions(-) diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index 598850ab2..1cdb39da1 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -287,7 +287,7 @@ namespace ReSolve * value of Tv5? * @todo Should we use dynamic shared memory here instead? */ - void mass_inner_product_two_vectors(index_type n, + void dot_2_multi(index_type n, index_type i, const real_type* vec1, const real_type* vec2, @@ -306,7 +306,7 @@ namespace ReSolve * @param[out] y - (n x (i+1)) multivector * @param[in] alpha - ((i+1) x 1) vector */ - void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha) + void axpy_multi(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha) { kernels::axpyMulti3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha); } diff --git a/resolve/cuda/cudaKernels.h b/resolve/cuda/cudaKernels.h index 037a3ae8e..bcc6da834 100644 --- a/resolve/cuda/cudaKernels.h +++ b/resolve/cuda/cudaKernels.h @@ -15,14 +15,14 @@ namespace ReSolve { namespace cuda { - void mass_inner_product_two_vectors(index_type n, - index_type i, - const real_type* vec1, - const real_type* vec2, - const real_type* mvec, - real_type* result); + void dot_2_multi(index_type n, + index_type i, + const real_type* vec1, + const real_type* vec2, + const real_type* mvec, + real_type* result); - void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha); + void axpy_multi(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha); void leftScale(index_type n, const index_type* a_row_ptr, diff --git a/resolve/hip/hipKernels.h b/resolve/hip/hipKernels.h index 4308ac589..9c8aaf3b6 100644 --- a/resolve/hip/hipKernels.h +++ b/resolve/hip/hipKernels.h @@ -16,13 +16,13 @@ namespace ReSolve { namespace hip { - void mass_inner_product_two_vectors(index_type n, - index_type i, - real_type* vec1, - real_type* vec2, - real_type* mvec, - real_type* result); - void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha); + void dot_2_multi(index_type n, + index_type i, + real_type* vec1, + real_type* vec2, + real_type* mvec, + real_type* result); + void axpy_multi(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha); void leftScale(index_type n, const index_type* a_row_ptr, diff --git a/resolve/hip/hipKernels.hip b/resolve/hip/hipKernels.hip index 04fe0c063..a6e8314c5 100644 --- a/resolve/hip/hipKernels.hip +++ b/resolve/hip/hipKernels.hip @@ -388,12 +388,12 @@ namespace ReSolve { * value of Tv5? * @todo Should we use dynamic shared memory here instead? */ - void mass_inner_product_two_vectors(index_type n, - index_type i, - real_type* vec1, - real_type* vec2, - real_type* mvec, - real_type* result) + void dot_2_multi(index_type n, + index_type i, + real_type* vec1, + real_type* vec2, + real_type* mvec, + real_type* result) { hipLaunchKernelGGL(kernels::MassIPTwoVec_kernel, dim3(i), @@ -417,7 +417,7 @@ namespace ReSolve { * @param[out] y - (n x (i+1)) multivector * @param[in] alpha - ((i+1) x 1) vector */ - void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha) + void axpy_multi(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha) { hipLaunchKernelGGL(kernels::axpyMulti3_kernel, dim3((n + 384 - 1) / 384), diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 4456e41f3..81fa8cbcc 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -226,11 +226,11 @@ namespace ReSolve using namespace constants; if (k < 200) { - cuda::mass_axpy(size, - k, - x->getData(memory::DEVICE), - y->getData(memory::DEVICE), - alpha->getData(memory::DEVICE)); + cuda::axpy_multi(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -277,12 +277,12 @@ namespace ReSolve if (k < 200) { - cuda::mass_inner_product_two_vectors(size, - k, - x->getData(0, memory::DEVICE), - x->getData(1, memory::DEVICE), - V->getData(memory::DEVICE), - res->getData(memory::DEVICE)); + cuda::dot_2_multi(size, + k, + x->getData(0, memory::DEVICE), + x->getData(1, memory::DEVICE), + V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); } else { diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index dbe5a0cb4..677323b01 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -220,11 +220,11 @@ namespace ReSolve using namespace constants; if (k < 200) { - hip::mass_axpy(size, - k, - x->getData(memory::DEVICE), - y->getData(memory::DEVICE), - alpha->getData(memory::DEVICE)); + hip::axpy_multi(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -271,12 +271,12 @@ namespace ReSolve if (k < 200) { - hip::mass_inner_product_two_vectors(size, - k, - x->getData(0, memory::DEVICE), - x->getData(1, memory::DEVICE), - V->getData(memory::DEVICE), - res->getData(memory::DEVICE)); + hip::dot_2_multi(size, + k, + x->getData(0, memory::DEVICE), + x->getData(1, memory::DEVICE), + V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); } else { From 5e6540ea5f1207a38e8810940244125e4bedaed6 Mon Sep 17 00:00:00 2001 From: pelesh Date: Thu, 26 Feb 2026 22:15:44 +0000 Subject: [PATCH 15/23] Apply pre-commmit fixes --- resolve/cuda/cudaKernels.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index 1cdb39da1..c6081012c 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -288,11 +288,11 @@ namespace ReSolve * @todo Should we use dynamic shared memory here instead? */ void dot_2_multi(index_type n, - index_type i, - const real_type* vec1, - const real_type* vec2, - const real_type* mvec, - real_type* result) + index_type i, + const real_type* vec1, + const real_type* vec2, + const real_type* mvec, + real_type* result) { kernels::MassIPTwoVec<<>>(vec1, vec2, mvec, result, i, n); } From 2de6cbd66994b5e6af93a45a33dc2d911ed9bfce Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 2 Mar 2026 20:59:52 -0500 Subject: [PATCH 16/23] Add correct assert statements to Vector::gemv. --- resolve/vector/VectorHandler.cpp | 15 ++++++++------- resolve/vector/VectorHandlerCpu.cpp | 12 +++++++++++- resolve/vector/VectorHandlerCuda.cpp | 12 +++++++++++- resolve/vector/VectorHandlerHip.cpp | 12 +++++++++++- 4 files changed, 41 insertions(+), 10 deletions(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 26d885e74..d507032ab 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -195,10 +195,9 @@ namespace ReSolve * If `transpose = T` (yes), `x := beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. * - * * @param[in] Transpose - yes (T) or no (N) * @param[in] n - Number of rows in (non-transposed) matrix - * @param[in] k - Number of columns in (non-transposed) + * @param[in] k - Number of columns in (non-transposed) matrix to use * @param[in] alpha - Constant real number * @param[in] beta - Constant real number * @param[in] V - Multivector containing the matrix, organized columnwise @@ -206,7 +205,13 @@ namespace ReSolve * @param[in,out] x - Vector, n x 1 if N and k x 1 if T * @param[in] memspace - cpu or cuda or hip (for now) * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. * */ void VectorHandler::gemv(char transpose, @@ -224,10 +229,6 @@ namespace ReSolve // TODO: Remove n as the argument becuase it must always be n = V->getSize() assert(n == V->getSize() && "gemv: n does not match the number of rows in V"); - // TODO: These assertions should hold but they do not. Investigate more. - // assert((transpose == 'T' && V->getSize() == y->getSize()) && "gemv: size mismatch! size of V^T does not match size of y"); - // assert(((transpose == 'N') && (V->getSize() == x->getSize())) && "gemv: size mismatch! size of V does not match size of x"); - switch (memspace) { case HOST: diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 1cd6cc8c2..6075fd3e6 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -137,7 +137,13 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. * */ void VectorHandlerCpu::gemv(char transpose, @@ -159,6 +165,8 @@ namespace ReSolve switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); for (i = 0; i < k; ++i) { sum = beta * x_data[i]; @@ -175,6 +183,8 @@ namespace ReSolve } break; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! Size of V does not match size of x."); for (i = 0; i < n; ++i) { sum = beta * x_data[i]; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 81fa8cbcc..5ed3d7f74 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -155,7 +155,13 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. * */ void VectorHandlerCuda::gemv(char transpose, @@ -171,6 +177,8 @@ namespace ReSolve switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); cublasDgemv(handle_cublas, CUBLAS_OP_T, n, @@ -185,6 +193,8 @@ namespace ReSolve 1); return; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! Size of V does not match size of x."); cublasDgemv(handle_cublas, CUBLAS_OP_N, n, diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 677323b01..1241b9b0a 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -149,7 +149,13 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. * */ void VectorHandlerHip::gemv(char transpose, @@ -165,6 +171,8 @@ namespace ReSolve switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); rocblas_dgemv(handle_rocblas, rocblas_operation_transpose, n, @@ -179,6 +187,8 @@ namespace ReSolve 1); return; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! Size of V does not match size of x."); rocblas_dgemv(handle_rocblas, rocblas_operation_none, n, From d28e7412fc403464fb089eb8b2e36879413d6308 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 2 Mar 2026 21:11:10 -0500 Subject: [PATCH 17/23] Align Vector::scal with the adopted BLAS-like standard. --- resolve/vector/VectorHandler.cpp | 4 +--- resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 3 +-- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 3 +-- resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 3 +-- resolve/vector/VectorHandlerHip.hpp | 2 +- resolve/vector/VectorHandlerImpl.hpp | 2 +- 9 files changed, 9 insertions(+), 14 deletions(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index d507032ab..fc90b15c7 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -326,9 +326,8 @@ namespace ReSolve * @pre The diagonal vector must be of the same size as the vector. * @invariant diag * - * @return 0 if successful, 1 otherwise */ - int VectorHandler::scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) + void VectorHandler::scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) { assert(diag->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); @@ -344,7 +343,6 @@ namespace ReSolve return devImpl_->scal(diag, vec); break; } - return 1; } /** diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 5cd62315c..4dbe66fb8 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -39,7 +39,7 @@ namespace ReSolve void scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace); // Scale vector by diagonal matrix represented as a vector (i.e., vec = diag*vec) - int scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); + void scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); // axpy for multivectors void axpyMulti(index_type size, diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 6075fd3e6..f5260892c 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -301,7 +301,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::scal(vector::Vector* diag, vector::Vector* vec) + void VectorHandlerCpu::scal(vector::Vector* diag, vector::Vector* vec) { const real_type* diag_data = diag->getData(memory::HOST); real_type* vec_data = vec->getData(memory::HOST); @@ -312,7 +312,6 @@ namespace ReSolve vec_data[i] *= diag_data[i]; } vec->setDataUpdated(memory::HOST); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 337bf5fd0..2bbd20c9a 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -56,7 +56,7 @@ namespace ReSolve vector::Vector* y, vector::Vector* x); - virtual int scal(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceCpu* workspace_; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 5ed3d7f74..1bc9c2973 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -328,14 +328,13 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::scal(vector::Vector* diag, vector::Vector* vec) + void VectorHandlerCuda::scal(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); cuda::scale(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 9dcaebf1d..4638a8f30 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -65,7 +65,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scal(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 1241b9b0a..b849c13c5 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -322,14 +322,13 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::scal(vector::Vector* diag, vector::Vector* vec) + void VectorHandlerHip::scal(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); hip::scale(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 7b5437871..0c5eb42c9 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -65,7 +65,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scal(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceHIP* workspace_; diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index eaf123fbf..6b3a40867 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -43,7 +43,7 @@ namespace ReSolve virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; // Scale a vector by a diagonal matrix - virtual int scal(vector::Vector* diag, vector::Vector* vec) = 0; + virtual void scal(vector::Vector* diag, vector::Vector* vec) = 0; /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, From c0dc586e22701de079741051786689454765d5f5 Mon Sep 17 00:00:00 2001 From: Shaked Regev <35384901+shakedregev@users.noreply.github.com> Date: Tue, 3 Mar 2026 08:52:02 -0500 Subject: [PATCH 18/23] Update resolve/vector/VectorHandler.cpp --- resolve/vector/VectorHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index fc90b15c7..786e4b850 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -279,7 +279,7 @@ namespace ReSolve /** * @brief Multivector dot product, i.e V^T x * - * Computes V^T x with k vectors from multivector V. Result is storred + * Computes V^T x with k vectors from multivector V. Result is stored * in `res`. * * @param[in] size - Number of elements in a single vector in V From aed62e2d45657e739c1357a03e127a33f18a4bc1 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 3 Mar 2026 09:43:58 -0500 Subject: [PATCH 19/23] Remove unnecessary argument n from VectorHandler::gemv --- resolve/GramSchmidt.cpp | 17 +++++++++++------ resolve/LinSolverIterativeRandFGMRES.cpp | 2 +- resolve/vector/VectorHandler.cpp | 10 +++------- resolve/vector/VectorHandler.hpp | 1 - resolve/vector/VectorHandlerCpu.cpp | 2 +- resolve/vector/VectorHandlerCpu.hpp | 1 - resolve/vector/VectorHandlerCuda.cpp | 5 +++-- resolve/vector/VectorHandlerCuda.hpp | 1 - resolve/vector/VectorHandlerHip.cpp | 5 +++-- resolve/vector/VectorHandlerHip.hpp | 1 - resolve/vector/VectorHandlerImpl.hpp | 1 - tests/unit/vector/VectorHandlerTests.hpp | 4 ++-- 12 files changed, 24 insertions(+), 26 deletions(-) diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 56b973230..2e488e623 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -178,10 +178,15 @@ namespace ReSolve return 0; case CGS2: + // std::cout << "k = " << i + 1 << std::endl; + // std::cout << "size of V: " << V->getSize() << std::endl; + // std::cout << "num vecs in V: " << V->getNumVectors() << std::endl; + // std::cout << "size of y (vec_v_): " << vec_v_->getSize() << std::endl; + // std::cout << "size of x (vec_Hcolumn_): " << vec_Hcolumn_->getSize() << std::endl << std::endl; vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to aux, we will need it later @@ -191,11 +196,11 @@ namespace ReSolve mem_.deviceSynchronize(); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); mem_.deviceSynchronize(); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H @@ -373,9 +378,9 @@ namespace ReSolve case CGS1: vec_v_->setData(V->getData(i + 1, memspace_), memspace_); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 9978bbdc7..6651f289f 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -269,7 +269,7 @@ namespace ReSolve vec_aux_->copyFromExternal(&h_H_[i * (restart_ + 1)], memory::HOST, memspace_); // V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i); - vector_handler_->gemv('N', n_, i + 1, MINUS_ONE, ONE, vec_V_, vec_aux_, &vec_v, memspace_); + vector_handler_->gemv('N', i + 1, MINUS_ONE, ONE, vec_V_, vec_aux_, &vec_v, memspace_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; vector_handler_->scal(t, &vec_v, memspace_); diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 786e4b850..f677feb0c 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -203,7 +203,7 @@ namespace ReSolve * @param[in] V - Multivector containing the matrix, organized columnwise * @param[in] y - Vector, k x 1 if N and n x 1 if T * @param[in,out] x - Vector, n x 1 if N and k x 1 if T - * @param[in] memspace - cpu or cuda or hip (for now) + * @param[in] memspace - enum specifying HOST or DEVICE memory space. * * @note Parameter k is not the total number of columns in V but the number * of columns to use in matrix-vector product. @@ -215,7 +215,6 @@ namespace ReSolve * */ void VectorHandler::gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, @@ -226,16 +225,13 @@ namespace ReSolve { using namespace ReSolve::memory; - // TODO: Remove n as the argument becuase it must always be n = V->getSize() - assert(n == V->getSize() && "gemv: n does not match the number of rows in V"); - switch (memspace) { case HOST: - cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + cpuImpl_->gemv(transpose, k, alpha, beta, V, y, x); break; case DEVICE: - devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + devImpl_->gemv(transpose, k, alpha, beta, V, y, x); break; } x->setDataUpdated(memspace); diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 4dbe66fb8..674a8c538 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -59,7 +59,6 @@ namespace ReSolve // Dense matrix-vector product. void gemv(char transpose, - index_type n, index_type k, // number of vectors from multivector V to use const real_type alpha, const real_type beta, diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index f5260892c..b255453a2 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -147,7 +147,6 @@ namespace ReSolve * */ void VectorHandlerCpu::gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, @@ -159,6 +158,7 @@ namespace ReSolve const real_type* V_data = V->getData(memory::HOST); const real_type* y_data = y->getData(memory::HOST); real_type* x_data = x->getData(memory::HOST); + const index_type n = V->getSize(); index_type i, j; real_type sum; diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 2bbd20c9a..d00b06305 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -48,7 +48,6 @@ namespace ReSolve * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ virtual void gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 1bc9c2973..387ad5510 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -165,7 +165,6 @@ namespace ReSolve * */ void VectorHandlerCuda::gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, @@ -173,7 +172,9 @@ namespace ReSolve vector::Vector* y, vector::Vector* x) { - cublasHandle_t handle_cublas = workspace_->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); + const index_type n = V->getSize(); + switch (transpose) { case 'T': diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 4638a8f30..a18f8dbfe 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -49,7 +49,6 @@ namespace ReSolve * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ virtual void gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index b849c13c5..0a2c1a2a1 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -159,7 +159,6 @@ namespace ReSolve * */ void VectorHandlerHip::gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, @@ -167,7 +166,9 @@ namespace ReSolve vector::Vector* y, vector::Vector* x) { - rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); + const index_type n = V->getSize(); + switch (transpose) { case 'T': diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 0c5eb42c9..f890bf113 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -49,7 +49,6 @@ namespace ReSolve * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ virtual void gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 6b3a40867..6e76091b4 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -52,7 +52,6 @@ namespace ReSolve * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ virtual void gemv(char transpose, - index_type n, index_type k, const real_type alpha, const real_type beta, diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 07d51e825..76e558244 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -230,9 +230,9 @@ namespace ReSolve real_type alpha = -1.0; real_type beta = 1.0; - handler_.gemv('N', N, K, alpha, beta, &V, &yN, &xN, memspace_); + handler_.gemv('N', K, alpha, beta, &V, &yN, &xN, memspace_); status *= verifyAnswer(xN, static_cast(K) + 0.5); - handler_.gemv('T', N, K, alpha, beta, &V, &yT, &xT, memspace_); + handler_.gemv('T', K, alpha, beta, &V, &yT, &xT, memspace_); status *= verifyAnswer(xT, static_cast(N) + 0.5); return status.report(__func__); From 2e34bce0284dfa39c5c37e2a518239667331ea76 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 3 Mar 2026 12:44:21 -0500 Subject: [PATCH 20/23] Add assert incluides explicitly to VectorHandler --- resolve/vector/VectorHandlerCpu.cpp | 2 ++ resolve/vector/VectorHandlerCuda.cpp | 1 + resolve/vector/VectorHandlerHip.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index b255453a2..a1620c939 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -1,5 +1,7 @@ #include "VectorHandlerCpu.hpp" +#include + #include #include #include diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 387ad5510..9d4626b0c 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -1,5 +1,6 @@ #include "VectorHandlerCuda.hpp" +#include #include #include diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 0a2c1a2a1..9a006acb8 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -1,5 +1,6 @@ #include "VectorHandlerHip.hpp" +#include #include #include From dd5b4cdb982b3c273430ae0d324e58f33a2474a3 Mon Sep 17 00:00:00 2001 From: shakedregev Date: Tue, 3 Mar 2026 13:41:58 -0500 Subject: [PATCH 21/23] fixed missing #include --- resolve/matrix/MatrixHandlerHip.cpp | 2 +- tests/functionality/testSysGLU.cpp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 8586101e7..db5d59956 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,5 +1,5 @@ #include "MatrixHandlerHip.hpp" - +#include #include #include diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index e703c6b46..1fbb9aa6e 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -184,13 +184,8 @@ int main(int argc, char* argv[]) vec_test->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); // compute ||x_diff|| = ||x - x_true|| norm -<<<<<<< HEAD vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler.axpy(&MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); -======= - vec_diff->copyDataFrom(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); ->>>>>>> 6a3875d7 (Pass scalars by value in VectorHandler.) real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); // Compute residual norm ON THE GPU using REFERENCE solution From b4f82ee910f51dcefcc1cffbc4e595b84805dcd8 Mon Sep 17 00:00:00 2001 From: shakedregev Date: Tue, 3 Mar 2026 18:42:30 +0000 Subject: [PATCH 22/23] Apply pre-commmit fixes --- resolve/matrix/MatrixHandlerHip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index db5d59956..8586101e7 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,5 +1,5 @@ #include "MatrixHandlerHip.hpp" -#include + #include #include From 8d6fea583f870492054826743e5b032086ec690a Mon Sep 17 00:00:00 2001 From: Shaked Regev <35384901+shakedregev@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:44:28 -0500 Subject: [PATCH 23/23] Apply suggestion from @shakedregev typo fix --- resolve/vector/VectorHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index f677feb0c..4f8be96aa 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -244,7 +244,7 @@ namespace ReSolve * @param[in] alpha vector size k x 1 * @param[in] x (multi)vector [size x k] * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg memspace (cpu or cuda or hip) + * @param[in] memspace string containg memspace (cpu, cuda, or hip) * * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() *