diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5fbff7ac9..d133ced0d 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,8 +26,7 @@ - [ ] CPU backend - [ ] CUDA backend - [ ] HIP backend -- [ ] I have manually run the non-experimental examples and verified that residuals are close to machine precision. (In your build directory run: -`./examples/.exe -h` to get instructions how to run examples). Code tested on: +- [ ] I have manually run the non-experimental examples and verified that residuals are close to machine precision. (In your build directory run: `./examples/.exe -h` to get instructions how to run examples). Code tested on: - [ ] CPU backend - [ ] CUDA backend - [ ] HIP backend diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aac751e2..e5e08eeec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ - Reworked templates to include example tests. -- Removed unnecessary full facotorization in the examples and made the input 1 based. +- Removed unnecessary full factorization in the examples. + +- Updated method names in `VectorHandler` to match BLAS naming. - Added `cons` counterparts to `Vector::getData` methods. 
diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index cd78f7c43..3d569371c 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -226,8 +226,8 @@ namespace ReSolve // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type inf_norm_x = vh_.amax(x_, memspace_); + real_type inf_norm_res = vh_.amax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -318,8 +318,8 @@ namespace ReSolve // Compute norm of scaled residuals mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); - inf_norm_x_ = vh_.infNorm(x_, memspace_); - inf_norm_res_ = vh_.infNorm(res_, memspace_); + inf_norm_x_ = vh_.amax(x_, memspace_); + inf_norm_res_ = vh_.amax(res_, memspace_); nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); } diff --git a/examples/randGmres.cpp b/examples/randGmres.cpp index 59c7e5bdb..f8ec86a54 100644 --- a/examples/randGmres.cpp +++ b/examples/randGmres.cpp @@ -5,7 +5,6 @@ #include "ExampleHelper.hpp" #include #include -#include #include #include #include diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 07dbf7c28..628963c8b 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -18,7 +18,6 @@ set(ReSolve_SRC LinSolverIterativeFGMRES.cpp LinSolverDirectCpuILU0.cpp LinSolverIterativeRandFGMRES.cpp - LinSolverDirectSerialILU0.cpp SystemSolver.cpp Preconditioner.cpp PreconditionerLU.cpp diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 533f9b02d..2e488e623 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -158,7 +158,7 @@ namespace ReSolve t = vector_handler_->dot(vec_v_, vec_w_, memspace_); H[idxmap(i, j, num_vecs_ + 1)] = t; t *= -1.0; - vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); + 
vector_handler_->axpy(t, vec_v_, vec_w_, memspace_); } t = 0.0; t = vector_handler_->dot(vec_w_, vec_w_, memspace_); @@ -168,7 +168,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); } else { @@ -178,10 +178,15 @@ namespace ReSolve return 0; case CGS2: + // std::cout << "k = " << i + 1 << std::endl; + // std::cout << "size of V: " << V->getSize() << std::endl; + // std::cout << "num vecs in V: " << V->getNumVectors() << std::endl; + // std::cout << "size of y (vec_v_): " << vec_v_->getSize() << std::endl; + // std::cout << "size of x (vec_Hcolumn_): " << vec_Hcolumn_->getSize() << std::endl << std::endl; vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to aux, we will need it later @@ -191,11 +196,11 @@ namespace ReSolve mem_.deviceSynchronize(); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); mem_.deviceSynchronize(); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H @@ -218,7 +223,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_v_, memspace_); + vector_handler_->scal(t, vec_v_, memspace_); 
} else { @@ -233,7 +238,7 @@ namespace ReSolve vec_w_->setData(V->getData(i + 1, memspace_), memspace_); vec_rv_->resize(i + 1); - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vector_handler_->dot2Multi(n, V, i + 1, vec_x_, vec_rv_, memspace_); vec_rv_->setDataUpdated(memspace_); if (memspace_ == memory::DEVICE) { @@ -260,7 +265,7 @@ namespace ReSolve } // for j vec_Hcolumn_->resize(i + 1); vec_Hcolumn_->copyFromExternal(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + vector_handler_->axpyMulti(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); // normalize (second synch) t = vector_handler_->dot(vec_w_, vec_w_, memspace_); @@ -270,7 +275,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); for (int ii = 0; ii <= i; ++ii) { vec_v_->setData(V->getData(ii, memspace_), memspace_); @@ -290,7 +295,7 @@ namespace ReSolve vec_w_->setData(V->getData(i + 1, memspace_), memspace_); vec_rv_->resize(i + 1); - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vector_handler_->dot2Multi(n, V, i + 1, vec_x_, vec_rv_, memspace_); vec_rv_->setDataUpdated(memspace_); if (memspace_ == memory::DEVICE) { @@ -351,7 +356,7 @@ namespace ReSolve vec_Hcolumn_->resize(i + 1); vec_Hcolumn_->copyFromExternal(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + vector_handler_->axpyMulti(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); // normalize (second synch) t = vector_handler_->dot(vec_w_, vec_w_, memspace_); // set the last entry in Hessenberg matrix @@ -360,7 +365,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); + vector_handler_->scal(t, vec_w_, memspace_); } else { @@ -373,9 +378,9 @@ 
namespace ReSolve case CGS1: vec_v_->setData(V->getData(i + 1, memspace_), memspace_); // Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + vector_handler_->gemv('T', i + 1, ONE, ZERO, V, vec_v_, vec_Hcolumn_, memspace_); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + vector_handler_->gemv('N', i + 1, ONE, MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); mem_.deviceSynchronize(); // copy H_col to H @@ -391,7 +396,7 @@ namespace ReSolve if (std::abs(t) > MACHINE_EPSILON) { t = 1.0 / t; - vector_handler_->scal(&t, vec_v_, memspace_); + vector_handler_->scal(t, vec_v_, memspace_); } else { diff --git a/resolve/LinSolverDirectSerialILU0.cpp b/resolve/LinSolverDirectSerialILU0.cpp deleted file mode 100644 index 0785c38ed..000000000 --- a/resolve/LinSolverDirectSerialILU0.cpp +++ /dev/null @@ -1,386 +0,0 @@ -#include "LinSolverDirectSerialILU0.hpp" - -#include - -#include -#include -#include - -namespace ReSolve -{ - using out = io::Logger; - - LinSolverDirectSerialILU0::LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace) - { - workspace_ = workspace; - } - - LinSolverDirectSerialILU0::~LinSolverDirectSerialILU0() - { - if (owns_factors_) - { - delete L_; - delete U_; - L_ = nullptr; - U_ = nullptr; - } - delete[] h_aux1_; - delete[] h_ILU_vals_; - } - - int LinSolverDirectSerialILU0::setup(matrix::Sparse* A, - matrix::Sparse*, - matrix::Sparse*, - index_type*, - index_type*, - vector_type*) - { - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - index_type nnz = A_->getNnz(); - - h_ILU_vals_ = new real_type[nnz]; - h_aux1_ = new real_type[n]; - - index_type zero_pivot = 0; // no zero pivot - - // copy A values to a buffer first - for (index_type i = 0; i < nnz; ++i) - { - h_ILU_vals_[i] = A_->getValues(ReSolve::memory::HOST)[i]; - } - - // allocate helper variables that make this code fast - 
index_type* u_ptr = new index_type[n]; - index_type* ja_mapper = new index_type[n]; - - // aux scalars for indexing etc - index_type k, j, jw, j1, j2; - - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) - { - ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = j; - } - // IJK ILU - j = j1; - while (j < j2) - { - k = A_->getColData(ReSolve::memory::HOST)[j]; - if (k < i) - { - h_ILU_vals_[j] /= h_ILU_vals_[u_ptr[k]]; - for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) - { - jw = ja_mapper[A_->getColData(ReSolve::memory::HOST)[jj]]; - if (jw != 0) - { - h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; - } - } - } - else - { - break; - } - j++; - } - u_ptr[i] = j; - if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) - { - zero_pivot = -1; // zero pivot is in place (i,i) on the diagonal - return zero_pivot; - } - // reset mapper - for (index_type j = j1; j < j2; ++j) - { - ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = 0; - } - } - - // clean up - delete[] ja_mapper; - delete[] u_ptr; - - // split into L and U! - index_type nnzL = 0, nnzU = 0; - // the diagonal values GO TO U, L has 1s on the diagonal - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) - { - if (A->getColData(ReSolve::memory::HOST)[j] == i) - { - // diagonal, add to both - nnzL++; - nnzU++; - } - if (A->getColData(ReSolve::memory::HOST)[j] > i) - { - // upper part - nnzU++; - } - if (A->getColData(ReSolve::memory::HOST)[j] < i) - { - // lower part - nnzL++; - } - } - } - // TODO: What is the purpose of nnzL and nnzU if they are not used after this? 
- // allocate L and U - - L_ = new matrix::Csr(n, n, nnzL, false, true); - U_ = new matrix::Csr(n, n, nnzU, false, true); - owns_factors_ = true; - - L_->allocateMatrixData(ReSolve::memory::HOST); - U_->allocateMatrixData(ReSolve::memory::HOST); - index_type lit = 0, uit = 0, kL, kU; - L_->getRowData(ReSolve::memory::HOST)[0] = 0; - U_->getRowData(ReSolve::memory::HOST)[0] = 0; - - for (index_type i = 0; i < n; ++i) - { - j1 = A_->getRowData(ReSolve::memory::HOST)[i]; - j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - kL = 0; - kU = 0; - for (index_type j = j1; j < j2; ++j) - { - - if (A->getColData(ReSolve::memory::HOST)[j] == i) - { - // diagonal, add to both - - L_->getValues(ReSolve::memory::HOST)[lit] = 1.0; - U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; - - L_->getColData(ReSolve::memory::HOST)[lit] = i; - U_->getColData(ReSolve::memory::HOST)[uit] = i; - - lit++; - uit++; - kL++; - kU++; - } - - if (A->getColData(ReSolve::memory::HOST)[j] > i) - { - // upper part - - U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; - U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; - ; - - uit++; - kU++; - } - - if (A->getColData(ReSolve::memory::HOST)[j] < i) - { - // lower part - L_->getValues(ReSolve::memory::HOST)[lit] = h_ILU_vals_[j]; - L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; - - lit++; - kL++; - } - } - // update row pointers (offsets) - L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; - U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; - } - - return zero_pivot; - } - - int LinSolverDirectSerialILU0::reset(matrix::Sparse* A) - { - return this->setup(A); - } - - // solution is returned in RHS - int LinSolverDirectSerialILU0::solve(vector_type* rhs) - { - int error_sum = 0; - // printf("solve t 1\n"); - // h_aux1 = L^{-1} rhs - for (index_type i = 0; i < 
L_->getNumRows(); ++i) - { - h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) - { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; - } - h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; - } - - // rhs = U^{-1} h_aux1 - - for (index_type i = A_->getNumRows() - 1; i >= 0; --i) - { - rhs->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) - { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - rhs->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * rhs->getData(ReSolve::memory::HOST)[col]; - } - rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry - } - - return error_sum; - } - - int LinSolverDirectSerialILU0::solve(vector_type* rhs, vector_type* x) - { - // printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); - int error_sum = 0; - // h_aux1 = L^{-1} rhs - // for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); - for (index_type i = 0; i < L_->getNumRows(); ++i) - { - h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) - { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; - } - h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; - } - - // for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, 
h_aux1_[ii]); - // x = U^{-1} h_aux1 - - for (index_type i = U_->getNumRows() - 1; i >= 0; --i) - { - x->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) - { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; - } - x->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry - } - // for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); - return error_sum; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to set. - * @return int Error code. - */ - int LinSolverDirectSerialILU0::setCliParam(const std::string id, const std::string /* value */) - { - switch (getParamId(id)) - { - default: - std::cout << "Setting parameter failed!\n"; - } - return 0; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return std::string Value of the string parameter to return. - */ - std::string LinSolverDirectSerialILU0::getCliParamString(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; - } - return ""; - } - - /** - * @brief Placeholder function for now. 
- * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return int Value of the int parameter to return. - */ - index_type LinSolverDirectSerialILU0::getCliParamInt(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; - } - return -1; - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return real_type Value of the real_type parameter to return. - */ - real_type LinSolverDirectSerialILU0::getCliParamReal(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; - } - return std::numeric_limits::quiet_NaN(); - } - - /** - * @brief Placeholder function for now. - * - * The following switch (getParamId(Id)) cases always run the default and - * are currently redundant code (like an if (true)). - * In the future, they will be expanded to include more options. - * - * @param id - string ID for parameter to get. - * @return bool Value of the bool parameter to return. 
- */ - bool LinSolverDirectSerialILU0::getCliParamBool(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; - } - return false; - } - - int LinSolverDirectSerialILU0::printCliParam(const std::string id) const - { - switch (getParamId(id)) - { - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; - } - return 0; - } - -} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.hpp b/resolve/LinSolverDirectSerialILU0.hpp deleted file mode 100644 index f69aa2d93..000000000 --- a/resolve/LinSolverDirectSerialILU0.hpp +++ /dev/null @@ -1,59 +0,0 @@ - -#pragma once - -#include "Common.hpp" -#include -#include -#include - -namespace ReSolve -{ - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } - - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } - - class LinSolverDirectSerialILU0 : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); - ~LinSolverDirectSerialILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - int reset(matrix::Sparse* A) override; - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - // MemoryHandler 
mem_; ///< Device memory manager object - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool owns_factors_{false}; ///< If the class owns L and U factors - - real_type* h_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep - // the values of ILU decomposition. - real_type* h_ILU_vals_{nullptr}; - }; -} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 67ee67a7a..67efc53e3 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -176,7 +176,7 @@ namespace ReSolve // normalize first vector t = 1.0 / rnorm; - vector_handler_->scal(&t, vec_V_, memspace_); + vector_handler_->scal(t, vec_V_, memspace_); // initialize norm history h_rs_[0] = rnorm; i = -1; @@ -272,7 +272,7 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_z, x, memspace_); } } else @@ -282,14 +282,14 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x - vector_handler_->axpy(&ONE, &vec_v, x, memspace_); + vector_handler_->axpy(ONE, &vec_v, x, memspace_); } /* test solution */ diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index bc03bf4f0..6651f289f 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -169,7 +169,7 @@ namespace ReSolve if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -219,8 
+219,8 @@ namespace ReSolve // normalize first vector t = 1.0 / rnorm; - vector_handler_->scal(&t, vec_V_, memspace_); - vector_handler_->scal(&t, vec_S_, memspace_); + vector_handler_->scal(t, vec_V_, memspace_); + vector_handler_->scal(t, vec_S_, memspace_); mem_.deviceSynchronize(); @@ -259,7 +259,7 @@ namespace ReSolve sketching_handler_->Theta(&vec_v, &vec_s); if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); GS_->orthogonalize(k_rand_, vec_S_, h_H_, i); @@ -269,10 +269,10 @@ namespace ReSolve vec_aux_->copyFromExternal(&h_H_[i * (restart_ + 1)], memory::HOST, memspace_); // V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i); - vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_); + vector_handler_->gemv('N', i + 1, MINUS_ONE, ONE, vec_V_, vec_aux_, &vec_v, memspace_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; - vector_handler_->scal(&t, &vec_v, memspace_); + vector_handler_->scal(t, &vec_v, memspace_); mem_.deviceSynchronize(); vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); @@ -340,7 +340,7 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_z, x, memspace_); } } else @@ -350,14 +350,14 @@ namespace ReSolve for (j = 0; j <= i; j++) { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); - vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); + vector_handler_->axpy(h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x - vector_handler_->axpy(&ONE, &vec_v, x, memspace_); + vector_handler_->axpy(ONE, &vec_v, x, memspace_); } /* test solution */ @@ -382,7 +382,7 @@ namespace ReSolve 
sketching_handler_->Theta(&vec_v, &vec_s); if (sketching_method_ == fwht) { - vector_handler_->scal(&one_over_k_, &vec_s, memspace_); + vector_handler_->scal(one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); rnorm = vector_handler_->dot(vec_S_, vec_S_, memspace_); diff --git a/resolve/RefactorizationSolver.cpp b/resolve/RefactorizationSolver.cpp deleted file mode 100644 index 0b1fab8f5..000000000 --- a/resolve/RefactorizationSolver.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "RefactorizationSolver.hpp" - -namespace ReSolve -{ - RefactorizationSolver::RefactorizationSolver() - { - } -} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.hpp b/resolve/RefactorizationSolver.hpp deleted file mode 100644 index 482cb74cc..000000000 --- a/resolve/RefactorizationSolver.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include - -namespace ReSolve -{ - RefactorizationSolver - { - using vector_type = vector::Vector; - - public: - RefactorizationSolver(); - ~RefactorizationSolver(); - int setup(std::string first_solver, - std::string refact_solver_, - std::string use_ir_); - - int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); - - int solve(matrix::Sparse * A, vector_type * vec_rhs, vector_type * vec_x); - - private: - std::string first_solver_name_; - std::string refact_solver_name_; - std::string use_ir_; - // IR parameters - real_type ir_tol_; - index_type ir_maxit_; - index_type ir_gs_; - - LinSolverDirect* first_solver_; - LinSolverDirect* refact_solver_; - LinSolverIterative* ir_solver_; - bool factorization_exists_; - }; -} // namespace ReSolve diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index b4df7ae6a..f7a4acf88 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -319,7 +318,7 @@ namespace ReSolve { if (memspace_ == "cpu") { - preconditionSolver_ = new LinSolverDirectSerialILU0(workspaceCpu_); + 
preconditionSolver_ = new LinSolverDirectCpuILU0(workspaceCpu_); preconditioner_ = new PreconditionerLU(preconditionSolver_); #ifdef RESOLVE_USE_CUDA } @@ -876,8 +875,8 @@ namespace ReSolve } matrixHandler_->setValuesChanged(true, ms); matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUS_ONE, ms); - resnorm = vectorHandler_->infNorm(resVector_, ms); - norm_x = vectorHandler_->infNorm(x, ms); + resnorm = vectorHandler_->amax(resVector_, ms); + norm_x = vectorHandler_->amax(x, ms); matrixHandler_->matrixInfNorm(A_, &norm_A, ms); return resnorm / (norm_x * norm_A); } diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index bdd8a1f76..c6081012c 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -141,11 +141,11 @@ namespace ReSolve * @param[in] alpha - doble array, size [k x 1] */ template - __global__ void massAxpy3(index_type N, - index_type k, - const real_type* x_data, - real_type* y_data, - const real_type* alpha) + __global__ void axpyMulti3(index_type N, + index_type k, + const real_type* x_data, + real_type* y_data, + const real_type* alpha) { index_type i = blockIdx.x * blockDim.x + threadIdx.x; index_type t = threadIdx.x; @@ -287,12 +287,12 @@ namespace ReSolve * value of Tv5? * @todo Should we use dynamic shared memory here instead? 
*/ - void mass_inner_product_two_vectors(index_type n, - index_type i, - const real_type* vec1, - const real_type* vec2, - const real_type* mvec, - real_type* result) + void dot_2_multi(index_type n, + index_type i, + const real_type* vec1, + const real_type* vec2, + const real_type* mvec, + real_type* result) { kernels::MassIPTwoVec<<>>(vec1, vec2, mvec, result, i, n); } @@ -306,9 +306,9 @@ namespace ReSolve * @param[out] y - (n x (i+1)) multivector * @param[in] alpha - ((i+1) x 1) vector */ - void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha) + void axpy_multi(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha) { - kernels::massAxpy3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha); + kernels::axpyMulti3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha); } /** diff --git a/resolve/cuda/cudaKernels.h b/resolve/cuda/cudaKernels.h index 037a3ae8e..bcc6da834 100644 --- a/resolve/cuda/cudaKernels.h +++ b/resolve/cuda/cudaKernels.h @@ -15,14 +15,14 @@ namespace ReSolve { namespace cuda { - void mass_inner_product_two_vectors(index_type n, - index_type i, - const real_type* vec1, - const real_type* vec2, - const real_type* mvec, - real_type* result); + void dot_2_multi(index_type n, + index_type i, + const real_type* vec1, + const real_type* vec2, + const real_type* mvec, + real_type* result); - void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha); + void axpy_multi(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha); void leftScale(index_type n, const index_type* a_row_ptr, diff --git a/resolve/hip/hipKernels.h b/resolve/hip/hipKernels.h index 4308ac589..9c8aaf3b6 100644 --- a/resolve/hip/hipKernels.h +++ b/resolve/hip/hipKernels.h @@ -16,13 +16,13 @@ namespace ReSolve { namespace hip { - void mass_inner_product_two_vectors(index_type n, - index_type i, - real_type* vec1, - real_type* vec2, - real_type* 
mvec, - real_type* result); - void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha); + void dot_2_multi(index_type n, + index_type i, + real_type* vec1, + real_type* vec2, + real_type* mvec, + real_type* result); + void axpy_multi(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha); void leftScale(index_type n, const index_type* a_row_ptr, diff --git a/resolve/hip/hipKernels.hip b/resolve/hip/hipKernels.hip index fdf67bfcb..a6e8314c5 100644 --- a/resolve/hip/hipKernels.hip +++ b/resolve/hip/hipKernels.hip @@ -131,7 +131,7 @@ namespace ReSolve { * @param[in] alpha - */ template - __global__ void massAxpy3_kernel(index_type N, + __global__ void axpyMulti3_kernel(index_type N, index_type k, const real_type* x_data, real_type* y_data, @@ -388,12 +388,12 @@ namespace ReSolve { * value of Tv5? * @todo Should we use dynamic shared memory here instead? */ - void mass_inner_product_two_vectors(index_type n, - index_type i, - real_type* vec1, - real_type* vec2, - real_type* mvec, - real_type* result) + void dot_2_multi(index_type n, + index_type i, + real_type* vec1, + real_type* vec2, + real_type* mvec, + real_type* result) { hipLaunchKernelGGL(kernels::MassIPTwoVec_kernel, dim3(i), @@ -417,9 +417,9 @@ namespace ReSolve { * @param[out] y - (n x (i+1)) multivector * @param[in] alpha - ((i+1) x 1) vector */ - void mass_axpy(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha) + void axpy_multi(index_type n, index_type i, real_type* x, real_type* y, real_type* alpha) { - hipLaunchKernelGGL(kernels::massAxpy3_kernel, + hipLaunchKernelGGL(kernels::axpyMulti3_kernel, dim3((n + 384 - 1) / 384), dim3(384), 0, diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 86e30ba4b..4f8be96aa 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -117,7 +117,7 @@ namespace ReSolve * @param memspace[in] string containg memspace (cpu or cuda or hip) * 
*/ - void VectorHandler::scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace) + void VectorHandler::scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) @@ -141,16 +141,16 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandler::infNorm(vector::Vector* x, memory::MemorySpace memspace) + real_type VectorHandler::amax(vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->infNorm(x); + return cpuImpl_->amax(x); break; case DEVICE: - return devImpl_->infNorm(x); + return devImpl_->amax(x); break; } return -1.0; @@ -165,7 +165,10 @@ namespace ReSolve * @param[in] memspace String containg memspace (cpu or cuda or hip) * */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpy(const real_type alpha, + /* const */ vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace) { // AXPY: y = alpha * x + y using namespace ReSolve::memory; @@ -182,96 +185,128 @@ namespace ReSolve } /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. - * i.e., x = beta*x + alpha*V*y + * @brief gemv computes dense matrix-vector product. + * + * In Re::Solve applications, gemv is used to compute dot products of + * multivectors. + * + * If `transpose = N` (no), `x := beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * If `transpose = T` (yes), `x := beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
* * @param[in] Transpose - yes (T) or no (N) * @param[in] n - Number of rows in (non-transposed) matrix - * @param[in] k - Number of columns in (non-transposed) + * @param[in] k - Number of columns in (non-transposed) matrix to use * @param[in] alpha - Constant real number * @param[in] beta - Constant real number * @param[in] V - Multivector containing the matrix, organized columnwise * @param[in] y - Vector, k x 1 if N and n x 1 if T * @param[in,out] x - Vector, n x 1 if N and k x 1 if T - * @param[in] memspace - cpu or cuda or hip (for now) + * @param[in] memspace - enum specifying HOST or DEVICE memory space. + * + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. * */ void VectorHandler::gemv(char transpose, - index_type n, index_type k, - const real_type* alpha, - const real_type* beta, + const real_type alpha, + const real_type beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, memory::MemorySpace memspace) { using namespace ReSolve::memory; + switch (memspace) { case HOST: - cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + cpuImpl_->gemv(transpose, k, alpha, beta, V, y, x); break; case DEVICE: - devImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + devImpl_->gemv(transpose, k, alpha, beta, V, y, x); break; } x->setDataUpdated(memspace); } /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * @brief Multivector axpy: y: = y + \sum_i alpha_i x_i * * @param[in] size number of elements in y * @param[in] alpha vector size k x 1 - * @param[in] x (multi)vector size size x k + * @param[in] x (multi)vector [size x k] * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg 
memspace (cpu or cuda or hip) + * @param[in] memspace string containg memspace (cpu, cuda, or hip) * * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandler::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + void VectorHandler::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace) { + assert(y->getSize() == x->getSize() && "Sizes of x and y must match!\n"); + assert(alpha->getSize() == k && "Size of alpha must match k!\n"); + using namespace ReSolve::memory; switch (memspace) { case HOST: - cpuImpl_->massAxpy(size, alpha, k, x, y); + cpuImpl_->axpyMulti(size, alpha, k, x, y); break; case DEVICE: - devImpl_->massAxpy(size, alpha, k, x, y); + devImpl_->axpyMulti(size, alpha, k, x, y); break; } y->setDataUpdated(memspace); } /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) - * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * @brief Multivector dot product, i.e V^T x + * + * Computes V^T x with k vectors from multivector V. Result is stored + * in `res`. 
* * @param[in] size - Number of elements in a single vector in V - * @param[in] V - Multivector; k vectors size n x 1 each - * @param[in] k - Number of vectors in V - * @param[in] x - Multivector; 2 vectors size n x 1 each - * @param[out] res - Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace - String containg memspace (cpu or cuda or hip) + * @param[in] V - Multivector; k vectors of size n x 1 each + * @param[in] k - Number of vectors in V to use + * @param[in] x - Multivector; 2 vectors of size n x 1 each + * @param[out] res - Multivector; 2 vectors size k x 1 each + * @param[in] memspace - String containg memspace (cpu, cuda, or hip) * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * @pre _size_ > 0, _k_ > 0, size = x->getSize(). + * @pre _res_ needs to be allocated to k x 2 size. * */ - void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace) + void VectorHandler::dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res, + memory::MemorySpace memspace) { + assert(x->getSize() == V->getSize() && "Sizes of V and x do not match!\n"); + assert(res->getSize() == k && "Size of `res` must match k!\n"); + using namespace ReSolve::memory; switch (memspace) { case HOST: - cpuImpl_->massDot2Vec(size, V, k, x, res); + cpuImpl_->dot2Multi(size, V, k, x, res); break; case DEVICE: - devImpl_->massDot2Vec(size, V, k, x, res); + devImpl_->dot2Multi(size, V, k, x, res); break; } res->setDataUpdated(memspace); @@ -287,24 +322,23 @@ namespace ReSolve * @pre The diagonal vector must be of the same size as the vector. 
* @invariant diag * - * @return 0 if successful, 1 otherwise */ - int VectorHandler::scale(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) + void VectorHandler::scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) { assert(diag->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); assert(vec->getData(memspace) != nullptr && "Vector data is null!\n"); + using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->scale(diag, vec); + return cpuImpl_->scal(diag, vec); break; case DEVICE: - return devImpl_->scale(diag, vec); + return devImpl_->scal(diag, vec); break; } - return 1; } /** diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index cbea0c28e..674a8c538 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -16,8 +16,7 @@ namespace ReSolve } // namespace ReSolve namespace ReSolve -{ // namespace vector { - +{ class VectorHandler { public: @@ -27,44 +26,49 @@ namespace ReSolve VectorHandler(LinAlgWorkspaceHIP* new_workspace); ~VectorHandler(); - // y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + // y := alpha x + y + void axpy(const real_type alpha, + /* const */ vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace); - // dot: x \cdot y + // Dot product of two vectors real_type dot(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); - // scal = alpha * x - void scal(const real_type* alpha, vector::Vector* x, memory::MemorySpace memspace); - - // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); - - // mass dot: V^T x, where V is 
[n x k] and x is [k x 2], everything is stored and returned columnwise - // Size = n - void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, memory::MemorySpace memspace); - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. - */ + // Scale vector by scalar + void scal(const real_type alpha, vector::Vector* x, memory::MemorySpace memspace); + + // Scale vector by diagonal matrix represented as a vector (i.e., vec = diag*vec) + void scal(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); + + // axpy for multivectors + void axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y, + memory::MemorySpace memspace); + + // Dot product of two vectors with a multivector V + void dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res, + memory::MemorySpace memspace); + + // Dense matrix-vector product. 
void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, + index_type k, // number of vectors from multivector V to use + const real_type alpha, + const real_type beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, memory::MemorySpace memspace); - int scale(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); - - /** infNorm: - * Returns infinity norm of a vector (i.e., entry with max abs value) - */ - real_type infNorm(vector::Vector* x, memory::MemorySpace memspace); + // Vector infinity norm + real_type amax(vector::Vector* x, memory::MemorySpace memspace); bool getIsCudaEnabled() const; bool getIsHipEnabled() const; @@ -76,6 +80,6 @@ namespace ReSolve bool isCpuEnabled_{false}; bool isCudaEnabled_{false}; bool isHipEnabled_{false}; - }; + }; // class VectorHandler } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index de7ff3869..a1620c939 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -1,5 +1,7 @@ #include "VectorHandlerCpu.hpp" +#include + #include #include #include @@ -45,10 +47,10 @@ namespace ReSolve real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) { - real_type* x_data = x->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - real_type sum = 0.0; - real_type c = 0.0; + const real_type* x_data = x->getData(memory::HOST); + const real_type* y_data = y->getData(memory::HOST); + real_type sum = 0.0; + real_type c = 0.0; // real_type t, y; for (int i = 0; i < x->getSize(); ++i) { @@ -68,14 +70,15 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerCpu::scal(const real_type alpha, vector::Vector* x) { real_type* x_data = x->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i) { - x_data[i] *= (*alpha); + x_data[i] *= alpha; } + 
x->setDataUpdated(memory::HOST); } /** @@ -86,11 +89,12 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCpu::infNorm(vector::Vector* x) + real_type VectorHandlerCpu::amax(vector::Vector* x) { - real_type* x_data = x->getData(memory::HOST); - real_type vecmax = std::abs(x_data[0]); - real_type v; + const real_type* x_data = x->getData(memory::HOST); + + real_type vecmax = std::abs(x_data[0]); + real_type v; for (int i = 1; i < x->getSize(); ++i) { v = std::abs(x_data[i]); @@ -110,15 +114,16 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerCpu::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y) { // AXPY: y = alpha * x + y real_type* x_data = x->getData(memory::HOST); real_type* y_data = y->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i) { - y_data[i] = (*alpha) * x_data[i] + y_data[i]; + y_data[i] = alpha * x_data[i] + y_data[i]; } + y->setDataUpdated(memory::HOST); } /** @@ -134,34 +139,43 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. 
* */ - void VectorHandlerCpu::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerCpu::gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { // x = beta*x + alpha*V*y OR x = beta*x + alpha*V^Ty - real_type* V_data = V->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - real_type* x_data = x->getData(memory::HOST); + const real_type* V_data = V->getData(memory::HOST); + const real_type* y_data = y->getData(memory::HOST); + real_type* x_data = x->getData(memory::HOST); + const index_type n = V->getSize(); + index_type i, j; real_type sum; switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); for (i = 0; i < k; ++i) { - sum = (*beta) * x_data[i]; + sum = beta * x_data[i]; real_type c = 0.0; for (j = 0; j < n; ++j) { - real_type y = ((*alpha) * V_data[i * n + j] * y_data[j]) - c; + real_type y = (alpha * V_data[i * n + j] * y_data[j]) - c; real_type t = sum + y; c = (t - sum) - y; sum = t; @@ -171,13 +185,15 @@ namespace ReSolve } break; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! 
Size of V does not match size of x."); for (i = 0; i < n; ++i) { - sum = (*beta) * x_data[i]; + sum = beta * x_data[i]; real_type c = 0.0; for (j = 0; j < k; ++j) { - real_type y = ((*alpha) * V_data[n * j + i] * y_data[j]) - c; + real_type y = (alpha * V_data[n * j + i] * y_data[j]) - c; real_type t = sum + y; c = (t - sum) - y; sum = t; @@ -192,6 +208,7 @@ namespace ReSolve } break; } // switch + x->setDataUpdated(memory::HOST); } /** @@ -205,11 +222,11 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCpu::massAxpy(index_type size, - vector::Vector* alpha, - index_type k, - vector::Vector* x, - vector::Vector* y) + void VectorHandlerCpu::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { real_type* alpha_data = alpha->getData(memory::HOST); @@ -227,6 +244,7 @@ namespace ReSolve } y_data[i] = y_data[i] - sum; } + y->setDataUpdated(memory::HOST); } /** @@ -242,15 +260,15 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCpu::massDot2Vec(index_type size, - vector::Vector* V, - index_type q, - vector::Vector* x, - vector::Vector* res) + void VectorHandlerCpu::dot2Multi(index_type size, + vector::Vector* V, + index_type q, + vector::Vector* x, + vector::Vector* res) { - real_type* res_data = res->getData(memory::HOST); - real_type* x_data = x->getData(memory::HOST); - real_type* V_data = V->getData(memory::HOST); + real_type* res_data = res->getData(memory::HOST); + const real_type* x_data = x->getData(memory::HOST); + const real_type* V_data = V->getData(memory::HOST); real_type c0 = 0.0; real_type cq = 0.0; @@ -274,6 +292,7 @@ namespace ReSolve res_data[i + q] = tq; } } + res->setDataUpdated(memory::HOST); } /** @@ -284,18 +303,17 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::scale(vector::Vector* diag, vector::Vector* vec) + void 
VectorHandlerCpu::scal(vector::Vector* diag, vector::Vector* vec) { - real_type* diag_data = diag->getData(memory::HOST); - real_type* vec_data = vec->getData(memory::HOST); - index_type n = vec->getSize(); + const real_type* diag_data = diag->getData(memory::HOST); + real_type* vec_data = vec->getData(memory::HOST); + index_type n = vec->getSize(); for (index_type i = 0; i < n; ++i) { vec_data[i] *= diag_data[i]; } vec->setDataUpdated(memory::HOST); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index bf3c2ba83..d00b06305 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -23,23 +23,23 @@ namespace ReSolve virtual ~VectorHandlerCpu(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if 
`transpose = N` (no), `x = beta*x + alpha*V*y`, @@ -47,16 +47,15 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceCpu* workspace_; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index e60cad347..9d4626b0c 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -1,5 +1,6 @@ #include "VectorHandlerCuda.hpp" +#include #include #include @@ -51,8 +52,15 @@ namespace ReSolve real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); - double nrm = 0.0; - cublasStatus_t st = cublasDdot(handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + + double nrm{0.0}; + cublasStatus_t st = cublasDdot(handle_cublas, + x->getSize(), + x->getData(memory::DEVICE), + 1, + y->getData(memory::DEVICE), + 1, + &nrm); if (st != 0) { out::error() << "Dot product failed with error code " << st << "\n"; @@ -67,14 +75,19 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerCuda::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerCuda::scal(const real_type alpha, vector::Vector* x) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + cublasStatus_t st = 
cublasDscal(handle_cublas, + x->getSize(), + &alpha, + x->getData(memory::DEVICE), + 1); if (st != 0) { - out::error() << "scal crashed with code " << st << "\n"; + out::error() << "scal returned error code " << st << "\n"; } + x->setDataUpdated(memory::DEVICE); } /** @@ -85,7 +98,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerCuda::infNorm(vector::Vector* x) + real_type VectorHandlerCuda::amax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) @@ -95,7 +108,7 @@ namespace ReSolve workspace_->setNormBuffer(buffer); workspace_->setNormBufferState(true); } - real_type norm; + real_type norm{0.0}; // TODO: Shouldn't the return type be cusolverStatus_t ? int status = cusolverSpDnrminf(workspace_->getCusolverSpHandle(), x->getSize(), @@ -117,16 +130,17 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerCuda::axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y) { cublasHandle_t handle_cublas = workspace_->getCublasHandle(); cublasDaxpy(handle_cublas, x->getSize(), - alpha, + &alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1); + y->setDataUpdated(memory::DEVICE); } /** @@ -142,46 +156,57 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. 
* */ - void VectorHandlerCuda::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerCuda::gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { - cublasHandle_t handle_cublas = workspace_->getCublasHandle(); + cublasHandle_t handle_cublas = workspace_->getCublasHandle(); + const index_type n = V->getSize(); + switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); cublasDgemv(handle_cublas, CUBLAS_OP_T, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); return; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! Size of V does not match size of x."); cublasDgemv(handle_cublas, CUBLAS_OP_N, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); if (transpose != 'N') @@ -190,6 +215,7 @@ namespace ReSolve << " in gemv. 
Using non-transposed multivector.\n"; } } + x->setDataUpdated(memory::DEVICE); } /** @@ -203,12 +229,20 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCuda::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerCuda::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { using namespace constants; if (k < 200) { - cuda::mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + cuda::axpy_multi(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -228,6 +262,7 @@ namespace ReSolve y->getData(memory::DEVICE), // c size); // ldc } + y->setDataUpdated(memory::DEVICE); } /** @@ -244,22 +279,22 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCuda::massDot2Vec(index_type size, - vector::Vector* V, - index_type k, - vector::Vector* x, - vector::Vector* res) + void VectorHandlerCuda::dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res) { using namespace constants; if (k < 200) { - cuda::mass_inner_product_two_vectors(size, - k, - x->getData(0, memory::DEVICE), - x->getData(1, memory::DEVICE), - V->getData(memory::DEVICE), - res->getData(memory::DEVICE)); + cuda::dot_2_multi(size, + k, + x->getData(0, memory::DEVICE), + x->getData(1, memory::DEVICE), + V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); } else { @@ -279,6 +314,7 @@ namespace ReSolve res->getData(memory::DEVICE), // c k); // ldc } + res->setDataUpdated(memory::DEVICE); } /** @@ -294,14 +330,13 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::scale(vector::Vector* diag, vector::Vector* vec) + void VectorHandlerCuda::scal(vector::Vector* diag, 
vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); cuda::scale(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 5a5b500c7..a18f8dbfe 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -24,23 +24,23 @@ namespace ReSolve virtual ~VectorHandlerCuda(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, /* const */ vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, @@ -48,14 +48,13 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
*/ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); /** * @brief scale: scales a vector by a diagonal matrix @@ -65,7 +64,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index f3930d8ac..9a006acb8 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -1,5 +1,6 @@ #include "VectorHandlerHip.hpp" +#include #include #include @@ -51,11 +52,17 @@ namespace ReSolve { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); double nrm = 0.0; - rocblas_status st = rocblas_ddot(handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + rocblas_status st = rocblas_ddot(handle_rocblas, + x->getSize(), + x->getData(memory::DEVICE), + 1, + y->getData(memory::DEVICE), + 1, + &nrm); if (st != 0) { - printf("dot product crashed with code %d \n", st); + out::error() << "dot product returned error code " << st << "\n"; } return nrm; } @@ -67,15 +74,20 @@ namespace ReSolve * @param[in,out] x The vector * */ - void VectorHandlerHip::scal(const real_type* alpha, vector::Vector* x) + void VectorHandlerHip::scal(const real_type alpha, vector::Vector* x) { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); - rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + rocblas_status st = rocblas_dscal(handle_rocblas, + x->getSize(), + &alpha, + 
x->getData(memory::DEVICE), + 1); if (st != 0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; + out::error() << "scal returned error code " << st << "\n"; } + x->setDataUpdated(memory::DEVICE); } /** @@ -86,7 +98,7 @@ namespace ReSolve * @return infinity norm (real number) of _x_ * */ - real_type VectorHandlerHip::infNorm(vector::Vector* x) + real_type VectorHandlerHip::amax(vector::Vector* x) { if (workspace_->getNormBufferState() == false) @@ -96,7 +108,7 @@ namespace ReSolve workspace_->setNormBuffer(buffer); workspace_->setNormBufferState(true); } - real_type norm; + real_type norm{0.0}; hip::vector_inf_norm(x->getSize(), x->getData(memory::DEVICE), workspace_->getNormBuffer(), @@ -112,16 +124,17 @@ namespace ReSolve * @param[in,out] y The second vector (result is return in y) * */ - void VectorHandlerHip::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + void VectorHandlerHip::axpy(const real_type alpha, vector::Vector* x, vector::Vector* y) { rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); rocblas_daxpy(handle_rocblas, x->getSize(), - alpha, + &alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1); + y->setDataUpdated(memory::DEVICE); } /** @@ -137,46 +150,57 @@ namespace ReSolve * @param[in] y Vector, k x 1 if N and n x 1 if T * @param[in,out] x Vector, n x 1 if N and k x 1 if T * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * @note Parameter k is not the total number of columns in V but the number + * of columns to use in matrix-vector product. + * + * @pre _n_ > 0, _k_ > 0 + * @pre Number of columns in V >= k + * @pre If transpose = N, size of y must equal k. If transpose = T, size of + * x must equal k. 
* */ - void VectorHandlerHip::gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) + void VectorHandlerHip::gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { - rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); + rocblas_handle handle_rocblas = workspace_->getRocblasHandle(); + const index_type n = V->getSize(); + switch (transpose) { case 'T': + assert((V->getSize() == y->getSize()) + && "gemv: Size mismatch! Size of V does not match size of y."); rocblas_dgemv(handle_rocblas, rocblas_operation_transpose, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); return; default: + assert((V->getSize() == x->getSize()) + && "gemv: Size mismatch! Size of V does not match size of x."); rocblas_dgemv(handle_rocblas, rocblas_operation_none, n, k, - alpha, + &alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, - beta, + &beta, x->getData(memory::DEVICE), 1); if (transpose != 'N') @@ -185,6 +209,7 @@ namespace ReSolve << " in gemv. 
Using non-transposed multivector.\n"; } } + x->setDataUpdated(memory::DEVICE); } /** @@ -198,12 +223,20 @@ namespace ReSolve * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerHip::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerHip::axpyMulti(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { using namespace constants; if (k < 200) { - hip::mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + hip::axpy_multi(size, + k, + x->getData(memory::DEVICE), + y->getData(memory::DEVICE), + alpha->getData(memory::DEVICE)); } else { @@ -223,6 +256,7 @@ namespace ReSolve y->getData(memory::DEVICE), // c size); // ldc } + y->setDataUpdated(memory::DEVICE); } /** @@ -239,13 +273,22 @@ namespace ReSolve * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerHip::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + void VectorHandlerHip::dot2Multi(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res) { using namespace constants; if (k < 200) { - hip::mass_inner_product_two_vectors(size, k, x->getData(0, memory::DEVICE), x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); + hip::dot_2_multi(size, + k, + x->getData(0, memory::DEVICE), + x->getData(1, memory::DEVICE), + V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); } else { @@ -265,6 +308,7 @@ namespace ReSolve res->getData(memory::DEVICE), // c k); // ldc } + res->setDataUpdated(memory::DEVICE); } /** @@ -280,14 +324,13 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::scale(vector::Vector* diag, vector::Vector* vec) + void VectorHandlerHip::scal(vector::Vector* diag, vector::Vector* vec) { real_type* 
diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); hip::scale(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); - return 0; } } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index a1601dddb..f890bf113 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -24,23 +24,23 @@ namespace ReSolve virtual ~VectorHandlerHip(); // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + virtual void axpy(const real_type alpha, vector::Vector* x, vector::Vector* y); // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y); // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); + virtual void scal(const real_type alpha, vector::Vector* x); // vector infinity norm - virtual real_type infNorm(vector::Vector* x); + virtual real_type amax(vector::Vector* x); // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, @@ -48,14 +48,13 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
*/ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); + virtual void gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); /** * @brief scale: scales a vector by a diagonal matrix @@ -65,7 +64,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int scale(vector::Vector* diag, vector::Vector* vec); + virtual void scal(vector::Vector* diag, vector::Vector* vec); private: LinAlgWorkspaceHIP* workspace_; diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 9b6c0a073..6e76091b4 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -24,26 +24,26 @@ namespace ReSolve } // y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) = 0; + virtual void axpy(const real_type alpha, vector::Vector* x, vector::Vector* y) = 0; // dot: x \cdot y virtual real_type dot(vector::Vector* x, vector::Vector* y) = 0; // scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x) = 0; + virtual void scal(const real_type alpha, vector::Vector* x) = 0; - // infNorm = ||x||_\infty - virtual real_type infNorm(vector::Vector* x) = 0; + // amax = ||x||_\infty + virtual real_type amax(vector::Vector* x) = 0; // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; + virtual void axpyMulti(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise // Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, 
index_type k, vector::Vector* x, vector::Vector* res) = 0; + virtual void dot2Multi(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; // Scale a vector by a diagonal matrix - virtual int scale(vector::Vector* diag, vector::Vector* vec) = 0; + virtual void scal(vector::Vector* diag, vector::Vector* vec) = 0; /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, @@ -51,14 +51,13 @@ namespace ReSolve * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - virtual void gemv(char transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) = 0; + virtual void gemv(char transpose, + index_type k, + const real_type alpha, + const real_type beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) = 0; }; } // namespace ReSolve diff --git a/tests/functionality/TestHelper.hpp b/tests/functionality/TestHelper.hpp index d7542a257..0b7bf6e9d 100644 --- a/tests/functionality/TestHelper.hpp +++ b/tests/functionality/TestHelper.hpp @@ -305,8 +305,8 @@ class TestHelper // Compute norm of scaled residuals real_type inf_norm_A = 0.0; mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type inf_norm_x = vh_.amax(x_, memspace_); + real_type inf_norm_res = vh_.amax(res_, memspace_); real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -469,7 +469,7 @@ class TestHelper ReSolve::memory::MemorySpace memspace) { using namespace ReSolve::constants; - vh_.axpy(&MINUS_ONE, &x_true, &x, memspace); // x := -x_true + x + vh_.axpy(MINUS_ONE, &x_true, &x, memspace); // x := -x_true + x return norm2(x, memspace); } diff --git a/tests/functionality/testLUSOL.cpp 
b/tests/functionality/testLUSOL.cpp index 64e2f19b6..3ab659808 100644 --- a/tests/functionality/testLUSOL.cpp +++ b/tests/functionality/testLUSOL.cpp @@ -131,7 +131,7 @@ int main(int argc, char* argv[]) real_type normB = sqrt(vector_handler.dot(&vec_rhs, &vec_rhs, ReSolve::memory::HOST)); // Compute vec_diff := vec_diff - vec_x - vector_handler.axpy(&MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); + vector_handler.axpy(MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); // Compute norm of vec_diff real_type normDiffMatrix = sqrt(vector_handler.dot(&vec_diff, &vec_diff, ReSolve::memory::HOST)); @@ -234,7 +234,7 @@ int main(int argc, char* argv[]) normB = sqrt(vector_handler.dot(&vec_rhs, &vec_rhs, ReSolve::memory::HOST)); // Compute vec_diff := vec_diff - vec_x - vector_handler.axpy(&MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); + vector_handler.axpy(MINUS_ONE, &vec_x, &vec_diff, ReSolve::memory::HOST); // Compute norm of vec_diff normDiffMatrix = sqrt(vector_handler.dot(&vec_diff, &vec_diff, ReSolve::memory::HOST)); diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index 543726fd8..1fbb9aa6e 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -153,8 +153,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals real_type inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - real_type inf_norm_x = vector_handler.infNorm(vec_x, ReSolve::memory::DEVICE); - real_type inf_norm_r = vector_handler.infNorm(vec_r, ReSolve::memory::DEVICE); + real_type inf_norm_x = vector_handler.amax(vec_x, ReSolve::memory::DEVICE); + real_type inf_norm_r = vector_handler.amax(vec_r, ReSolve::memory::DEVICE); real_type nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); real_type nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -185,7 +185,7 @@ int main(int argc, char* 
argv[]) // compute ||x_diff|| = ||x - x_true|| norm vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler.axpy(&MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); + vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); // Compute residual norm ON THE GPU using REFERENCE solution @@ -268,8 +268,8 @@ int main(int argc, char* argv[]) // Compute norm of scaled residuals for the second system inf_norm_A = 0.0; matrix_handler.matrixInfNorm(A, &inf_norm_A, ReSolve::memory::DEVICE); - inf_norm_x = vector_handler.infNorm(vec_x, ReSolve::memory::DEVICE); - inf_norm_r = vector_handler.infNorm(vec_r, ReSolve::memory::DEVICE); + inf_norm_x = vector_handler.amax(vec_x, ReSolve::memory::DEVICE); + inf_norm_r = vector_handler.amax(vec_r, ReSolve::memory::DEVICE); nsr_norm = inf_norm_r / (inf_norm_A * inf_norm_x); nsr_system = solver.getNormOfScaledResiduals(vec_rhs, vec_x); error = std::abs(nsr_system - nsr_norm) / nsr_norm; @@ -288,7 +288,7 @@ int main(int argc, char* argv[]) // compute ||x_diff|| = ||x - x_true|| norm vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vector_handler.axpy(&MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); + vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); real_type normDiffMatrix2 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); // compute the residual using exact solution diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 1eaeec1e3..9f71eede0 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -125,7 +125,7 @@ namespace ReSolve real_type nrm = handler_.dot(&V, &V, memspace_); nrm = sqrt(nrm); nrm = 1.0 / nrm; - handler_.scal(&nrm, &V, memspace_); + handler_.scal(nrm, &V, memspace_); // 
Orthogonalize system and verify result GS.orthogonalize(N, &V, H, 0); diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 04444a815..76e558244 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -49,7 +49,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome infNorm(index_type N) + TestOutcome amax(index_type N) { TestStatus status; status = true; @@ -63,7 +63,7 @@ } x.copyFromExternal(data, memory::HOST, memspace_); - real_type result = handler_.infNorm(&x, memspace_); + real_type result = handler_.amax(&x, memspace_); real_type answer = static_cast<real_type>(N - 1) * 0.1; if (!isEqual(result, answer)) @@ -93,7 +93,7 @@ real_type alpha = 0.5; // the result is a vector with y[i] = 2.5 forall i; - handler_.axpy(&alpha, &x, &y, memspace_); + handler_.axpy(alpha, &x, &y, memspace_); status *= verifyAnswer(y, 2.5); return status.report(__func__); @@ -141,13 +141,13 @@ // the answer is x[i] = 4.375; real_type answer = 4.375; - handler_.scal(&alpha, &x, memspace_); + handler_.scal(alpha, &x, memspace_); status *= verifyAnswer(x, answer); return status.report(__func__); } - TestOutcome massAxpy(index_type N, index_type K) + TestOutcome axpyMulti(index_type N, index_type K) { TestStatus status; @@ -180,7 +180,7 @@ index_type r = K % 2; real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - handler_.massAxpy(N, &alpha, K, &x, &y, memspace_); + handler_.axpyMulti(N, &alpha, K, &x, &y, memspace_); status *= verifyAnswer(y, 2.0 - res); return status.report(__func__); @@ -199,7 +199,7 @@ x.setToConst(1.0, memspace_); y.setToConst(-1.0, memspace_); - handler_.massDot2Vec(N, &x, K, &y, &res, memspace_); + handler_.dot2Multi(N, &x, K, &y, &res, memspace_); status *= verifyAnswer(res, (-1.0) * (real_type) N); @@ -230,9 +230,9 @@ 
namespace ReSolve real_type alpha = -1.0; real_type beta = 1.0; - handler_.gemv('N', N, K, &alpha, &beta, &V, &yN, &xN, memspace_); + handler_.gemv('N', K, alpha, beta, &V, &yN, &xN, memspace_); status *= verifyAnswer(xN, static_cast<real_type>(K) + 0.5); - handler_.gemv('T', N, K, &alpha, &beta, &V, &yT, &xT, memspace_); + handler_.gemv('T', K, alpha, beta, &V, &yT, &xT, memspace_); status *= verifyAnswer(xT, static_cast<real_type>(N) + 0.5); return status.report(__func__); @@ -259,7 +259,7 @@ } diag.copyFromExternal(diag_data.get(), memory::HOST, memspace_); - handler_.scale(&diag, &vec, memspace_); + handler_.scal(&diag, &vec, memspace_); if (memspace_ == memory::DEVICE) { diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 74fae7e41..0d4e02983 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -20,9 +20,9 @@ int main(int, char**) result += test.dot(50); result += test.axpy(50); result += test.scal(50); - result += test.infNorm(50); + result += test.amax(50); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); + result += test.axpyMulti(100, 10); result += test.massDot(100, 10); result += test.scale(100); @@ -42,11 +42,11 @@ int main(int, char**) result += test.axpy(5000); result += test.scal(5000); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); - result += test.massAxpy(1000, 30); + result += test.axpyMulti(100, 10); + result += test.axpyMulti(1000, 30); result += test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.infNorm(1000); + result += test.amax(1000); result += test.scale(1000); std::cout << "\n"; @@ -66,11 +66,11 @@ int main(int, char**) result += test.axpy(5000); result += test.scal(5000); result += test.gemv(5000, 10); - result += test.massAxpy(100, 10); - result += test.massAxpy(1000, 300); + result += test.axpyMulti(100, 10); + result += test.axpyMulti(1000, 300); result += 
test.massDot(100, 10); result += test.massDot(1000, 30); - result += test.infNorm(1000); + result += test.amax(1000); result += test.scale(1000); std::cout << "\n";