diff --git a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp index a06501a77..319ec8ce7 100644 --- a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp @@ -150,15 +150,15 @@ int main(int argc, char* argv[]) std::cout << "KLU analysis status: " << status << std::endl; status = KLU->factorize(); std::cout << "KLU factorization status: " << status << std::endl; - matrix_type* L = KLU->getLFactorCsr(); - matrix_type* U = KLU->getUFactorCsr(); + matrix_type* L = KLU->getLFactor(); + matrix_type* U = KLU->getUFactor(); if (L == nullptr) { printf("ERROR"); } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); - GLU->setupCsr(A, L, U, P, Q); + GLU->setup(A, L, U, P, Q); status = GLU->solve(vec_rhs, vec_x); std::cout << "GLU solve status: " << status << std::endl; // status = KLU->solve(vec_rhs, vec_x); diff --git a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp index 8375b06cf..a08debbd1 100644 --- a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp @@ -149,8 +149,8 @@ int main(int argc, char* argv[]) std::cout << "KLU solve status: " << status << std::endl; if (i == 1) { - L = (ReSolve::matrix::Csr*) KLU->getLFactorCsr(); - U = (ReSolve::matrix::Csr*) KLU->getUFactorCsr(); + L = (ReSolve::matrix::Csr*) KLU->getLFactor(); + U = (ReSolve::matrix::Csr*) KLU->getUFactor(); if (L == nullptr) { std::cout << "ERROR: L factor is null\n"; @@ -163,7 +163,7 @@ int main(int argc, char* argv[]) } P = KLU->getPOrdering(); Q = KLU->getQOrdering(); - Rf->setupCsr(A, L, U, P, Q); + Rf->setup(A, L, U, P, Q); status_refactor = Rf->refactorize(); std::cout << "Initial Rf refactorization status: " << status_refactor << std::endl; @@ -223,15 +223,15 @@ int main(int argc, char* argv[]) << std::scientific << std::setprecision(16) << res_nrm / b_nrm << "\n"; - L = (ReSolve::matrix::Csr*) KLU->getLFactorCsr(); - U = (ReSolve::matrix::Csr*) KLU->getUFactorCsr(); + L = (ReSolve::matrix::Csr*) KLU->getLFactor(); + U = (ReSolve::matrix::Csr*) KLU->getUFactor(); if (L != nullptr && U != nullptr) { P = KLU->getPOrdering(); Q = KLU->getQOrdering(); - Rf->setupCsr(A, L, U, P, Q); + Rf->setup(A, L, U, P, Q); status_refactor = Rf->refactorize(); std::cout << "Rf refactorization after KLU redo status: " << status_refactor << std::endl; } diff --git a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp index dc012e0b4..35baf7dc8 100644 --- a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -159,15 +159,15 @@ int main(int argc, char* argv[]) << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) / norm_b << "\n"; if (i == 1) { - ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactorCsr(); - ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactorCsr(); + ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactor(); + ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactor(); if (L == nullptr) { std::cout << "ERROR\n"; } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); - Rf->setupCsr(A, L, U, P, Q); + Rf->setup(A, L, U, P, Q); std::cout << "about to set FGMRES" << std::endl; FGMRES->setRestart(1000); FGMRES->setMaxit(2000); diff --git a/examples/experimental/r_KLU_rocsolverrf_asym6x6.cpp b/examples/experimental/r_KLU_rocsolverrf_asym6x6.cpp index bf5892741..9a9d42e06 100644 --- a/examples/experimental/r_KLU_rocsolverrf_asym6x6.cpp +++ b/examples/experimental/r_KLU_rocsolverrf_asym6x6.cpp @@ -172,8 +172,8 @@ int main() helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU.getLFactorCsr(); - ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU.getUFactorCsr(); + ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU.getLFactor(); + ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU.getUFactor(); if (L == nullptr || U == nullptr) { std::cout << "Factor extraction from KLU failed!\n"; @@ -196,7 +196,7 @@ int main() std::cout << rhs_before[i] << (i < n - 1 ? ", " : ""); } std::cout << "]" << std::endl; - Rf.setupCsr(A, L, U, P, Q, vec_rhs); + Rf.setup(A, L, U, P, Q, vec_rhs); std::cout << "RocSolverRf setup completed\n"; // Test refactorization with the same matrix (in practice, you'd change matrix values) diff --git a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp index dbc14ce6b..72c4dac26 100644 --- a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp @@ -139,12 +139,12 @@ int main(int argc, char* argv[]) std::cout << "KLU solve status: " << status << std::endl; if (i == 1) { - ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactorCsr(); - ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactorCsr(); + ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactor(); + ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactor(); index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - Rf->setupCsr(A, L, U, P, Q, vec_rhs); + Rf->setup(A, L, U, P, Q, vec_rhs); Rf->refactorize(); } } @@ -193,13 +193,13 @@ int main(int argc, char* argv[]) << std::scientific << std::setprecision(16) << res_nrm / b_nrm << "\n"; - ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactorCsr(); - ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactorCsr(); + ReSolve::matrix::Csr* L = (ReSolve::matrix::Csr*) KLU->getLFactor(); + ReSolve::matrix::Csr* U = (ReSolve::matrix::Csr*) KLU->getUFactor(); index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); - Rf->setupCsr(A, L, U, P, Q, vec_rhs); + Rf->setup(A, L, U, P, Q, vec_rhs); } } diff --git a/examples/gluRefactor.cpp b/examples/gluRefactor.cpp index 8639d1058..322b392ba 100644 --- a/examples/gluRefactor.cpp +++ b/examples/gluRefactor.cpp @@ -235,15 +235,15 @@ int gluRefactor(int argc, char* argv[]) std::cout << "KLU solve status: " << status << std::endl; // Extract factors and configure refactorization solver - matrix::Csr* L = (matrix::Csr*) KLU.getLFactorCsr(); - matrix::Csr* U = (matrix::Csr*) KLU.getUFactorCsr(); + matrix::Csr* L = (matrix::Csr*) KLU.getLFactor(); + matrix::Csr* U = (matrix::Csr*) KLU.getUFactor(); if (L == nullptr || U == nullptr) { std::cout << "Factor extraction from KLU failed!\n"; } index_type* P = KLU.getPOrdering(); index_type* Q = KLU.getQOrdering(); - status = Rf.setupCsr(A, L, U, P, Q); + status = Rf.setup(A, L, U, P, Q); std::cout << "Refactorization setup status: " << status << std::endl; RESOLVE_RANGE_POP("KLU"); diff --git a/examples/gpuRefactor.cpp b/examples/gpuRefactor.cpp index 58e8cf3bd..90f5ee06e 100644 --- a/examples/gpuRefactor.cpp +++ b/examples/gpuRefactor.cpp @@ -266,8 +266,8 @@ int gpuRefactor(int argc, char* argv[]) if (i == 1) { // Extract factors and configure refactorization solver - matrix::Csr* L = (matrix::Csr*) KLU.getLFactorCsr(); - matrix::Csr* U = (matrix::Csr*) KLU.getUFactorCsr(); + matrix::Csr* L = (matrix::Csr*) KLU.getLFactor(); + matrix::Csr* U = (matrix::Csr*) KLU.getUFactor(); if (L == nullptr || U == nullptr) { std::cout << "Factor extraction from KLU failed!\n"; @@ -275,7 +275,7 @@ int gpuRefactor(int argc, char* argv[]) index_type* P = KLU.getPOrdering(); index_type* Q = KLU.getQOrdering(); - Rf.setupCsr(A, L, U, P, Q, vec_rhs); + Rf.setup(A, L, U, P, Q, vec_rhs); // Setup iterative refinement solver if (is_iterative_refinement) diff --git a/resolve/LinSolverDirect.cpp b/resolve/LinSolverDirect.cpp index 195f0ac2b..d82e1046c 100644 --- a/resolve/LinSolverDirect.cpp +++ b/resolve/LinSolverDirect.cpp @@ -35,7 +35,7 @@ namespace ReSolve } /** - * @brief Setup function for LinSolverDirect class. + * @brief Setup function for LinSolverDirect class with CSR data * * @param[in] A - matrix to be solved * @param[in] L - optional lower triangular factor @@ -61,33 +61,6 @@ namespace ReSolve return 0; } - /** - * @brief Setup function for LinSolverDirect class with CSR data - * - * @param[in] A - matrix to be solved - * @param[in] L - optional lower triangular factor - * @param[in] U - optional upper triangular factor - * @param[in] P - optional row permutation vector - * @param[in] Q - optional column permutation vector - * @param[in] rhs - optional right-hand side vector - * - * @return int - error code, 0 if successful - */ - int LinSolverDirect::setupCsr(matrix::Sparse* A, - matrix::Sparse* /* L */, - matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) - { - if (A == nullptr) - { - return 1; - } - A_ = A; - return 0; - } - /** * @brief Placeholder function for symbolic factorization. */ @@ -115,29 +88,13 @@ namespace ReSolve /** * @brief Placeholder function for lower triangular factor in Csr. */ - matrix::Sparse* LinSolverDirect::getLFactorCsr() - { - return nullptr; - } - - /** - * @brief Placeholder function for upper triangular factor in Csr. - */ - matrix::Sparse* LinSolverDirect::getUFactorCsr() - { - return nullptr; - } - - /** - * @brief Placeholder function for lower triangular factor. - */ matrix::Sparse* LinSolverDirect::getLFactor() { return nullptr; } /** - * @brief Placeholder function for upper triangular factor. + * @brief Placeholder function for upper triangular factor in Csr. */ matrix::Sparse* LinSolverDirect::getUFactor() { diff --git a/resolve/LinSolverDirect.hpp b/resolve/LinSolverDirect.hpp index 3432fcc1c..aafc03f8c 100644 --- a/resolve/LinSolverDirect.hpp +++ b/resolve/LinSolverDirect.hpp @@ -18,28 +18,20 @@ namespace ReSolve public: LinSolverDirect(); virtual ~LinSolverDirect(); - virtual int setup(matrix::Sparse* A = nullptr, + + virtual int setup(matrix::Sparse* A, matrix::Sparse* L = nullptr, matrix::Sparse* U = nullptr, index_type* P = nullptr, index_type* Q = nullptr, vector_type* rhs = nullptr); - virtual int setupCsr(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr); - virtual int analyze(); // the same as symbolic factorization virtual int factorize(); virtual int refactorize(); virtual int solve(vector_type* rhs, vector_type* x) = 0; virtual int solve(vector_type* x) = 0; - virtual matrix::Sparse* getLFactorCsr(); - virtual matrix::Sparse* getUFactorCsr(); virtual matrix::Sparse* getLFactor(); virtual matrix::Sparse* getUFactor(); virtual index_type* getPOrdering(); diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index 5608fd7e6..bd47b4928 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -40,81 +40,6 @@ namespace ReSolve * @param[in] Q - Pointer to the permutation vector for columns (base-0). * @param[in] rhs - Pointer to the right-hand side vector (not used in this setup). */ - int LinSolverDirectCuSolverGLU::setupCsr(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* /** rhs */) - { - RESOLVE_RANGE_PUSH(__FUNCTION__); - int error_sum = 0; - - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - // get the handle - handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); - A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - index_type nnz = A_->getNnz(); - // create combined factor - combineFactorsCsr(L, U); - - // set up descriptors - cusparseCreateMatDescr(&descr_M_); - cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); - cusolverSpCreateGluInfo(&info_M_); - - cusparseCreateMatDescr(&descr_A_); - cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); - - // set up the GLU - status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, - n, - nnz, - descr_A_, - A_->getRowData(memory::HOST), - A_->getColData(memory::HOST), - P, /** base-0 */ - Q, /** base-0 */ - M_->getNnz(), /** nnzM */ - descr_M_, - M_->getRowData(memory::HOST), - M_->getColData(memory::HOST), - info_M_); - error_sum += status_cusolver_; - // NOW the buffer - size_t buffer_size; - status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); - error_sum += status_cusolver_; - - mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size); - - status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; - - // reset and refactor so factors are ON THE GPU - - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, - n, - /** A is original matrix */ - nnz, - descr_A_, - A_->getValues(memory::DEVICE), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - info_M_); - - error_sum += status_cusolver_; - - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; - - RESOLVE_RANGE_POP(__FUNCTION__); - return error_sum; - } - int LinSolverDirectCuSolverGLU::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, @@ -180,6 +105,7 @@ namespace ReSolve A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), info_M_); + error_sum += status_cusolver_; status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); @@ -200,7 +126,7 @@ namespace ReSolve * @param[in] L - Pointer to the L factor in CSR format. * @param[in] U - Pointer to the U factor in CSR format. */ - void LinSolverDirectCuSolverGLU::combineFactorsCsr(matrix::Sparse* L, matrix::Sparse* U) + void LinSolverDirectCuSolverGLU::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { index_type n = L->getNumRows(); index_type* L_row = L->getRowData(memory::HOST); @@ -236,70 +162,6 @@ namespace ReSolve } } - void LinSolverDirectCuSolverGLU::combineFactors(matrix::Sparse* L, matrix::Sparse* U) - { - // L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(memory::HOST); - index_type* Li = L->getRowData(memory::HOST); - index_type* Up = U->getColData(memory::HOST); - index_type* Ui = U->getRowData(memory::HOST); - index_type nnzM = (L->getNnz() + U->getNnz() - n); - M_ = new matrix::Csr(n, n, nnzM); - M_->allocateMatrixData(memory::HOST); - index_type* mia = M_->getRowData(memory::HOST); - index_type* mja = M_->getColData(memory::HOST); - index_type row; - for (index_type i = 0; i < n; ++i) - { - // go through EACH COLUMN OF L first - for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) - { - row = Li[j]; - // BUT dont count diagonal twice, important - if (row != i) - { - mia[row + 1]++; - } - } - // then each column of U - for (index_type j = Up[i]; j < Up[i + 1]; ++j) - { - row = Ui[j]; - mia[row + 1]++; - } - } - // then organize mia_; - mia[0] = 0; - for (index_type i = 1; i < n + 1; i++) - { - mia[i] += mia[i - 1]; - } - - std::vector Mshifts(n, 0); - for (index_type i = 0; i < n; ++i) - { - // go through EACH COLUMN OF L first - for (int j = Lp[i]; j < Lp[i + 1]; ++j) - { - row = Li[j]; - if (row != i) - { - // place (row, i) where it belongs! - mja[mia[row] + Mshifts[row]] = i; - Mshifts[row]++; - } - } - // each column of U next - for (index_type j = Up[i]; j < Up[i + 1]; ++j) - { - row = Ui[j]; - mja[mia[row] + Mshifts[row]] = i; - Mshifts[row]++; - } - } - } - int LinSolverDirectCuSolverGLU::refactorize() { RESOLVE_RANGE_PUSH(__FUNCTION__); diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index df1e50234..cf982695b 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -34,18 +34,11 @@ namespace ReSolve int solve(vector_type* rhs, vector_type* x) override; int solve(vector_type* x) override; - int setupCsr(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, vector_type* rhs = nullptr) override; int setCliParam(const std::string id, const std::string value) override; @@ -56,9 +49,8 @@ namespace ReSolve int printCliParam(const std::string id) const override; private: - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors - void combineFactorsCsr(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors in CSR format - matrix::Sparse* M_; ///< the matrix that contains added factors + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors in CSR format + matrix::Sparse* M_; ///< the matrix that contains added factors // note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation cusparseMatDescr_t descr_M_; // this is NOT sparse matrix descriptor cusparseMatDescr_t descr_A_; // this is NOT sparse matrix descriptor diff --git a/resolve/LinSolverDirectCuSolverRf.cpp b/resolve/LinSolverDirectCuSolverRf.cpp index 5f9f267f2..d4f94429d 100644 --- a/resolve/LinSolverDirectCuSolverRf.cpp +++ b/resolve/LinSolverDirectCuSolverRf.cpp @@ -61,12 +61,12 @@ namespace ReSolve * @pre The matrix A is in CSR format. */ - int LinSolverDirectCuSolverRf::setupCsr(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* /* rhs */) + int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* /* rhs */) { assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix A has to be in CSR format for cusolverRf input.\n"); assert(L->getSparseFormat() == U->getSparseFormat() && "Matrices L and U have to be in the same format for cusolverRf input.\n"); @@ -139,148 +139,6 @@ namespace ReSolve return error_sum; } - /** - * @brief Setup the cuSolverRf factorization - * - * Sets up the cuSolverRf factorization for the given matrix A and its - * L and U factors. The permutation vectors P and Q are also set up. - * - * @param[in] A - pointer to the matrix A - * @param[in] L - pointer to the lower triangular factor L - * @param[in] U - pointer to the upper triangular factor U - * @param[in] P - pointer to the permutation vector P - * @param[in] Q - pointer to the permutation vector Q - * @param[in] rhs - pointer to the right-hand side vector (optional) - * - * @pre The matrix A is in CSR format. - */ - int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* /* rhs */) - { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix A has to be in CSR format for cusolverRf input.\n"); - assert(L->getSparseFormat() == U->getSparseFormat() && "Matrices L and U have to be in the same format for cusolverRf input.\n"); - - int error_sum = 0; - this->A_ = A; - index_type n = A_->getNumRows(); - - // remember - P and Q are generally CPU variables - // factorization data is stored in the handle. - // If function is called again, destroy the old handle to get rid of old data. - if (setup_completed_) - { - cusolverRfDestroy(handle_cusolverrf_); - cusolverRfCreate(&handle_cusolverrf_); - } - - matrix::Csc* L_csc = nullptr; - matrix::Csc* U_csc = nullptr; - matrix::Csr* L_csr = nullptr; - matrix::Csr* U_csr = nullptr; - - switch (L->getSparseFormat()) - { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - L_csc = static_cast(L); - U_csc = static_cast(U); - L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - csc2csr(L_csc, L_csr); - csc2csr(U_csc, U_csr); - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = static_cast(L); - U_csr = static_cast(U); - L_csr->setUpdated(memory::HOST); - U_csr->setUpdated(memory::HOST); - break; - default: - out::error() << "Matrix type for L and U factors not recognized!\n"; - out::error() << "Refactorization not completed.\n"; - return 1; - } - L_csr->syncData(memory::DEVICE); - U_csr->syncData(memory::DEVICE); - - if (d_P_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_P_, n); - } - - if (d_Q_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_Q_, n); - } - - if (d_T_ != nullptr) - { - mem_.deleteOnDevice(d_T_); - } - - mem_.allocateArrayOnDevice(&d_T_, n); - - mem_.copyArrayHostToDevice(d_P_, P, n); - mem_.copyArrayHostToDevice(d_Q_, Q, n); - - status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); - error_sum += status_cusolverrf_; - status_cusolverrf_ = cusolverRfSetupDevice(n, - A_->getNnz(), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - A_->getValues(memory::DEVICE), - L_csr->getNnz(), - L_csr->getRowData(memory::DEVICE), - L_csr->getColData(memory::DEVICE), - L_csr->getValues(memory::DEVICE), - U_csr->getNnz(), - U_csr->getRowData(memory::DEVICE), - U_csr->getColData(memory::DEVICE), - U_csr->getValues(memory::DEVICE), - d_P_, - d_Q_, - handle_cusolverrf_); - error_sum += status_cusolverrf_; - - mem_.deviceSynchronize(); - status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); - error_sum += status_cusolverrf_; - - const cusolverRfFactorization_t fact_alg = - CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 - const cusolverRfTriangularSolve_t solve_alg = - CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error - this->setAlgorithms(fact_alg, solve_alg); - - setup_completed_ = true; - - // Remove temporary objects upon setup completion - switch (L->getSparseFormat()) - { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - delete L_csr; - delete U_csr; - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - default: - break; - } - return error_sum; - } - /** * @brief Sets factorization and triangular solve algorithms * diff --git a/resolve/LinSolverDirectCuSolverRf.hpp b/resolve/LinSolverDirectCuSolverRf.hpp index d7148a0e0..da51c1d88 100644 --- a/resolve/LinSolverDirectCuSolverRf.hpp +++ b/resolve/LinSolverDirectCuSolverRf.hpp @@ -37,14 +37,7 @@ namespace ReSolve matrix::Sparse* U, index_type* P, index_type* Q, - vector_type* rhs = nullptr) override; - - int setupCsr(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr); + vector_type* rhs = nullptr); int refactorize() override; int solve(vector_type* rhs, vector_type* x) override; diff --git a/resolve/LinSolverDirectKLU.cpp b/resolve/LinSolverDirectKLU.cpp index c9396a881..ecde2f48f 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -268,7 +268,7 @@ namespace ReSolve * It extracts the factors as $A = U^T L^T$, * where U^T is the reinterpretation of the CSC U factor as CSR and L^T is the reinterpretation of the CSC L factor as CSR. */ - void LinSolverDirectKLU::extractFactorsCsr() + void LinSolverDirectKLU::extractFactors() { if (!factors_extracted_) { @@ -320,67 +320,6 @@ namespace ReSolve * * @return L factor in compressed sparse row format */ - matrix::Sparse* LinSolverDirectKLU::getLFactorCsr() - { - extractFactorsCsr(); - return L_; - } - - /** - * @brief Gets a U factor of the matrix A in compressed sparse row format. - * - * @return U factor in compressed sparse row format - */ - matrix::Sparse* LinSolverDirectKLU::getUFactorCsr() - { - extractFactorsCsr(); - return U_; - } - - /** - * @brief Extract L and U factors from the KLU solver in compressed sparse column format. - */ - void LinSolverDirectKLU::extractFactors() - { - if (!factors_extracted_) - { - const int nnzL = Numeric_->lnz; - const int nnzU = Numeric_->unz; - - L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); - U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); - L_->allocateMatrixData(memory::HOST); - U_->allocateMatrixData(memory::HOST); - - int ok = klu_extract(Numeric_, - Symbolic_, - L_->getColData(memory::HOST), - L_->getRowData(memory::HOST), - L_->getValues(memory::HOST), - U_->getColData(memory::HOST), - U_->getRowData(memory::HOST), - U_->getValues(memory::HOST), - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &Common_); - - L_->setUpdated(memory::HOST); - U_->setUpdated(memory::HOST); - (void) ok; // TODO: Check status in ok before setting `factors_extracted_` - factors_extracted_ = true; - } - } - - /** - * @brief Get the lower triangular factor L of the matrix A in compressed sparse column format. - * - * @return L factor in compressed sparse column format - */ matrix::Sparse* LinSolverDirectKLU::getLFactor() { extractFactors(); @@ -388,9 +327,9 @@ namespace ReSolve } /** - * @brief Get the upper triangular factor U of the matrix A in compressed sparse column format. + * @brief Gets a U factor of the matrix A in compressed sparse row format. * - * @return U factor + * @return U factor in compressed sparse row format */ matrix::Sparse* LinSolverDirectKLU::getUFactor() { diff --git a/resolve/LinSolverDirectKLU.hpp b/resolve/LinSolverDirectKLU.hpp index 1bde030d9..37ca5ad7d 100644 --- a/resolve/LinSolverDirectKLU.hpp +++ b/resolve/LinSolverDirectKLU.hpp @@ -39,9 +39,6 @@ namespace ReSolve int solve(vector_type* rhs, vector_type* x) override; int solve(vector_type* x) override; - void extractFactorsCsr(); - matrix::Sparse* getLFactorCsr() override; - matrix::Sparse* getUFactorCsr() override; void extractFactors(); matrix::Sparse* getLFactor() override; matrix::Sparse* getUFactor() override; diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index b737a7c3a..f17793967 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -56,202 +56,6 @@ namespace ReSolve * * @param[out] error_sum - int - sum of errors from setup */ - int LinSolverDirectRocSolverRf::setupCsr(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) - { - RESOLVE_RANGE_PUSH(__FUNCTION__); - - int error_sum = 0; - - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for rocsolverRf.\n"); - A_ = A; - index_type n = A_->getNumRows(); - - // set matrix info - rocsolver_create_rfinfo(&infoM_, workspace_->getRocblasHandle()); - - // Combine factors L and U into matrix M_ - combineFactorsCsr(L, U); - - M_->setUpdated(ReSolve::memory::HOST); - M_->syncData(ReSolve::memory::DEVICE); - - // remember - P and Q are generally CPU variables - if (d_P_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_P_, n); - } - - if (d_Q_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_Q_, n); - } - mem_.copyArrayHostToDevice(d_P_, P, n); - mem_.copyArrayHostToDevice(d_Q_, Q, n); - - mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_analysis(workspace_->getRocblasHandle(), - n, - 1, - A_->getNnz(), - A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), // jCol_, - A_->getValues(ReSolve::memory::DEVICE), // vals_, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), // vals_, - d_P_, - d_Q_, - rhs->getData(ReSolve::memory::DEVICE), - n, - infoM_); - - mem_.deviceSynchronize(); - error_sum += status_rocblas_; - // tri solve setup - if (solve_mode_ == 1) - { // OBSOLETE -- to be removed. Formerly known as "fast mode" TODO - - if (L_csr_ != nullptr) - { - delete L_csr_; - } - - L_csr_ = new ReSolve::matrix::Csr(L->getNumRows(), L->getNumColumns(), L->getNnz()); - L_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - - if (U_csr_ != nullptr) - { - delete U_csr_; - } - - U_csr_ = new ReSolve::matrix::Csr(U->getNumRows(), U->getNumColumns(), U->getNnz()); - U_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - - rocsparse_create_mat_descr(&(descr_L_)); - rocsparse_set_mat_fill_mode(descr_L_, rocsparse_fill_mode_lower); - rocsparse_set_mat_index_base(descr_L_, rocsparse_index_base_zero); - - rocsparse_create_mat_descr(&(descr_U_)); - rocsparse_set_mat_index_base(descr_U_, rocsparse_index_base_zero); - rocsparse_set_mat_fill_mode(descr_U_, rocsparse_fill_mode_upper); - - rocsparse_create_mat_info(&info_L_); - rocsparse_create_mat_info(&info_U_); - - // local variables - size_t L_buffer_size; - size_t U_buffer_size; - - status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), - n, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), // vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - U_csr_->getValues(ReSolve::memory::DEVICE)); - - error_sum += status_rocblas_; - - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - L_csr_->getNnz(), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - &L_buffer_size); - error_sum += status_rocsparse_; - mem_.allocateBufferOnDevice(&L_buffer_, L_buffer_size); - - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - U_csr_->getNnz(), - descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - &U_buffer_size); - error_sum += status_rocsparse_; - mem_.allocateBufferOnDevice(&U_buffer_, U_buffer_size); - - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - L_csr_->getNnz(), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - rocsparse_analysis_policy_force, - rocsparse_solve_policy_auto, - L_buffer_); - - error_sum += status_rocsparse_; - if (status_rocsparse_ != 0) - { - std::cout << "status after analysis 1: " << status_rocsparse_ << "\n"; - } - - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - U_csr_->getNnz(), - descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - rocsparse_analysis_policy_force, - rocsparse_solve_policy_auto, - U_buffer_); - error_sum += status_rocsparse_; - if (status_rocsparse_ != 0) - { - out::error() << "status after analysis 2: " << status_rocsparse_ << "\n"; - } - - // allocate aux data - if (d_aux1_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_aux1_, n); - } - if (d_aux2_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_aux2_, n); - } - } - RESOLVE_RANGE_POP(__FUNCTION__); - return error_sum; - } - - /** - * @brief Setup function for LinSolverDirectRocSolverRf - * - * @param[in] A - matrix::Sparse* - matrix to solve - * @param[in] L - matrix::Sparse* - lower triangular factor - * @param[in] U - matrix::Sparse* - upper triangular factor - * @param[in] P - index_type* - permutation vector P - * @param[in] Q - index_type* - permutation vector Q - * @param[in] rhs - vector_type* - right hand side - * - * @param[out] error_sum - int - sum of errors from setup - */ int LinSolverDirectRocSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, @@ -311,7 +115,7 @@ namespace ReSolve error_sum += status_rocblas_; // tri solve setup if (solve_mode_ == 1) - { // fast mode + { // OBSOLETE -- to be removed. Formerly known as "fast mode" TODO if (L_csr_ != nullptr) { @@ -808,7 +612,7 @@ namespace ReSolve * * @post M_ is allocated and filled with L and U factors */ - void LinSolverDirectRocSolverRf::combineFactorsCsr(matrix::Sparse* L, matrix::Sparse* U) + void LinSolverDirectRocSolverRf::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { index_type n = L->getNumRows(); index_type* L_row = L->getRowData(memory::HOST); @@ -844,87 +648,6 @@ namespace ReSolve } } - /** - * @brief Combine L and U factors into a single matrix M - * - * M = [L U], where L and U are lower and upper triangular factors - * The implicit identity diagonal of L is not included in M - * - * @param[in] L - matrix::Sparse* - lower triangular factor - * @param[in] U - matrix::Sparse* - upper triangular factor - * - * @post M_ is allocated and filled with L and U factors - */ - void LinSolverDirectRocSolverRf::combineFactors(matrix::Sparse* L, matrix::Sparse* U) - { - // L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(ReSolve::memory::HOST); - index_type* Li = L->getRowData(ReSolve::memory::HOST); - index_type* Up = U->getColData(ReSolve::memory::HOST); - index_type* Ui = U->getRowData(ReSolve::memory::HOST); - if (M_ != nullptr) - { - delete M_; - } - - index_type nnzM = (L->getNnz() + U->getNnz() - n); - M_ = new matrix::Csr(n, n, nnzM); - M_->allocateMatrixData(ReSolve::memory::DEVICE); - M_->allocateMatrixData(ReSolve::memory::HOST); - index_type* mia = M_->getRowData(ReSolve::memory::HOST); - index_type* mja = M_->getColData(ReSolve::memory::HOST); - index_type row; - for (index_type i = 0; i < n; ++i) - { - // go through EACH COLUMN OF L first - for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) - { - row = Li[j]; - // BUT dont count diagonal twice, important - if (row != i) - { - mia[row + 1]++; - } - } - // then each column of U - for (index_type j = Up[i]; j < Up[i + 1]; ++j) - { - row = Ui[j]; - mia[row + 1]++; - } - } - // then organize mia_; - mia[0] = 0; - for (index_type i = 1; i < n + 1; i++) - { - mia[i] += mia[i - 1]; - } - - std::vector Mshifts(static_cast(n), 0); - for (index_type i = 0; i < n; ++i) - { - // go through EACH COLUMN OF L first - for (int j = Lp[i]; j < Lp[i + 1]; ++j) - { - row = Li[j]; - if (row != i) - { - // place (row, i) where it belongs! - mja[mia[row] + Mshifts[static_cast(row)]] = i; - Mshifts[static_cast(row)]++; - } - } - // each column of U next - for (index_type j = Up[i]; j < Up[i + 1]; ++j) - { - row = Ui[j]; - mja[mia[row] + Mshifts[static_cast(row)]] = i; - Mshifts[static_cast(row)]++; - } - } - } // LinSolverDirectRocSolverRf::combineFactors - /** * @brief initialize the parameter list for LinSolverDirectRocSolverRf * diff --git a/resolve/LinSolverDirectRocSolverRf.hpp b/resolve/LinSolverDirectRocSolverRf.hpp index 4ff579a75..d4702aeea 100644 --- a/resolve/LinSolverDirectRocSolverRf.hpp +++ b/resolve/LinSolverDirectRocSolverRf.hpp @@ -40,13 +40,6 @@ namespace ReSolve index_type* Q, vector_type* rhs) override; - int setupCsr(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) override; - int refactorize() override; int solve(vector_type* rhs, vector_type* x) override; int solve(vector_type* rhs) override; // the solution overwrites rhs @@ -71,8 +64,7 @@ namespace ReSolve private: // to be exported to matrix handler in a later time - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); // create L+U from separate L, U factors - void combineFactorsCsr(matrix::Sparse* L, matrix::Sparse* U); // create L+U from separate L, U factors + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); // create L+U from separate L, U factors void initParamList(); rocblas_status status_rocblas_; diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 0294dea92..8b56848d1 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -443,8 +443,8 @@ namespace ReSolve { int status = 0; - L_ = factorizationSolver_->getLFactorCsr(); - U_ = factorizationSolver_->getUFactorCsr(); + L_ = factorizationSolver_->getLFactor(); + U_ = factorizationSolver_->getUFactor(); P_ = factorizationSolver_->getPOrdering(); Q_ = factorizationSolver_->getQOrdering(); @@ -458,11 +458,11 @@ namespace ReSolve if (refactorizationMethod_ == "glu") { is_solve_on_device_ = true; - status += refactorizationSolver_->setupCsr(A_, L_, U_, P_, Q_); + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); } if (refactorizationMethod_ == "cusolverrf") { - status += refactorizationSolver_->setupCsr(A_, L_, U_, P_, Q_); + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); LinSolverDirectCuSolverRf* Rf = dynamic_cast(refactorizationSolver_); Rf->setNumericalProperties(1e-14, 1e-1); @@ -477,7 +477,7 @@ namespace ReSolve is_solve_on_device_ = false; auto* Rf = dynamic_cast(refactorizationSolver_); Rf->setSolveMode(1); - status += refactorizationSolver_->setupCsr(A_, L_, U_, P_, Q_, resVector_); + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_, resVector_); } #endif diff --git a/tests/functionality/testRefactor.cpp b/tests/functionality/testRefactor.cpp index b3058249f..a13cbec7d 100644 --- a/tests/functionality/testRefactor.cpp +++ b/tests/functionality/testRefactor.cpp @@ -186,13 +186,13 @@ int runTest(int argc, char* argv[], std::string& solver_name) status = KLU.factorize(); error_sum += status; - // Extract factors and setup factorization for refactorization - matrix_type* L = KLU.getLFactorCsr(); - matrix_type* U = KLU.getUFactorCsr(); + // Extract factors and setup factorization for refactorization. + matrix_type* L = KLU.getLFactor(); + matrix_type* U = KLU.getUFactor(); index_type* P = KLU.getPOrdering(); index_type* Q = KLU.getQOrdering(); - status = Rf.setupCsr(A, L, U, P, Q, &vec_rhs); + status = Rf.setup(A, L, U, P, Q, &vec_rhs); error_sum += status; // Refactorize (on device where available)