Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"installDir": "${sourceDir}/install",
"generator": "Unix Makefiles",
"cacheVariables": {
"RESOLVE_USE_CUDA": "ON"
"RESOLVE_USE_CUDA": "ON",
"CMAKE_CUDA_ARCHITECTURES": "60"
}
},
{
Expand Down Expand Up @@ -45,7 +46,8 @@
"description": "Custom changes specific for Ascent",
"cacheVariables": {
"CMAKE_C_COMPILER": "$env{OLCF_GCC_ROOT}/bin/gcc",
"CMAKE_CXX_COMPILER": "$env{OLCF_GCC_ROOT}/bin/g++"
"CMAKE_CXX_COMPILER": "$env{OLCF_GCC_ROOT}/bin/g++",
"CMAKE_CUDA_ARCHITECTURES": "70"
}
},
{
Expand Down
13 changes: 10 additions & 3 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ if(RESOLVE_USE_CUDA)
# Build example where matrix data is updated
add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp)
target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve)


#rand solver
add_executable(gmres_cusparse_rand.exe r_randGMRES_CUDA.cpp)
target_link_libraries(gmres_cusparse_rand.exe PRIVATE ReSolve)
endif(RESOLVE_USE_CUDA)

# Create HIP examples
Expand All @@ -53,17 +56,21 @@ if(RESOLVE_USE_HIP)
add_executable(klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp)
target_link_libraries(klu_rocsolverrf_check_redo.exe PRIVATE ReSolve)


# Rand GMRES test with rocsparse
add_executable(gmres_rocsparse_rand.exe r_randGMRES.cpp)
target_link_libraries(gmres_rocsparse_rand.exe PRIVATE ReSolve)
endif(RESOLVE_USE_HIP)

# Install all examples in bin directory
set(installable_executables klu_klu.exe klu_klu_standalone.exe)

if(RESOLVE_USE_CUDA)
set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe)
set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe gmres_cusparse_rand.exe)
Comment thread
cameronrutherford marked this conversation as resolved.
endif(RESOLVE_USE_CUDA)

if(RESOLVE_USE_HIP)
set(installable_executables ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe klu_rocsolverrf_check_redo.exe)
set(installable_executables ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe klu_rocsolverrf_check_redo.exe gmres_rocsparse_rand.exe)
endif(RESOLVE_USE_HIP)

install(TARGETS ${installable_executables}
Expand Down
128 changes: 128 additions & 0 deletions examples/r_randGMRES.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#include <string>
#include <iostream>
#include <iomanip>

#include <resolve/matrix/Coo.hpp>
#include <resolve/matrix/Csr.hpp>
#include <resolve/matrix/Csc.hpp>
#include <resolve/vector/Vector.hpp>
#include <resolve/matrix/io.hpp>
#include <resolve/matrix/MatrixHandler.hpp>
#include <resolve/vector/VectorHandler.hpp>
#include <resolve/LinSolverDirectKLU.hpp>
#include <resolve/LinSolverDirectRocSparseILU0.hpp>
#include <resolve/LinSolverIterativeRandFGMRES.hpp>
#include <resolve/workspace/LinAlgWorkspace.hpp>

using namespace ReSolve::constants;

int main(int argc, char *argv[])
{
// Use the same data types as those you specified in ReSolve build.
using real_type = ReSolve::real_type;
using vector_type = ReSolve::vector::Vector;

(void) argc; // TODO: Check if the number of input parameters is correct.
std::string matrixFileName = argv[1];
std::string rhsFileName = argv[2];


ReSolve::matrix::Coo* A_coo;
ReSolve::matrix::Csr* A;
ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP();
workspace_HIP->initializeHandles();
ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP);
ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP);
real_type* rhs = nullptr;
real_type* x = nullptr;
Comment thread
pelesh marked this conversation as resolved.

vector_type* vec_rhs;
vector_type* vec_x;
vector_type* vec_r;

ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2);

ReSolve::LinSolverDirectRocSparseILU0* Rf = new ReSolve::LinSolverDirectRocSparseILU0(workspace_HIP);
ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler,ReSolve::LinSolverIterativeRandFGMRES::cs, GS, "hip");

std::cout << std::endl << std::endl << std::endl;
std::cout << "========================================================================================================================"<<std::endl;
std::cout << "Reading: " << matrixFileName<< std::endl;
std::cout << "========================================================================================================================"<<std::endl;
std::cout << std::endl;
std::ifstream mat_file(matrixFileName);
if(!mat_file.is_open())
{
std::cout << "Failed to open file " << matrixFileName << "\n";
return -1;
}
std::ifstream rhs_file(rhsFileName);
if(!rhs_file.is_open())
{
std::cout << "Failed to open file " << rhsFileName << "\n";
return -1;
}
A_coo = ReSolve::io::readMatrixFromFile(mat_file);
A = new ReSolve::matrix::Csr(A_coo->getNumRows(),
A_coo->getNumColumns(),
A_coo->getNnz(),
A_coo->symmetric(),
A_coo->expanded());

rhs = ReSolve::io::readRhsFromFile(rhs_file);
x = new real_type[A->getNumRows()];
vec_rhs = new vector_type(A->getNumRows());
vec_x = new vector_type(A->getNumRows());
vec_x->allocate(ReSolve::memory::HOST);
//iinit guess is 0U
vec_x->allocate(ReSolve::memory::DEVICE);
vec_x->setToZero(ReSolve::memory::DEVICE);
vec_r = new vector_type(A->getNumRows());
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
mat_file.close();
rhs_file.close();

matrix_handler->coo2csr(A_coo,A, "hip");
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
//Now call direct solver
real_type norm_b;
matrix_handler->setValuesChanged(true, "hip");

Rf->setup(A);
FGMRES->setRestart(150);
FGMRES->setMaxit(2500);
FGMRES->setTol(1e-12);
FGMRES->setup(A);
GS->setup(FGMRES->getKrand(), FGMRES->getRestart());

//matrix_handler->setValuesChanged(true, "hip");
FGMRES->resetMatrix(A);
FGMRES->setupPreconditioner("LU", Rf);
FGMRES->setFlexible(1);

vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
FGMRES->solve(vec_rhs, vec_x);

norm_b = vector_handler->dot(vec_rhs, vec_rhs, "hip");
norm_b = sqrt(norm_b);
std::cout << "FGMRES: init nrm: "
<< std::scientific << std::setprecision(16)
<< FGMRES->getInitResidualNorm()/norm_b
<< " final nrm: "
<< FGMRES->getFinalResidualNorm()/norm_b
<< " iter: " << FGMRES->getNumIter() << "\n";


delete A;
delete A_coo;
delete Rf;
delete [] x;
delete [] rhs;
delete vec_r;
delete vec_x;
delete workspace_HIP;
delete matrix_handler;
delete vector_handler;

return 0;
}
128 changes: 128 additions & 0 deletions examples/r_randGMRES_CUDA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#include <string>
#include <iostream>
#include <iomanip>

#include <resolve/matrix/Coo.hpp>
#include <resolve/matrix/Csr.hpp>
#include <resolve/matrix/Csc.hpp>
#include <resolve/vector/Vector.hpp>
#include <resolve/matrix/io.hpp>
#include <resolve/matrix/MatrixHandler.hpp>
#include <resolve/vector/VectorHandler.hpp>
#include <resolve/LinSolverDirectKLU.hpp>
#include <resolve/LinSolverDirectCuSparseILU0.hpp>
#include <resolve/LinSolverIterativeRandFGMRES.hpp>
#include <resolve/workspace/LinAlgWorkspace.hpp>

using namespace ReSolve::constants;

int main(int argc, char *argv[])
{
// Use the same data types as those you specified in ReSolve build.
using real_type = ReSolve::real_type;
using vector_type = ReSolve::vector::Vector;

(void) argc; // TODO: Check if the number of input parameters is correct.
std::string matrixFileName = argv[1];
std::string rhsFileName = argv[2];


ReSolve::matrix::Coo* A_coo;
ReSolve::matrix::Csr* A;
ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA();
workspace_CUDA->initializeHandles();
ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA);
ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA);
real_type* rhs = nullptr;
real_type* x = nullptr;

vector_type* vec_rhs;
vector_type* vec_x;
vector_type* vec_r;

ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2);

ReSolve::LinSolverDirectCuSparseILU0* Rf = new ReSolve::LinSolverDirectCuSparseILU0(workspace_CUDA);
ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler,ReSolve::LinSolverIterativeRandFGMRES::fwht, GS, "cuda");

std::cout << std::endl << std::endl << std::endl;
std::cout << "========================================================================================================================"<<std::endl;
std::cout << "Reading: " << matrixFileName<< std::endl;
std::cout << "========================================================================================================================"<<std::endl;
std::cout << std::endl;
std::ifstream mat_file(matrixFileName);
if(!mat_file.is_open())
{
std::cout << "Failed to open file " << matrixFileName << "\n";
return -1;
}
std::ifstream rhs_file(rhsFileName);
if(!rhs_file.is_open())
{
std::cout << "Failed to open file " << rhsFileName << "\n";
return -1;
}
A_coo = ReSolve::io::readMatrixFromFile(mat_file);
A = new ReSolve::matrix::Csr(A_coo->getNumRows(),
A_coo->getNumColumns(),
A_coo->getNnz(),
A_coo->symmetric(),
A_coo->expanded());

rhs = ReSolve::io::readRhsFromFile(rhs_file);
x = new real_type[A->getNumRows()];
vec_rhs = new vector_type(A->getNumRows());
vec_x = new vector_type(A->getNumRows());
vec_x->allocate(ReSolve::memory::HOST);
//iinit guess is 0U
vec_x->allocate(ReSolve::memory::DEVICE);
vec_x->setToZero(ReSolve::memory::DEVICE);
vec_r = new vector_type(A->getNumRows());
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
mat_file.close();
rhs_file.close();

matrix_handler->coo2csr(A_coo,A, "cuda");
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
//Now call direct solver
real_type norm_b;
matrix_handler->setValuesChanged(true, "cuda");

Rf->setup(A, nullptr, nullptr, nullptr, nullptr, vec_rhs);
FGMRES->setRestart(800);
FGMRES->setMaxit(800);
FGMRES->setTol(1e-12);
FGMRES->setup(A);
GS->setup(FGMRES->getKrand(), FGMRES->getRestart());

//matrix_handler->setValuesChanged(true, "cuda");
FGMRES->resetMatrix(A);
FGMRES->setupPreconditioner("LU", Rf);
FGMRES->setFlexible(1);

vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
FGMRES->solve(vec_rhs, vec_x);

norm_b = vector_handler->dot(vec_rhs, vec_rhs, "cuda");
norm_b = sqrt(norm_b);
std::cout << "FGMRES: init nrm: "
<< std::scientific << std::setprecision(16)
<< FGMRES->getInitResidualNorm()/norm_b
<< " final nrm: "
<< FGMRES->getFinalResidualNorm()/norm_b
<< " iter: " << FGMRES->getNumIter() << "\n";


delete A;
delete A_coo;
delete Rf;
delete [] x;
delete [] rhs;
delete vec_r;
delete vec_x;
delete workspace_CUDA;
delete matrix_handler;
delete vector_handler;

return 0;
}
12 changes: 10 additions & 2 deletions resolve/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@ set(ReSolve_SRC
set(ReSolve_GPU_SRC
GramSchmidt.cpp
LinSolverIterativeFGMRES.cpp
)
LinSolverIterativeRandFGMRES.cpp
RandSketchingManager.cpp
RandSketchingCountSketch.cpp
RandSketchingFWHT.cpp
)

# C++ code that links to CUDA SDK libraries
set(ReSolve_CUDASDK_SRC
LinSolverDirectCuSolverGLU.cpp
LinSolverDirectCuSolverRf.cpp
LinSolverDirectCuSparseILU0.cpp
)
# HIP files
set(ReSolve_ROCM_SRC
LinSolverDirectRocSolverRf.cpp
LinSolverDirectRocSparseILU0.cpp
)
# Header files to be installed
set(ReSolve_HEADER_INSTALL
Expand All @@ -37,13 +43,15 @@ set(ReSolve_HEADER_INSTALL
LinSolverDirectCuSolverGLU.hpp
LinSolverDirectCuSolverRf.hpp
LinSolverDirectRocSolverRf.hpp
LinSolverDirectRocSparseILU0.hpp
Comment thread
kswirydo marked this conversation as resolved.
LinSolverDirectCuSparseILU0.hpp
Comment thread
pelesh marked this conversation as resolved.
LinSolverDirectKLU.hpp
LinSolverIterativeFGMRES.hpp
RefactorizationSolver.hpp
SystemSolver.hpp
GramSchmidt.hpp
MemoryUtils.hpp
)
RandSketchingManager.hpp)

# Now, build workspaces
add_subdirectory(workspace)
Expand Down
10 changes: 8 additions & 2 deletions resolve/GramSchmidt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,32 +151,38 @@ namespace ReSolve
vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace);
// V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol
vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace );

mem_.deviceSynchronize();

// copy H_col to aux, we will need it later
vec_Hcolumn_->setDataUpdated(memory::DEVICE);
vec_Hcolumn_->setCurrentSize(i + 1);
vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, memory::HOST);
mem_.deviceSynchronize();

//Hcol = V(:,1:i)^T*V(:,i+1);
vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace);
mem_.deviceSynchronize();

// V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol
vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace );
mem_.deviceSynchronize();

// copy H_col to H
vec_Hcolumn_->setDataUpdated(memory::DEVICE);
vec_Hcolumn_->deepCopyVectorData(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST);
mem_.deviceSynchronize();

// add both pieces together (unstable otherwise, careful here!!)
t = 0.0;
for(int j = 0; j <= i; ++j) {
H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j];
H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j];
}

t = vector_handler_->dot(vec_v_, vec_v_, memspace);
//set the last entry in Hessenberg matrix
t = sqrt(t);
H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t;

if(fabs(t) > EPSILON) {
t = 1.0/t;
vector_handler_->scal(&t, vec_v_, memspace);
Expand Down
Loading