From f0824d93756de53fcbb6779ac37c93917d839948 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Fri, 13 Feb 2026 16:45:57 -0500 Subject: [PATCH 01/15] Refactor Jacobian assembly in PowerElectronics --- .../LinearAlgebra/SparseMatrix/CMakeLists.txt | 17 +- .../LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 170 ++++ .../LinearAlgebra/SparseMatrix/CsrMatrix.cpp | 824 ------------------ .../LinearAlgebra/SparseMatrix/CsrMatrix.hpp | 762 +++++++++++++++- GridKit/Model/Evaluator.hpp | 11 +- .../PowerElectronics/CircuitComponent.hpp | 5 +- .../SystemModelPowerElectronics.hpp | 141 ++- GridKit/Solver/Dynamic/Ida.cpp | 70 +- GridKit/Solver/Dynamic/Ida.hpp | 11 + .../LinearAlgebra/SparseTest/SparseTest.cpp | 74 ++ 10 files changed, 1194 insertions(+), 891 deletions(-) delete mode 100644 GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp diff --git a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt index 56d5adc13..9d431895a 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -1,6 +1,13 @@ -gridkit_add_library(CsrMatrix - SOURCES - CsrMatrix.cpp - HEADERS +add_library(CsrMatrix INTERFACE) +target_include_directories(CsrMatrix INTERFACE + $ + $) + +add_library(GridKit::CsrMatrix ALIAS CsrMatrix) + +install( + FILES COO_Matrix.hpp - CsrMatrix.hpp) + CsrMatrix.hpp + DESTINATION + include/GridKit/LinearAlgebra/SparseMatrix) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 7f3671a22..9082f391f 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ namespace GridKit IdxT rows_size_; IdxT columns_size_; bool sorted_; + std::vector map2csr_; public: // Constructors @@ -55,6 +57,12 @@ namespace GridKit std::tuple, std::vector, std::vector> setDataToCSR(); std::vector getCSRRowData(); + /// Reinitialize COO matrix with new data + void resetEntries(std::vector r, std::vector c, std::vector v, IdxT m, IdxT n); + + /// Convert to CSR with deduplication, storing the COO-to-CSR mapping in map2csr_ + std::tuple, std::vector, std::vector> getCsrData(); + // Set values from vector storage. Will sort before storing void setValues(std::vector r, std::vector c, std::vector v); @@ -80,12 +88,14 @@ namespace GridKit IdxT nnz(); std::tuple getDimensions(); + std::vector getMapToCsr(); void printMatrix(std::string name = ""); void printMatrixMarket(const std::string& filename, const std::string& comment); static void sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values); + static void sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values, std::vector& map); private: IdxT indexStartRow(const std::vector& rows, IdxT r); @@ -189,6 +199,101 @@ namespace GridKit return {row_size_vec, this->column_indices_, this->values_}; } + /** + * @brief Reinitialize the COO matrix with new data. + * + * @tparam RealT - Real type for Jacobian entries + * @tparam IdxT - Integer data type for matrix indices + * + * @param[in] r row indices + * @param[in] c column indices + * @param[in] v values + * @param[in] m number of rows + * @param[in] n number of columns + */ + template + void COO_Matrix::resetEntries(std::vector r, std::vector c, std::vector v, IdxT m, IdxT n) + { + this->values_ = v; + this->row_indices_ = r; + this->column_indices_ = c; + this->rows_size_ = m; + this->columns_size_ = n; + this->sorted_ = false; // Set to false until explicitly sorted, though logically it is sorted. + } + + /** + * @brief Convert to CSR format with deduplication and index mapping. + * + * 1. Sorts the COO entries + * 2. Deduplicates entries by summing their values + * 3. Builds CSR row pointers + * 4. Stores the mapping from original COO indices to deduplicated CSR value indices + * + * @tparam RealT - Real type for Jacobian entries + * @tparam IdxT - Integer data type for matrix indices + * + * @return std::tuple, std::vector, std::vector> + */ + template + std::tuple, std::vector, std::vector> COO_Matrix::getCsrData() + { + const IdxT nnz_dup = static_cast(row_indices_.size()); + + // Sort the entries while preserving the mapping + std::vector map2sorted(nnz_dup); + sortSparseCOO(row_indices_, column_indices_, values_, map2sorted); + + // Deduplicate entries by summing values + std::vector row_ptrs(rows_size_ + 1, 0); + std::vector map2dedup(nnz_dup); + + map2dedup[0] = 0; + row_ptrs[row_indices_[0] + 1]++; + + // Write position + IdxT w = 0; + + for (IdxT i = 1; i < nnz_dup; ++i) + { + if (row_indices_[i] == row_indices_[w] && column_indices_[i] == column_indices_[w]) + { + values_[w] += values_[i]; + map2dedup[i] = w; + } + else + { + ++w; + row_indices_[w] = row_indices_[i]; + column_indices_[w] = column_indices_[i]; + values_[w] = values_[i]; + map2dedup[i] = w; + row_ptrs[row_indices_[w] + 1]++; + } + } + IdxT nnz_dedup = w + 1; + + // Cumulative sum for row pointers + for (IdxT i = 0; i < rows_size_; ++i) + { + row_ptrs[i + 1] += row_ptrs[i]; + } + + // map2csr_: original COO index -> deduplicated CSR index + map2csr_.resize(nnz_dup); + for (IdxT k = 0; k < nnz_dup; ++k) + { + map2csr_[map2sorted[k]] = map2dedup[k]; + } + + // Shrink internal arrays to deduplicated size + row_indices_.resize(nnz_dedup); + column_indices_.resize(nnz_dedup); + values_.resize(nnz_dedup); + + return {row_ptrs, column_indices_, values_}; + } + /** * @brief Only creates the row data * @@ -634,6 +739,12 @@ namespace GridKit return std::tuple(this->rows_size_, this->columns_size_); } + template + inline std::vector COO_Matrix::getMapToCsr() + { + return map2csr_; + } + /** * @brief Print matrix in sorted order * @@ -908,6 +1019,65 @@ namespace GridKit } } + /** + * @brief Sorts unordered COO matrix + * + * Matrix entries can appear in arbitrary order and will be sorted in + * row-major order before the method returns. + * Duplicate entries are not allowed and should be pre-summed. + * + * @pre rows, columns, and values are of the same size and represent a COO matrix with no duplicates + * @post Matrix entries are sorted in row-major order + * + * @todo simple setup. Should add stable sorting since lists are pre-sorted_ + * + * @tparam RealT - Real type for Jacobian entries + * @tparam IdxT - Integer data type for matrix indices + * + * @param rows + * @param columns + * @param values + */ + template + inline void COO_Matrix::sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values, std::vector& map) + { + + // index based sort code + // https://stackoverflow.com/questions/25921706/creating-a-vector-of-indices-of-a-sorted_-vector + // cannot call sort since two arrays are used instead + std::vector ordervec(rows.size()); + std::size_t n(0); + std::generate(std::begin(ordervec), std::end(ordervec), [&] + { return n++; }); + + // Sort by row first then column. + std::sort(std::begin(ordervec), + std::end(ordervec), + [&](auto i1, auto i2) + { return (rows[i1] < rows[i2]) || (rows[i1] == rows[i2] && columns[i1] < columns[i2]); }); + + // Preserve the mapping + map.resize(ordervec.size()); + std::copy(ordervec.begin(), ordervec.end(), map.begin()); + + // reorder based of index-sorting. Only swap cost no extra memory. + // @todo see if extra memory creation is fine + // https://stackoverflow.com/a/22183350 + for (size_t i = 0; i < ordervec.size(); i++) + { + // permutation swap + while (ordervec[i] != ordervec[ordervec[i]]) + { + std::swap(rows[ordervec[i]], rows[ordervec[ordervec[i]]]); + std::swap(columns[ordervec[i]], columns[ordervec[ordervec[i]]]); + std::swap(values[ordervec[i]], values[ordervec[ordervec[i]]]); + + // swap orderings + std::swap(ordervec[i], ordervec[ordervec[i]]); + } + } + } + /** * @brief Constructor for COO Matrix with given cooridnates and values * diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp deleted file mode 100644 index a1296c570..000000000 --- a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp +++ /dev/null @@ -1,824 +0,0 @@ -#include "CsrMatrix.hpp" - -#include -#include -#include - -namespace GridKit -{ - namespace LinearAlgebra - { - template - CsrMatrix::CsrMatrix() - { - } - - /** - * @brief basic constructor. It DOES NOT allocate any memory! - * - * @param[in] n - number of rows - * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros - */ - template - CsrMatrix::CsrMatrix(IdxT n, - IdxT m, - IdxT nnz) - : n_{n}, - m_{m}, - nnz_{nnz} - { - setNotUpdated(); - - // set everything to nullptr - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - } - - /** - * @brief Hijacking constructor - * - * @param[in] n - * @param[in] m - * @param[in] nnz - * @param[in,out] rows - * @param[in,out] cols - * @param[in,out] vals - * @param[in] memspace_src - * @param[in] memspace_dst - */ - template - CsrMatrix::CsrMatrix(IdxT n, - IdxT m, - IdxT nnz, - IdxT** rows, - IdxT** cols, - RealT** vals, - memory::MemorySpace memspace_src, - memory::MemorySpace memspace_dst) - : CsrMatrix(n, m, nnz) - { - int control = -1; - if ((memspace_src == memory::HOST) && (memspace_dst == memory::HOST)) - { - control = 0; - } - if ((memspace_src == memory::HOST) && (memspace_dst == memory::DEVICE)) - { - control = 1; - } - if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::HOST)) - { - control = 2; - } - if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::DEVICE)) - { - control = 3; - } - - switch (control) - { - case 0: // cpu->cpu - // Set host data - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - // Set device data to null - if (d_row_data_ || d_col_data_ || d_val_data_) - { - std::cerr << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - d_data_updated_ = false; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - // Set device data and copy it to host - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - syncData(memspace_dst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - // Set host data and copy it to device - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - syncData(memspace_dst); - - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - // Set device data - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - // Set host data to null - if (h_row_data_ || h_col_data_ || h_val_data_) - { - std::cerr << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - h_data_updated_ = false; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - std::cerr << "CsrMatrix constructor failed! " - << "Possible bug in memory spaces setting.\n"; - break; - } - } - - template - CsrMatrix::~CsrMatrix() - { - this->destroyMatrixData(memory::HOST); - this->destroyMatrixData(memory::DEVICE); - } - - /** - * @brief set the matrix update flags to false (for both HOST and DEVICE). - */ - template - void CsrMatrix::setNotUpdated() - { - h_data_updated_ = false; - d_data_updated_ = false; - } - - /** - * @brief get number of matrix rows - * - * @return number of matrix rows. - */ - template - IdxT CsrMatrix::getNumRows() - { - return this->n_; - } - - /** - * @brief get number of matrix columns - * - * @return number of matrix columns. - */ - template - IdxT CsrMatrix::getNumColumns() - { - return this->m_; - } - - /** - * @brief get number of non-zeros in the matrix. - * - * @return number of non-zeros. - */ - template - IdxT CsrMatrix::getNnz() - { - return this->nnz_; - } - - /** - * @brief Set number of non-zeros. - * - * @param[in] nnz_new - new number of non-zeros - */ - template - void CsrMatrix::setNnz(IdxT nnz_new) - { - this->nnz_ = nnz_new; - } - - /** - * @brief Tags `memspace` as updated. - * - * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" - * - * @return 0 if successful, -1 if not. - * - * The method sets the boolean flag indicating that the `memspace` is updated. - * It automatically sets the other data mirror to non-updated. You would - * use this function if you update matrix data by accessing its raw pointers. - * In such case, the matrix has no way of knowing which data is most recent, so - * you have to tell it. - * - * @warning This is an expert-level function. Use only if you know what you are - * doing. - * - * @note If you want to set both DEVICE and HOST memory to the same value - * use syncData function. - */ - template - int CsrMatrix::setUpdated(memory::MemorySpace memspace) - { - using namespace memory; - switch (memspace) - { - case HOST: - h_data_updated_ = true; - d_data_updated_ = false; - break; - case DEVICE: - d_data_updated_ = true; - h_data_updated_ = false; - break; - } - return 0; - } - - /** - * @brief Set the pointers for matrix row, column, value data. - * - * Useful if interfacing with other codes - this function only assigns - * pointers, but it does not allocate nor copy anything. The data ownership - * flags would be set to false (default). - * - * @param[in] row_data - pointer to row data (array of integers) - * @param[in] col_data - pointer to column data (array of integers) - * @param[in] val_data - pointer to value data (array of real numbers) - * @param[in] memspace - memory space (HOST or DEVICE) of incoming data - * - * @return 0 if successful, 1 if not. - */ - template - int CsrMatrix::setDataPointers(IdxT* row_data, - IdxT* col_data, - RealT* val_data, - memory::MemorySpace memspace) - { - using namespace memory; - - setNotUpdated(); - - switch (memspace) - { - case HOST: - if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) - { - std::cerr << "Trying to set matrix host data, but the data already set!\n"; - std::cerr << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_cpu_values_ && h_val_data_) - { - std::cerr << "Trying to set matrix host values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_row_data_ = row_data; - h_col_data_ = col_data; - h_val_data_ = val_data; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) - { - std::cerr << "Trying to set matrix host data, but the data already set!\n"; - std::cerr << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_gpu_values_ && d_val_data_) - { - std::cerr << "Trying to set matrix device values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_row_data_ = row_data; - d_col_data_ = col_data; - d_val_data_ = val_data; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - break; - } - return 0; - } - - /** - * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it - * (will attempt to destroy all three arrays). - * - * @param[in] memspace - memory space (HOST or DEVICE) of incoming data - * - * @return 0 if successful, -1 if not. - * - */ - template - int CsrMatrix::destroyMatrixData(memory::MemorySpace memspace) - { - using namespace memory; - switch (memspace) - { - case HOST: - if (owns_cpu_sparsity_pattern_) - { - delete[] h_row_data_; - delete[] h_col_data_; - h_row_data_ = nullptr; - h_col_data_ = nullptr; - } - if (owns_cpu_values_) - { - delete[] h_val_data_; - h_val_data_ = nullptr; - } - return 0; - case DEVICE: - if (owns_gpu_sparsity_pattern_) - { - mem_.deleteOnDevice(d_row_data_); - mem_.deleteOnDevice(d_col_data_); - d_row_data_ = nullptr; - d_col_data_ = nullptr; - } - if (owns_gpu_values_) - { - mem_.deleteOnDevice(d_val_data_); - d_val_data_ = nullptr; - } - return 0; - default: - return -1; - } - } - - /** - * @brief updata matrix values using the _new_values_ provided either as HOST or as DEVICE array. - * - * This function will copy the data (not just assign a pointer) and allocate if needed. - * It also sets ownership and update flags. - * - * @param[in] new_vals - pointer to new values data (array of real numbers) - * @param[in] memspace_in - memory space (HOST or DEVICE) of _new_vals_ - * @param[in] memspace_out - memory space (HOST or DEVICE) of matrix values to be updated. - * - * @return 0 if successful, -1 if not. - */ - template - int CsrMatrix::copyValues(const RealT* new_vals, - memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - - IdxT nnz_current = nnz_; - // four cases (for now) - setNotUpdated(); - int control = -1; - if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) - { - control = 0; - } - if ((memspace_in == memory::HOST) && (memspace_out == memory::DEVICE)) - { - control = 1; - } - if ((memspace_in == memory::DEVICE) && (memspace_out == memory::HOST)) - { - control = 2; - } - if ((memspace_in == memory::DEVICE) && (memspace_out == memory::DEVICE)) - { - control = 3; - } - - if (memspace_out == memory::HOST) - { - // check if cpu data allocated - if (h_val_data_ == nullptr) - { - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - owns_cpu_values_ = true; - } - } - - if (memspace_out == memory::DEVICE) - { - // check if cuda data allocated - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_values_ = true; - } - } - - switch (control) - { - case 0: // cpu->cpu - mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 2: // cuda->cpu - mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 1: // cpu->cuda - mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - case 3: // cuda->cuda - mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; - } - return 0; - } - - /** - * @brief updata matrix values using the _new_values_ provided either as - * HOST or as DEVICE array. - * - * This function only assigns a pointer, but does not copy. It sets update - * flags. - * - * @param[in] new_vals - pointer to new values data (array of real numbers) - * @param[in] memspace - memory space (HOST or DEVICE) of _new_vals_ - * - * @return 0 if successful, -1 if not. - */ - template - int CsrMatrix::setValuesPointer(RealT* new_vals, - memory::MemorySpace memspace) - { - using namespace memory; - setNotUpdated(); - - switch (memspace) - { - case HOST: - if (owns_cpu_values_ && h_val_data_) - { - std::cerr << "Trying to set matrix host values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_val_data_ = new_vals; - h_data_updated_ = true; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_values_ && d_val_data_) - { - std::cerr << "Trying to set matrix device values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_val_data_ = new_vals; - d_data_updated_ = true; - owns_gpu_values_ = false; - break; - default: - return -1; - } - return 0; - } - - template - IdxT* CsrMatrix::getRowData(memory::MemorySpace memspace) - { - using namespace memory; - - switch (memspace) - { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; - } - } - - template - IdxT* CsrMatrix::getColData(memory::MemorySpace memspace) - { - using namespace memory; - - switch (memspace) - { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; - } - } - - template - RealT* CsrMatrix::getValues(memory::MemorySpace memspace) - { - using namespace memory; - - switch (memspace) - { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; - } - } - - template - int CsrMatrix::copyDataFrom(const IdxT* row_data, - const IdxT* col_data, - const RealT* val_data, - memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - // four cases (for now) - IdxT nnz_current = nnz_; - setNotUpdated(); - int control = -1; - if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) - { - control = 0; - } - if ((memspace_in == memory::HOST) && ((memspace_out == memory::DEVICE))) - { - control = 1; - } - if (((memspace_in == memory::DEVICE)) && (memspace_out == memory::HOST)) - { - control = 2; - } - if (((memspace_in == memory::DEVICE)) && ((memspace_out == memory::DEVICE))) - { - control = 3; - } - - if (memspace_out == memory::HOST) - { - // check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; - this->h_col_data_ = new IdxT[static_cast(nnz_current)]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - owns_cpu_values_ = true; - } - } - - if (memspace_out == memory::DEVICE) - { - // check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of device row or column data is null!\n"); - - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - owns_gpu_values_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - } - } - - // copy - switch (control) - { - case 0: // cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2: // gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1: // cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3: // gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; - } - return 0; - } - - template - int CsrMatrix::copyDataFrom(const IdxT* row_data, - const IdxT* col_data, - const RealT* val_data, - IdxT new_nnz, - memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - destroyMatrixData(memspace_out); - nnz_ = new_nnz; - return copyDataFrom(row_data, col_data, val_data, memspace_in, memspace_out); - } - - template - int CsrMatrix::allocateMatrixData(memory::MemorySpace memspace) - { - IdxT nnz_current = nnz_; - destroyMatrixData(memspace); // just in case - - if (memspace == memory::HOST) - { - this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; - std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); - this->h_col_data_ = new IdxT[static_cast(nnz_current)]; - std::fill(h_col_data_, h_col_data_ + nnz_current, 0); - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - return 0; - } - - if (memspace == memory::DEVICE) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - return 0; - } - return -1; - } - - /** - * @brief Sync data in memspace with the updated memory space. - * - * @param memspace - memory space to be synced up (HOST or DEVICE) - * @return int - 0 if successful, error code otherwise - * - * @pre The memory space other than `memspace` must be up-to-date. Otherwise, - * this function will return an error. - * - * @see CsrMatrix::setUpdated - */ - template - int CsrMatrix::syncData(memory::MemorySpace memspace) - { - using namespace memory; - - switch (memspace) - { - case HOST: - // check if we need to copy or not - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync host with device, but device is out of date!\n" - << "See CsrMatrix::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - h_row_data_ = new IdxT[static_cast(n_ + 1)]; - h_col_data_ = new IdxT[static_cast(nnz_)]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - h_val_data_ = new RealT[static_cast(nnz_)]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync device with host, but host is out of date!\n" - << "See CsrMatrix::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: - return 1; - } // switch - } - - /** - * @brief Prints matrix data. - * - * @param out - Output stream where the matrix data is printed - */ - template - void CsrMatrix::print(std::ostream& out, IdxT indexing_base) - { - out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for (IdxT i = 0; i < n_; ++i) - { - for (IdxT j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) - { - out << i + indexing_base << " " - << h_col_data_[j] + indexing_base << " " - << h_val_data_[j] << "\n"; - } - } - } - - template class CsrMatrix; - template class CsrMatrix; - } // namespace LinearAlgebra -} // namespace GridKit diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp index 44d35accb..04a808b25 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp @@ -1,8 +1,11 @@ #pragma once +#include #include #include +#include #include +#include #include @@ -15,10 +18,51 @@ namespace GridKit class CsrMatrix { public: - CsrMatrix(); + CsrMatrix() + { + } - CsrMatrix(IdxT n, IdxT m, IdxT nnz); + /** + * @brief basic constructor. It DOES NOT allocate any memory! + * + * @param[in] n - number of rows + * @param[in] m - number of columns + * @param[in] nnz - number of non-zeros + */ + CsrMatrix(IdxT n, IdxT m, IdxT nnz) + : n_{n}, + m_{m}, + nnz_{nnz} + { + setNotUpdated(); + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + } + + /** + * @brief Hijacking constructor + * + * @param[in] n + * @param[in] m + * @param[in] nnz + * @param[in,out] rows + * @param[in,out] cols + * @param[in,out] vals + * @param[in] memspace_src + * @param[in] memspace_dst + */ CsrMatrix(IdxT n, IdxT m, IdxT nnz, @@ -26,55 +70,719 @@ namespace GridKit IdxT** cols, RealT** vals, memory::MemorySpace memspace_src = memory::HOST, - memory::MemorySpace memspace_dst = memory::HOST); + memory::MemorySpace memspace_dst = memory::HOST) + : CsrMatrix(n, m, nnz) + { + int control = -1; + if ((memspace_src == memory::HOST) && (memspace_dst == memory::HOST)) + { + control = 0; + } + if ((memspace_src == memory::HOST) && (memspace_dst == memory::DEVICE)) + { + control = 1; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::HOST)) + { + control = 2; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::DEVICE)) + { + control = 3; + } + + switch (control) + { + case 0: // cpu->cpu + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + if (d_row_data_ || d_col_data_ || d_val_data_) + { + std::cerr << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + d_data_updated_ = false; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + syncData(memspace_dst); + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + syncData(memspace_dst); + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + if (h_row_data_ || h_col_data_ || h_val_data_) + { + std::cerr << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + h_data_updated_ = false; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + std::cerr << "CsrMatrix constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; + } + } - ~CsrMatrix(); + ~CsrMatrix() + { + this->destroyMatrixData(memory::HOST); + this->destroyMatrixData(memory::DEVICE); + } // accessors - IdxT getNumRows(); - IdxT getNumColumns(); - IdxT getNnz(); - void setNnz(IdxT nnz_new); // for resetting when removing duplicates - int setUpdated(memory::MemorySpace what); + IdxT getNumRows() const + { + return n_; + } + + IdxT getNumColumns() const + { + return m_; + } + + IdxT getNnz() const + { + return nnz_; + } + + IdxT getRows() const + { + return n_; + } + + IdxT getCols() const + { + return m_; + } - IdxT* getRowData(memory::MemorySpace memspace = memory::HOST); - IdxT* getColData(memory::MemorySpace memspace = memory::HOST); - RealT* getValues(memory::MemorySpace memspace = memory::HOST); + const IdxT* getRowPtr() const + { + return h_row_data_; + } + + const IdxT* getColInd() const + { + return h_col_data_; + } + + const RealT* getValues() const + { + return h_val_data_; + } + + void setNnz(IdxT nnz_new) + { + nnz_ = nnz_new; + } + + /** + * @brief Tags `memspace` as updated. + * + * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" + * + * @return 0 if successful, -1 if not. + * + * @warning This is an expert-level function. Use only if you know what you + * are doing. + */ + int setUpdated(memory::MemorySpace memspace) + { + using namespace memory; + switch (memspace) + { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: + d_data_updated_ = true; + h_data_updated_ = false; + break; + } + return 0; + } + + IdxT* getRowData(memory::MemorySpace memspace = memory::HOST) + { + using namespace memory; + switch (memspace) + { + case HOST: + return h_row_data_; + case DEVICE: + return d_row_data_; + default: + return nullptr; + } + } + + IdxT* getColData(memory::MemorySpace memspace = memory::HOST) + { + using namespace memory; + switch (memspace) + { + case HOST: + return h_col_data_; + case DEVICE: + return d_col_data_; + default: + return nullptr; + } + } + + RealT* getValues(memory::MemorySpace memspace = memory::HOST) + { + using namespace memory; + switch (memspace) + { + case HOST: + return h_val_data_; + case DEVICE: + return d_val_data_; + default: + return nullptr; + } + } int copyDataFrom(const IdxT* row_data, const IdxT* col_data, const RealT* val_data, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out); + memory::MemorySpace memspace_out) + { + IdxT nnz_current = nnz_; + setNotUpdated(); + int control = -1; + if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) + { + control = 0; + } + if ((memspace_in == memory::HOST) && ((memspace_out == memory::DEVICE))) + { + control = 1; + } + if (((memspace_in == memory::DEVICE)) && (memspace_out == memory::HOST)) + { + control = 2; + } + if (((memspace_in == memory::DEVICE)) && ((memspace_out == memory::DEVICE))) + { + control = 3; + } + + if (memspace_out == memory::HOST) + { + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; + this->h_col_data_ = new IdxT[static_cast(nnz_current)]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + owns_cpu_values_ = true; + } + } + + if (memspace_out == memory::DEVICE) + { + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of device row or column data is null!\n"); + + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + owns_gpu_values_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_sparsity_pattern_ = true; + } + } + + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; + } + return 0; + } + int copyDataFrom(const IdxT* row_data, const IdxT* col_data, const RealT* val_data, IdxT new_nnz, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out); + memory::MemorySpace memspace_out) + { + destroyMatrixData(memspace_out); + nnz_ = new_nnz; + return copyDataFrom(row_data, col_data, val_data, memspace_in, memspace_out); + } + + /** + * @brief Build CSR from sorted/deduplicated COO entries. + * + * Copies the provided row pointers, column indices, and values + * into the host arrays of this matrix. Allocates host data if needed. + * + * @param[in] row_data - Row pointers (size n_ + 1) + * @param[in] col_data - Column indices (size nnz_) + * @param[in] val_data - Values (size nnz_) + */ + void addEntries(const IdxT* row_data, const IdxT* col_data, const RealT* val_data) + { + for (IdxT i = 0; i < n_ + 1; ++i) + { + h_row_data_[i] = row_data[i]; + } + for (IdxT k = 0; k < nnz_; ++k) + { + h_col_data_[k] = col_data[k]; + h_val_data_[k] = val_data[k]; + } + h_data_updated_ = true; + d_data_updated_ = false; + } - int allocateMatrixData(memory::MemorySpace memspace); + int allocateMatrixData(memory::MemorySpace memspace) + { + IdxT nnz_current = nnz_; + destroyMatrixData(memspace); + + if (memspace == memory::HOST) + { + this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; + std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); + this->h_col_data_ = new IdxT[static_cast(nnz_current)]; + std::fill(h_col_data_, h_col_data_ + nnz_current, 0); + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + return 0; + } + + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + return 0; + } + return -1; + } + + /** + * @brief Set the pointers for matrix row, column, value data. + * + * Useful if interfacing with other codes - this function only assigns + * pointers, but it does not allocate nor copy anything. + * + * @param[in] row_data - pointer to row data + * @param[in] col_data - pointer to column data + * @param[in] val_data - pointer to value data + * @param[in] memspace - memory space (HOST or DEVICE) of incoming data + * + * @return 0 if successful, 1 if not. + */ int setDataPointers(IdxT* row_data, IdxT* col_data, RealT* val_data, - memory::MemorySpace memspace); + memory::MemorySpace memspace) + { + using namespace memory; + + setNotUpdated(); - int destroyMatrixData(memory::MemorySpace memspace); + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_cpu_values_ && h_val_data_) + { + std::cerr << "Trying to set matrix host values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_row_data_ = row_data; + h_col_data_ = col_data; + h_val_data_ = val_data; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_gpu_values_ && d_val_data_) + { + std::cerr << "Trying to set matrix device values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_row_data_ = row_data; + d_col_data_ = col_data; + d_val_data_ = val_data; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + break; + } + return 0; + } - void print(std::ostream& file_out = std::cout, IdxT indexing_base = 0); + /** + * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it. + * + * @param[in] memspace - memory space (HOST or DEVICE) + * + * @return 0 if successful, -1 if not. + */ + int destroyMatrixData(memory::MemorySpace memspace) + { + using namespace memory; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_) + { + delete[] h_row_data_; + delete[] h_col_data_; + h_row_data_ = nullptr; + h_col_data_ = nullptr; + } + if (owns_cpu_values_) + { + delete[] h_val_data_; + h_val_data_ = nullptr; + } + return 0; + case DEVICE: + if (owns_gpu_sparsity_pattern_) + { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + d_row_data_ = nullptr; + d_col_data_ = nullptr; + } + if (owns_gpu_values_) + { + mem_.deleteOnDevice(d_val_data_); + d_val_data_ = nullptr; + } + return 0; + default: + return -1; + } + } - int syncData(memory::MemorySpace memspace_out); + /** + * @brief Prints matrix data. + * + * @param out - Output stream where the matrix data is printed + * @param indexing_base - base index for printing (default 0) + */ + void print(std::ostream& out = std::cout, IdxT indexing_base = 0) + { + out << std::scientific << std::setprecision(std::numeric_limits::digits10); + for (IdxT i = 0; i < n_; ++i) + { + for (IdxT j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) + { + out << i + indexing_base << " " + << h_col_data_[j] + indexing_base << " " + << h_val_data_[j] << "\n"; + } + } + } - // update Values just updates values; it allocates if necessary. - // values have the same dimensions between different formats + /** + * @brief Sync data in memspace with the updated memory space. + * + * @param memspace - memory space to be synced up (HOST or DEVICE) + * @return int - 0 if successful, error code otherwise + * + * @pre The memory space other than `memspace` must be up-to-date. + */ + int syncData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync host with device, but device is out of date!\n" + << "See CsrMatrix::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new IdxT[static_cast(n_ + 1)]; + h_col_data_ = new IdxT[static_cast(nnz_)]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new RealT[static_cast(nnz_)]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); + return 1; + } + if (!h_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync device with host, but host is out of date!\n" + << "See CsrMatrix::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; + } + } + + /** + * @brief Update matrix values by copying from new_vals. + * + * Allocates if needed. Sets ownership and update flags. + * + * @param[in] new_vals - pointer to new values data + * @param[in] memspace_in - memory space of new_vals + * @param[in] memspace_out - memory space of matrix values to update + * + * @return 0 if successful, -1 if not. + */ int copyValues(const RealT* new_vals, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out); + memory::MemorySpace memspace_out) + { + IdxT nnz_current = nnz_; + setNotUpdated(); + int control = -1; + if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) + { + control = 0; + } + if ((memspace_in == memory::HOST) && (memspace_out == memory::DEVICE)) + { + control = 1; + } + if ((memspace_in == memory::DEVICE) && (memspace_out == memory::HOST)) + { + control = 2; + } + if ((memspace_in == memory::DEVICE) && (memspace_out == memory::DEVICE)) + { + control = 3; + } - // set new values just sets the pointer, use caution. + if (memspace_out == memory::HOST) + { + if (h_val_data_ == nullptr) + { + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + owns_cpu_values_ = true; + } + } + + if (memspace_out == memory::DEVICE) + { + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_values_ = true; + } + } + + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 2: // cuda->cpu + mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->cuda + mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + case 3: // cuda->cuda + mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; + } + return 0; + } + + /** + * @brief Set values pointer (does not copy). Use caution. + * + * @param[in] new_vals - pointer to new values data + * @param[in] memspace - memory space (HOST or DEVICE) of new_vals + * + * @return 0 if successful, -1 if not. + */ int setValuesPointer(RealT* new_vals, - memory::MemorySpace memspace); + memory::MemorySpace memspace) + { + using namespace memory; + setNotUpdated(); + + switch (memspace) + { + case HOST: + if (owns_cpu_values_ && h_val_data_) + { + std::cerr << "Trying to set matrix host values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_val_data_ = new_vals; + h_data_updated_ = true; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_values_ && d_val_data_) + { + std::cerr << "Trying to set matrix device values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_val_data_ = new_vals; + d_data_updated_ = true; + owns_gpu_values_ = false; + break; + default: + return -1; + } + return 0; + } private: IdxT n_{0}; ///< number of rows @@ -93,7 +801,11 @@ namespace GridKit RealT* d_val_data_{nullptr}; ///< value data (DEVICE) bool d_data_updated_{false}; ///< DEVICE update flag - void setNotUpdated(); + void setNotUpdated() + { + h_data_updated_ = false; + d_data_updated_ = false; + } // Data ownership flags bool owns_cpu_sparsity_pattern_{false}; ///< for row/col data diff --git a/GridKit/Model/Evaluator.hpp b/GridKit/Model/Evaluator.hpp index 93c812ffc..1a60dbe84 100644 --- a/GridKit/Model/Evaluator.hpp +++ b/GridKit/Model/Evaluator.hpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -19,8 +20,9 @@ namespace GridKit class Evaluator { public: - using RealT = typename GridKit::ScalarTraits::RealT; - using MatrixT = GridKit::LinearAlgebra::COO_Matrix; //\todo Use CsrMatrix + using RealT = typename GridKit::ScalarTraits::RealT; + using MatrixT = GridKit::LinearAlgebra::COO_Matrix; //\todo Use CsrMatrix + using CsrMatrix = GridKit::LinearAlgebra::CsrMatrix; Evaluator() { @@ -82,6 +84,11 @@ namespace GridKit { } + virtual const CsrMatrix* getCsrJacobian() const + { + return nullptr; + } + /** * @brief Is the Jacobian defined. Used in IDA to determine wether DQ is used or not * diff --git a/GridKit/Model/PowerElectronics/CircuitComponent.hpp b/GridKit/Model/PowerElectronics/CircuitComponent.hpp index cbf8a6135..d83b7a078 100644 --- a/GridKit/Model/PowerElectronics/CircuitComponent.hpp +++ b/GridKit/Model/PowerElectronics/CircuitComponent.hpp @@ -18,8 +18,9 @@ namespace GridKit class CircuitComponent : public Model::Evaluator { public: - using RealT = typename Model::Evaluator::RealT; - using MatrixT = typename Model::Evaluator::MatrixT; + using RealT = typename Model::Evaluator::RealT; + using MatrixT = typename Model::Evaluator::MatrixT; + using CsrMatrix = typename Model::Evaluator::CsrMatrix; CircuitComponent() = default; diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index f5796fc88..f5d636869 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -2,14 +2,17 @@ #pragma once +#include #include #include #include #include #include +#include +#include #include -#include +#include #include namespace GridKit @@ -60,7 +63,10 @@ namespace GridKit class PowerElectronicsModel : public CircuitComponent { using RealT = typename CircuitComponent::RealT; + using MatrixT = typename CircuitComponent::MatrixT; + using CsrMatrix = typename CircuitComponent::CsrMatrix; using component_type = CircuitComponent; + using node_type = CircuitNode; using CircuitComponent::size_; using CircuitComponent::nnz_; @@ -123,16 +129,22 @@ namespace GridKit virtual ~PowerElectronicsModel() { for (auto comp : this->components_) + { delete comp; + } + delete csr_jac_; } /** - * @brief allocator default + * @brief Allocate the system by computing total size from nodes and + * component internal variables. * - * @todo this should throw an exception as no allocation without a graph is allowed. - * Or needs to be removed from the base class + * Each node contributes 1 unknown (its voltage). Components that have + * internal variables (mapped to ground via INVALID_INDEX) are not + * counted here; they must still be wired to a global index via + * setExternalConnectionNodes. * - * @return int + * @return int 0 if successful */ int allocate() final { @@ -163,11 +175,17 @@ namespace GridKit /** * @brief Allocate the vector data with size amount - * @todo Add capability to go through component model connection to get the size of the actual vector * - * @param[in] s size of the vector + * Assembles the global COO from all component Jacobians, sorts and + * deduplicates the entries to build a CSR sparsity pattern, and + * distributes the mapping back to each component for efficient + * value updates during subsequent Jacobian evaluations. + * + * @param[in] s size of the vector (total number of unknowns) * * @post System model vectors allocated with size s + * @post CSR Jacobian sparsity pattern established + * @post Per-component CSR mappings computed * * @return int 0 if successful, positive if there's a recoverable error, negative if unrecoverable */ @@ -185,6 +203,66 @@ namespace GridKit yp_.resize(size_); f_.resize(size_); + // Evaluate component Jacobians to get sparsity + distributeVectors(); + for (const auto& component : components_) + { + component->evaluateJacobian(); + } + + // Count the number of non-zeros + IdxT nnz_dup = 0; + for (const auto& component : components_) + { + std::tuple&, std::vector&, std::vector&> entries = component->getJacobian().getEntries(); + const auto& [r, c, v] = entries; + + for (IdxT i = 0; i < static_cast(r.size()); ++i) + { + if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) + { + ++nnz_dup; + } + } + } + + // Allocate COO entries + std::vector rows(static_cast(nnz_dup)); + std::vector cols(static_cast(nnz_dup)); + std::vector vals(static_cast(nnz_dup)); + + IdxT counter = 0; + for (const auto& component : components_) + { + std::tuple&, std::vector&, std::vector&> entries = component->getJacobian().getEntries(); + const auto& [r, c, v] = entries; + + for (IdxT i = 0; i < static_cast(r.size()); ++i) + { + if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) + { + rows[counter] = component->getNodeConnection(r[i]); + cols[counter] = component->getNodeConnection(c[i]); + vals[counter] = v[i]; + counter++; + } + } + } + + // Build a COO from the collected entries + jac_.resetEntries(rows, cols, vals, size_, size_); + + // Extract CSR data + std::tuple, std::vector, std::vector> csrdata = jac_.getCsrData(); + const auto& [p, c, v] = csrdata; + + nnz_ = static_cast(v.size()); + + // Instantiate CSR Jacobian + csr_jac_ = new CsrMatrix(size_, size_, nnz_); + csr_jac_->allocateMatrixData(GridKit::LinearAlgebra::memory::HOST); + csr_jac_->addEntries(p.data(), c.data(), v.data()); + return 0; } @@ -282,46 +360,44 @@ namespace GridKit } /** - * @brief Creates the Sparse COO Jacobian representing \alpha dF/dy' + dF/dy + * @brief Creates the system Jacobian representing \alpha dF/dy' + dF/dy + * + * Updates the CSR Jacobian values using the per-component mappings + * computed during allocate(). * * @return int 0 if successful, positive if there's a recoverable error, negative if unrecoverable */ int evaluateJacobian() final { - jac_.zeroMatrix(); distributeVectors(); - // Evaluate component jacs + // Zero out values + RealT* vals = csr_jac_->getValues(); + for (IdxT i = 0; i < csr_jac_->getNnz(); ++i) + { + vals[i] = 0.0; + } + + // Update CSR values from component Jacobians + IdxT counter = 0; for (const auto& component : components_) { component->evaluateJacobian(); - // get references to local jacobian - std::tuple&, std::vector&, std::vector&> tpm = component->getJacobian().getEntries(); - const auto& [r, c, v] = tpm; + std::tuple&, std::vector&, std::vector&> entries = component->getJacobian().getEntries(); + const auto& [r, c, v] = entries; - // Create copies of data to handle groundings - std::vector rgr; - std::vector cgr; - std::vector vgr; - for (IdxT i = 0; i < static_cast(r.size()); i++) + for (IdxT i = 0; i < static_cast(r.size()); ++i) { if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) { - rgr.push_back(component->getNodeConnection(r[i])); - cgr.push_back(component->getNodeConnection(c[i])); - vgr.push_back(v[i]); + vals[jac_.getMapToCsr()[counter]] += v[i]; + ++counter; } } - - // AXPY to Global Jacobian - // elementwise jac_(rgr, cgr) += vgr - jac_.axpy(1.0, rgr, cgr, vgr); } - // jac_.printMatrixMarket("ScaleMicrogrid_Jacobian_N2_number" + std::to_string(jac_call_count_) + ".mtx", "Jacobian N2 number " + std::to_string(jac_call_count_)); jac_call_count_++; - return 0; } @@ -330,7 +406,6 @@ namespace GridKit */ int evaluateIntegrand() final { - return 0; } @@ -398,12 +473,16 @@ namespace GridKit * @param[in] filename * @param[in] title */ - void printJacobianMatrixMarket(std::string filename, std::string title) { jac_.printMatrixMarket(filename, title); } + const CsrMatrix* getCsrJacobian() const override + { + return csr_jac_; + } + void addComponent(component_type* component) { components_.push_back(component); @@ -414,6 +493,10 @@ namespace GridKit std::vector components_; + CsrMatrix* csr_jac_{nullptr}; + + std::vector map2csr_; ///< Mapping from COO entry to CSR value index + int jac_call_count_{0}; bool use_jac_; diff --git a/GridKit/Solver/Dynamic/Ida.cpp b/GridKit/Solver/Dynamic/Ida.cpp index aec2d9b0c..7286e8a88 100644 --- a/GridKit/Solver/Dynamic/Ida.cpp +++ b/GridKit/Solver/Dynamic/Ida.cpp @@ -168,8 +168,19 @@ namespace AnalysisManager { int retval = 0; - sunindextype n = static_cast(model_->size()); - sunindextype nnz = static_cast((model_->getJacobian()).nnz()); + sunindextype n = static_cast(model_->size()); + sunindextype nnz; + + bool useCsrJac = (model_->getCsrJacobian() != nullptr); + + if (useCsrJac) + { + nnz = static_cast(model_->getCsrJacobian()->getNnz()); + } + else + { + nnz = static_cast((model_->getJacobian()).nnz()); + } JacobianMat_ = SUNSparseMatrix(n, n, @@ -184,7 +195,14 @@ namespace AnalysisManager retval = IDASetLinearSolver(solver_, linearSolver_, JacobianMat_); checkOutput(retval, "IDASetLinearSolver"); - retval = IDASetJacFn(solver_, this->Jac); + if (useCsrJac) + { + retval = IDASetJacFn(solver_, this->CsrJac); + } + else + { + retval = IDASetJacFn(solver_, this->Jac); + } checkOutput(retval, "IDASetJacFn"); return retval; @@ -804,6 +822,50 @@ namespace AnalysisManager return 0; } + /** + * @brief CSR Jacobian evaluation + * + * @tparam ScalarT + * @tparam IdxT + * + * @todo Update PhasorDynamics to use this implementation. + */ + template + int Ida::CsrJac(RealT t, RealT cj, N_Vector yy, N_Vector yp, N_Vector, SUNMatrix J, void* user_data, N_Vector, N_Vector, N_Vector) + { + GridKit::Model::Evaluator* model = static_cast*>(user_data); + + model->updateTime(t, cj); + copyVec(yy, model->y()); + copyVec(yp, model->yp()); + + model->evaluateJacobian(); + + using CsrMatrix = GridKit::LinearAlgebra::CsrMatrix; + const CsrMatrix* Jac = model->getCsrJacobian(); + + SUNMatZero(J); + + sunindextype* sun_rptr = SUNSparseMatrix_IndexPointers(J); + sunindextype* sun_cind = SUNSparseMatrix_IndexValues(J); + RealT* sun_vals = SUNSparseMatrix_Data(J); + + IdxT n = Jac->getRows(); + IdxT nnz = Jac->getNnz(); + + // Get reference to the jacobian entries + const IdxT* rptr = Jac->getRowPtr(); + const IdxT* cind = Jac->getColInd(); + const RealT* vals = Jac->getValues(); + + // Copy data from model jac to sundials + std::copy(rptr, rptr + n + 1, sun_rptr); + std::copy(cind, cind + nnz, sun_cind); + std::copy(vals, vals + nnz, sun_vals); + + return 0; + } + /** * @brief Integrand evaluation * @@ -972,7 +1034,7 @@ namespace AnalysisManager /** * @brief Accumulate another stats object into this one, allowing for stats to be kept - * across multiple simulations with IDA. + * across multiple simulations with IDAs */ IdaStats& IdaStats::operator+=(const IdaStats& other) { diff --git a/GridKit/Solver/Dynamic/Ida.hpp b/GridKit/Solver/Dynamic/Ida.hpp index 20ff56aa8..0c4c09bf1 100644 --- a/GridKit/Solver/Dynamic/Ida.hpp +++ b/GridKit/Solver/Dynamic/Ida.hpp @@ -151,6 +151,17 @@ namespace AnalysisManager N_Vector tmp2, N_Vector tmp3); + static int CsrJac(RealT t, + RealT cj, + N_Vector yy, + N_Vector yp, + N_Vector resvec, + SUNMatrix J, + void* user_data, + N_Vector tmp1, + N_Vector tmp2, + N_Vector tmp3); + static int Integrand(RealT t, N_Vector yy, N_Vector yp, diff --git a/examples/LinearAlgebra/SparseTest/SparseTest.cpp b/examples/LinearAlgebra/SparseTest/SparseTest.cpp index 2e5224c7b..22ac31f2c 100644 --- a/examples/LinearAlgebra/SparseTest/SparseTest.cpp +++ b/examples/LinearAlgebra/SparseTest/SparseTest.cpp @@ -98,5 +98,79 @@ int main() std::cout << "Failed!" << std::endl; } + std::vector v3 = {1.2, 1.8, 1.0, 1.6, 1.4, 1.7, 1.1, 1.5, 1.3}; + std::vector x3 = {1, 2, 0, 0, 2, 1, 0, 2, 1}; + std::vector y3 = {1, 2, 0, 0, 0, 1, 1, 2, 2}; + size_t m3 = 3; + size_t n3 = 3; + + GridKit::LinearAlgebra::COO_Matrix C; + C.resetEntries(x3, y3, v3, m3, n3); + + std::vector csr_r; + std::vector csr_c; + std::vector csr_v; + std::tie(csr_r, csr_c, csr_v) = C.getCsrData(); + + std::cout << "C after getCsrData:\n"; + C.printMatrix(); + + for (size_t i = 0; i < csr_r.size() - 1; i++) + { + std::cout << csr_r[i] << std::endl; + size_t rdiff = csr_r[i + 1] - csr_r[i]; + for (size_t j = 0; j < rdiff; j++) + { + std::cout << csr_c[j + csr_r[i]] << ", " << csr_v[j + csr_r[i]] << std::endl; + } + } + std::cout << csr_r[csr_r.size() - 1] << std::endl; + + // Basic Verification test + std::vector csr_rtest = {0, 2, 4, 6}; + std::vector csr_ctest = {0, 1, 1, 2, 0, 2}; + std::vector csr_valtest = {2.6, 1.1, 2.9, 1.3, 1.4, 3.3}; + std::vector maptest = {2, 5, 0, 0, 4, 2, 1, 5, 3}; + + std::vector map2csr = C.getMapToCsr(); + + assert(csr_rtest.size() == csr_r.size()); + assert(csr_ctest.size() == csr_c.size()); + assert(csr_valtest.size() == csr_v.size()); + assert(maptest.size() == map2csr.size()); + + for (size_t i = 0; i < csr_rtest.size(); i++) + { + if (csr_r[i] != csr_rtest[i]) + { + failval--; + } + } + for (size_t i = 0; i < csr_ctest.size(); i++) + { + double vdiff = csr_v[i] - csr_valtest[i]; + if (csr_c[i] != csr_ctest[i] || -1e-14 > vdiff || vdiff > 1e-14) + { + failval--; + } + } + + for (size_t i = 0; i < maptest.size(); i++) + { + if (map2csr[i] != maptest[i]) + { + failval--; + } + } + + if (failval == 0) + { + std::cout << "Success!" << std::endl; + } + else + { + std::cout << "Failed!" << std::endl; + } + return failval; } From 2456c7f801426248c535c3efbf089dad2c68b061 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Fri, 13 Feb 2026 16:49:06 -0500 Subject: [PATCH 02/15] Update CHANGELOG.md --- CHANGELOG.md | 1 + GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb388b870..e2753c5c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - Minor performance improvements to residual evaluation in PowerElectronics module. - Added full support for sparse Jacobians obtained with Enzyme in PhasorDynamics. - Added `Node` class to the PowerElectronics module to separate nodes from circuit components. +- Refactor Jacobian assembly in `PowerElectronics` module to reuse the CSR pattern. ## v0.1 - Refactored code to support adding different model families. diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 9082f391f..c65e35381 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -226,8 +226,8 @@ namespace GridKit * @brief Convert to CSR format with deduplication and index mapping. * * 1. Sorts the COO entries - * 2. Deduplicates entries by summing their values - * 3. Builds CSR row pointers + * 2. Deduplicates entries by summing their values + * 3. Builds CSR row pointers * 4. Stores the mapping from original COO indices to deduplicated CSR value indices * * @tparam RealT - Real type for Jacobian entries From 8873793072617af1e439929787a2d0479ba2981e Mon Sep 17 00:00:00 2001 From: Kakeru Date: Fri, 13 Feb 2026 17:25:00 -0500 Subject: [PATCH 03/15] Remove unnecessary include --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index c65e35381..14a28f0d7 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include From 0ff5e9a855a49715f3fa8d207e292cb46154ba98 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Fri, 13 Feb 2026 18:25:20 -0500 Subject: [PATCH 04/15] Fix install error --- GridKit/LinearAlgebra/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GridKit/LinearAlgebra/CMakeLists.txt b/GridKit/LinearAlgebra/CMakeLists.txt index d290617be..3d324d485 100644 --- a/GridKit/LinearAlgebra/CMakeLists.txt +++ b/GridKit/LinearAlgebra/CMakeLists.txt @@ -1,3 +1,9 @@ add_subdirectory(SparseMatrix) add_subdirectory(DenseMatrix) + +install( + FILES + ${CMAKE_CURRENT_SOURCE_DIR}/MemoryUtils.hpp + DESTINATION + include/GridKit/LinearAlgebra) From 162209799b98387d29e5d9a58b10ddd694e2c153 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Fri, 13 Feb 2026 20:52:34 -0500 Subject: [PATCH 05/15] Small fix --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 4 ++-- .../Model/PowerElectronics/SystemModelPowerElectronics.hpp | 6 +++--- examples/LinearAlgebra/SparseTest/SparseTest.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 14a28f0d7..cebd71740 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -87,7 +87,7 @@ namespace GridKit IdxT nnz(); std::tuple getDimensions(); - std::vector getMapToCsr(); + std::vector& getMapToCsr(); void printMatrix(std::string name = ""); @@ -739,7 +739,7 @@ namespace GridKit } template - inline std::vector COO_Matrix::getMapToCsr() + inline std::vector& COO_Matrix::getMapToCsr() { return map2csr_; } diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index f5d636869..429370c11 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -378,6 +378,8 @@ namespace GridKit vals[i] = 0.0; } + std::vector& map2csr = jac_.getMapToCsr(); + // Update CSR values from component Jacobians IdxT counter = 0; for (const auto& component : components_) @@ -391,7 +393,7 @@ namespace GridKit { if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) { - vals[jac_.getMapToCsr()[counter]] += v[i]; + vals[map2csr[counter]] += v[i]; ++counter; } } @@ -495,8 +497,6 @@ namespace GridKit CsrMatrix* csr_jac_{nullptr}; - std::vector map2csr_; ///< Mapping from COO entry to CSR value index - int jac_call_count_{0}; bool use_jac_; diff --git a/examples/LinearAlgebra/SparseTest/SparseTest.cpp b/examples/LinearAlgebra/SparseTest/SparseTest.cpp index 22ac31f2c..a5e31d8b0 100644 --- a/examples/LinearAlgebra/SparseTest/SparseTest.cpp +++ b/examples/LinearAlgebra/SparseTest/SparseTest.cpp @@ -132,7 +132,7 @@ int main() std::vector csr_valtest = {2.6, 1.1, 2.9, 1.3, 1.4, 3.3}; std::vector maptest = {2, 5, 0, 0, 4, 2, 1, 5, 3}; - std::vector map2csr = C.getMapToCsr(); + std::vector& map2csr = C.getMapToCsr(); assert(csr_rtest.size() == csr_r.size()); assert(csr_ctest.size() == csr_c.size()); From 05ef0e955d4c8c8ef8369f151c44c370c351fe2b Mon Sep 17 00:00:00 2001 From: Kakeru Date: Sat, 14 Feb 2026 07:35:34 -0500 Subject: [PATCH 06/15] Fix comments --- .../LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 12 ++++++---- .../SystemModelPowerElectronics.hpp | 22 ++++++------------- GridKit/Solver/Dynamic/Ida.cpp | 7 ++++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index cebd71740..1c643c18c 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -1019,14 +1019,17 @@ namespace GridKit } /** - * @brief Sorts unordered COO matrix + * @brief Sorts unordered COO matrix with mapping * * Matrix entries can appear in arbitrary order and will be sorted in * row-major order before the method returns. - * Duplicate entries are not allowed and should be pre-summed. + * Duplicate entries are allowed here. * - * @pre rows, columns, and values are of the same size and represent a COO matrix with no duplicates - * @post Matrix entries are sorted in row-major order + * The mapping records where each sorted entry came from in the original + * arrays: sorted[k] comes from original index map[k]. + * + * @pre rows, columns, and values are of the same size and represent a COO matrix + * @post Matrix entries are sorted in row-major order and map stores original indices * * @todo simple setup. Should add stable sorting since lists are pre-sorted_ * @@ -1036,6 +1039,7 @@ namespace GridKit * @param rows * @param columns * @param values + * @param map */ template inline void COO_Matrix::sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values, std::vector& map) diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index 429370c11..a3060d88a 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -136,15 +136,12 @@ namespace GridKit } /** - * @brief Allocate the system by computing total size from nodes and - * component internal variables. + * @brief allocator default * - * Each node contributes 1 unknown (its voltage). Components that have - * internal variables (mapped to ground via INVALID_INDEX) are not - * counted here; they must still be wired to a global index via - * setExternalConnectionNodes. + * @todo this should throw an exception as no allocation without a graph is allowed. + * Or needs to be removed from the base class * - * @return int 0 if successful + * @return int */ int allocate() final { @@ -174,18 +171,13 @@ namespace GridKit } /** - * @brief Allocate the vector data with size amount - * - * Assembles the global COO from all component Jacobians, sorts and - * deduplicates the entries to build a CSR sparsity pattern, and - * distributes the mapping back to each component for efficient - * value updates during subsequent Jacobian evaluations. + * @brief Allocate system vectors and construct the system CSR Jacobian * * @param[in] s size of the vector (total number of unknowns) * * @post System model vectors allocated with size s - * @post CSR Jacobian sparsity pattern established - * @post Per-component CSR mappings computed + * @post CSR Jacobian sparsity pattern computed + * @post COO->CSR mapping computed * * @return int 0 if successful, positive if there's a recoverable error, negative if unrecoverable */ diff --git a/GridKit/Solver/Dynamic/Ida.cpp b/GridKit/Solver/Dynamic/Ida.cpp index 7286e8a88..80ae9e7f0 100644 --- a/GridKit/Solver/Dynamic/Ida.cpp +++ b/GridKit/Solver/Dynamic/Ida.cpp @@ -171,6 +171,7 @@ namespace AnalysisManager sunindextype n = static_cast(model_->size()); sunindextype nnz; + /// @todo Remove this flag and require all apps to configure CsrJac bool useCsrJac = (model_->getCsrJacobian() != nullptr); if (useCsrJac) @@ -823,7 +824,9 @@ namespace AnalysisManager } /** - * @brief CSR Jacobian evaluation + * @brief Jacobian evaluation + * + * @note The model Jacobian is stored in CSR format. * * @tparam ScalarT * @tparam IdxT @@ -1034,7 +1037,7 @@ namespace AnalysisManager /** * @brief Accumulate another stats object into this one, allowing for stats to be kept - * across multiple simulations with IDAs + * across multiple simulations with IDA */ IdaStats& IdaStats::operator+=(const IdaStats& other) { From 1129c52079ece4bc2871ceb6aab8ab4eabb447c3 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Mon, 16 Feb 2026 16:56:01 -0500 Subject: [PATCH 07/15] Rename some variables --- .../LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 68 +++++++++---------- .../SystemModelPowerElectronics.hpp | 4 +- GridKit/Solver/Dynamic/Ida.cpp | 18 ++--- .../LinearAlgebra/SparseTest/SparseTest.cpp | 6 +- 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 1c643c18c..16d792abb 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -36,7 +36,7 @@ namespace GridKit IdxT rows_size_; IdxT columns_size_; bool sorted_; - std::vector map2csr_; + std::vector map_to_csr_; public: // Constructors @@ -59,7 +59,7 @@ namespace GridKit /// Reinitialize COO matrix with new data void resetEntries(std::vector r, std::vector c, std::vector v, IdxT m, IdxT n); - /// Convert to CSR with deduplication, storing the COO-to-CSR mapping in map2csr_ + /// Convert to CSR with deduplication, storing the COO-to-CSR mapping in map_to_csr_ std::tuple, std::vector, std::vector> getCsrData(); // Set values from vector storage. Will sort before storing @@ -240,14 +240,14 @@ namespace GridKit const IdxT nnz_dup = static_cast(row_indices_.size()); // Sort the entries while preserving the mapping - std::vector map2sorted(nnz_dup); - sortSparseCOO(row_indices_, column_indices_, values_, map2sorted); + std::vector map_to_sorted(nnz_dup); + sortSparseCOO(row_indices_, column_indices_, values_, map_to_sorted); // Deduplicate entries by summing values std::vector row_ptrs(rows_size_ + 1, 0); - std::vector map2dedup(nnz_dup); + std::vector map_to_dedup(nnz_dup); - map2dedup[0] = 0; + map_to_dedup[0] = 0; row_ptrs[row_indices_[0] + 1]++; // Write position @@ -258,7 +258,7 @@ namespace GridKit if (row_indices_[i] == row_indices_[w] && column_indices_[i] == column_indices_[w]) { values_[w] += values_[i]; - map2dedup[i] = w; + map_to_dedup[i] = w; } else { @@ -266,7 +266,7 @@ namespace GridKit row_indices_[w] = row_indices_[i]; column_indices_[w] = column_indices_[i]; values_[w] = values_[i]; - map2dedup[i] = w; + map_to_dedup[i] = w; row_ptrs[row_indices_[w] + 1]++; } } @@ -278,11 +278,11 @@ namespace GridKit row_ptrs[i + 1] += row_ptrs[i]; } - // map2csr_: original COO index -> deduplicated CSR index - map2csr_.resize(nnz_dup); + // map_to_csr_: original COO index -> deduplicated CSR index + map_to_csr_.resize(nnz_dup); for (IdxT k = 0; k < nnz_dup; ++k) { - map2csr_[map2sorted[k]] = map2dedup[k]; + map_to_csr_[map_to_sorted[k]] = map_to_dedup[k]; } // Shrink internal arrays to deduplicated size @@ -741,7 +741,7 @@ namespace GridKit template inline std::vector& COO_Matrix::getMapToCsr() { - return map2csr_; + return map_to_csr_; } /** @@ -989,31 +989,31 @@ namespace GridKit // index based sort code // https://stackoverflow.com/questions/25921706/creating-a-vector-of-indices-of-a-sorted_-vector // cannot call sort since two arrays are used instead - std::vector ordervec(rows.size()); + std::vector order_vec(rows.size()); std::size_t n(0); - std::generate(std::begin(ordervec), std::end(ordervec), [&] + std::generate(std::begin(order_vec), std::end(order_vec), [&] { return n++; }); // Sort by row first then column. - std::sort(std::begin(ordervec), - std::end(ordervec), + std::sort(std::begin(order_vec), + std::end(order_vec), [&](auto i1, auto i2) { return (rows[i1] < rows[i2]) || (rows[i1] == rows[i2] && columns[i1] < columns[i2]); }); // reorder based of index-sorting. Only swap cost no extra memory. // @todo see if extra memory creation is fine // https://stackoverflow.com/a/22183350 - for (size_t i = 0; i < ordervec.size(); i++) + for (size_t i = 0; i < order_vec.size(); i++) { // permutation swap - while (ordervec[i] != ordervec[ordervec[i]]) + while (order_vec[i] != order_vec[order_vec[i]]) { - std::swap(rows[ordervec[i]], rows[ordervec[ordervec[i]]]); - std::swap(columns[ordervec[i]], columns[ordervec[ordervec[i]]]); - std::swap(values[ordervec[i]], values[ordervec[ordervec[i]]]); + std::swap(rows[order_vec[i]], rows[order_vec[order_vec[i]]]); + std::swap(columns[order_vec[i]], columns[order_vec[order_vec[i]]]); + std::swap(values[order_vec[i]], values[order_vec[order_vec[i]]]); // swap orderings - std::swap(ordervec[i], ordervec[ordervec[i]]); + std::swap(order_vec[i], order_vec[order_vec[i]]); } } } @@ -1048,35 +1048,35 @@ namespace GridKit // index based sort code // https://stackoverflow.com/questions/25921706/creating-a-vector-of-indices-of-a-sorted_-vector // cannot call sort since two arrays are used instead - std::vector ordervec(rows.size()); + std::vector order_vec(rows.size()); std::size_t n(0); - std::generate(std::begin(ordervec), std::end(ordervec), [&] + std::generate(std::begin(order_vec), std::end(order_vec), [&] { return n++; }); // Sort by row first then column. - std::sort(std::begin(ordervec), - std::end(ordervec), + std::sort(std::begin(order_vec), + std::end(order_vec), [&](auto i1, auto i2) { return (rows[i1] < rows[i2]) || (rows[i1] == rows[i2] && columns[i1] < columns[i2]); }); // Preserve the mapping - map.resize(ordervec.size()); - std::copy(ordervec.begin(), ordervec.end(), map.begin()); + map.resize(order_vec.size()); + std::copy(order_vec.begin(), order_vec.end(), map.begin()); // reorder based of index-sorting. Only swap cost no extra memory. // @todo see if extra memory creation is fine // https://stackoverflow.com/a/22183350 - for (size_t i = 0; i < ordervec.size(); i++) + for (size_t i = 0; i < order_vec.size(); i++) { // permutation swap - while (ordervec[i] != ordervec[ordervec[i]]) + while (order_vec[i] != order_vec[order_vec[i]]) { - std::swap(rows[ordervec[i]], rows[ordervec[ordervec[i]]]); - std::swap(columns[ordervec[i]], columns[ordervec[ordervec[i]]]); - std::swap(values[ordervec[i]], values[ordervec[ordervec[i]]]); + std::swap(rows[order_vec[i]], rows[order_vec[order_vec[i]]]); + std::swap(columns[order_vec[i]], columns[order_vec[order_vec[i]]]); + std::swap(values[order_vec[i]], values[order_vec[order_vec[i]]]); // swap orderings - std::swap(ordervec[i], ordervec[ordervec[i]]); + std::swap(order_vec[i], order_vec[order_vec[i]]); } } } diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index a3060d88a..3d16f7d20 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -370,7 +370,7 @@ namespace GridKit vals[i] = 0.0; } - std::vector& map2csr = jac_.getMapToCsr(); + std::vector& map_to_csr = jac_.getMapToCsr(); // Update CSR values from component Jacobians IdxT counter = 0; @@ -385,7 +385,7 @@ namespace GridKit { if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) { - vals[map2csr[counter]] += v[i]; + vals[map_to_csr[counter]] += v[i]; ++counter; } } diff --git a/GridKit/Solver/Dynamic/Ida.cpp b/GridKit/Solver/Dynamic/Ida.cpp index 80ae9e7f0..22e3560a8 100644 --- a/GridKit/Solver/Dynamic/Ida.cpp +++ b/GridKit/Solver/Dynamic/Ida.cpp @@ -849,22 +849,22 @@ namespace AnalysisManager SUNMatZero(J); - sunindextype* sun_rptr = SUNSparseMatrix_IndexPointers(J); - sunindextype* sun_cind = SUNSparseMatrix_IndexValues(J); - RealT* sun_vals = SUNSparseMatrix_Data(J); + sunindextype* sun_row_ptrs = SUNSparseMatrix_IndexPointers(J); + sunindextype* sun_colunm_indices = SUNSparseMatrix_IndexValues(J); + RealT* sun_values = SUNSparseMatrix_Data(J); IdxT n = Jac->getRows(); IdxT nnz = Jac->getNnz(); // Get reference to the jacobian entries - const IdxT* rptr = Jac->getRowPtr(); - const IdxT* cind = Jac->getColInd(); - const RealT* vals = Jac->getValues(); + const IdxT* row_ptrs = Jac->getRowPtr(); + const IdxT* colunm_indices = Jac->getColInd(); + const RealT* values = Jac->getValues(); // Copy data from model jac to sundials - std::copy(rptr, rptr + n + 1, sun_rptr); - std::copy(cind, cind + nnz, sun_cind); - std::copy(vals, vals + nnz, sun_vals); + std::copy(row_ptrs, row_ptrs + n + 1, sun_row_ptrs); + std::copy(colunm_indices, colunm_indices + nnz, sun_colunm_indices); + std::copy(values, values + nnz, sun_values); return 0; } diff --git a/examples/LinearAlgebra/SparseTest/SparseTest.cpp b/examples/LinearAlgebra/SparseTest/SparseTest.cpp index a5e31d8b0..444c93bd5 100644 --- a/examples/LinearAlgebra/SparseTest/SparseTest.cpp +++ b/examples/LinearAlgebra/SparseTest/SparseTest.cpp @@ -132,12 +132,12 @@ int main() std::vector csr_valtest = {2.6, 1.1, 2.9, 1.3, 1.4, 3.3}; std::vector maptest = {2, 5, 0, 0, 4, 2, 1, 5, 3}; - std::vector& map2csr = C.getMapToCsr(); + std::vector& map_to_csr = C.getMapToCsr(); assert(csr_rtest.size() == csr_r.size()); assert(csr_ctest.size() == csr_c.size()); assert(csr_valtest.size() == csr_v.size()); - assert(maptest.size() == map2csr.size()); + assert(maptest.size() == map_to_csr.size()); for (size_t i = 0; i < csr_rtest.size(); i++) { @@ -157,7 +157,7 @@ int main() for (size_t i = 0; i < maptest.size(); i++) { - if (map2csr[i] != maptest[i]) + if (map_to_csr[i] != maptest[i]) { failval--; } From b7bccb1dfbba98b707e958ee9232cef04d14b001 Mon Sep 17 00:00:00 2001 From: KakeruUeda Date: Mon, 16 Feb 2026 21:56:37 +0000 Subject: [PATCH 08/15] Apply pre-commmit fixes --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 16d792abb..6bf2bf84e 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -257,7 +257,7 @@ namespace GridKit { if (row_indices_[i] == row_indices_[w] && column_indices_[i] == column_indices_[w]) { - values_[w] += values_[i]; + values_[w] += values_[i]; map_to_dedup[i] = w; } else @@ -266,7 +266,7 @@ namespace GridKit row_indices_[w] = row_indices_[i]; column_indices_[w] = column_indices_[i]; values_[w] = values_[i]; - map_to_dedup[i] = w; + map_to_dedup[i] = w; row_ptrs[row_indices_[w] + 1]++; } } From 4d302837baddd43b29d272dbd6647fc63692d693 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Wed, 18 Feb 2026 15:35:52 -0500 Subject: [PATCH 09/15] Add CooMatrix class and use it for system COO Jacobian --- .../LinearAlgebra/SparseMatrix/CMakeLists.txt | 19 +- .../LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 182 +--- .../LinearAlgebra/SparseMatrix/CooMatrix.cpp | 606 +++++++++++++ .../LinearAlgebra/SparseMatrix/CooMatrix.hpp | 90 ++ .../LinearAlgebra/SparseMatrix/CsrMatrix.cpp | 825 ++++++++++++++++++ .../LinearAlgebra/SparseMatrix/CsrMatrix.hpp | 764 +--------------- GridKit/Model/Evaluator.hpp | 2 +- .../SystemModelPowerElectronics.hpp | 62 +- GridKit/Solver/Dynamic/CMakeLists.txt | 6 +- GridKit/Solver/Dynamic/Ida.cpp | 28 +- .../LinearAlgebra/SparseTest/CMakeLists.txt | 2 +- .../LinearAlgebra/SparseTest/SparseTest.cpp | 76 +- .../PowerElectronics/Microgrid/Microgrid.cpp | 6 + .../LinearAlgebra/SparseMatrix/CMakeLists.txt | 10 +- .../SparseMatrix/SparseCooTests.hpp | 178 ++++ .../SparseMatrix/runSparseCooTests.cpp | 37 + 16 files changed, 1853 insertions(+), 1040 deletions(-) create mode 100644 GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp create mode 100644 GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp create mode 100644 GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp create mode 100644 tests/UnitTests/LinearAlgebra/SparseMatrix/SparseCooTests.hpp create mode 100644 tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp diff --git a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt index 9d431895a..b5149d74e 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -1,13 +1,8 @@ -add_library(CsrMatrix INTERFACE) -target_include_directories(CsrMatrix INTERFACE - $ - $) - -add_library(GridKit::CsrMatrix ALIAS CsrMatrix) - -install( - FILES +gridkit_add_library(sparse + SOURCES + CooMatrix.cpp + CsrMatrix.cpp + HEADERS COO_Matrix.hpp - CsrMatrix.hpp - DESTINATION - include/GridKit/LinearAlgebra/SparseMatrix) + CooMatrix.hpp + CsrMatrix.hpp) \ No newline at end of file diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 6bf2bf84e..daa703bfb 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -36,7 +36,6 @@ namespace GridKit IdxT rows_size_; IdxT columns_size_; bool sorted_; - std::vector map_to_csr_; public: // Constructors @@ -56,12 +55,6 @@ namespace GridKit std::tuple, std::vector, std::vector> setDataToCSR(); std::vector getCSRRowData(); - /// Reinitialize COO matrix with new data - void resetEntries(std::vector r, std::vector c, std::vector v, IdxT m, IdxT n); - - /// Convert to CSR with deduplication, storing the COO-to-CSR mapping in map_to_csr_ - std::tuple, std::vector, std::vector> getCsrData(); - // Set values from vector storage. Will sort before storing void setValues(std::vector r, std::vector c, std::vector v); @@ -87,14 +80,12 @@ namespace GridKit IdxT nnz(); std::tuple getDimensions(); - std::vector& getMapToCsr(); void printMatrix(std::string name = ""); void printMatrixMarket(const std::string& filename, const std::string& comment); static void sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values); - static void sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values, std::vector& map); private: IdxT indexStartRow(const std::vector& rows, IdxT r); @@ -198,88 +189,6 @@ namespace GridKit return {row_size_vec, this->column_indices_, this->values_}; } - /** - * @brief Reinitialize the COO matrix with new data. - * - * @tparam RealT - Real type for Jacobian entries - * @tparam IdxT - Integer data type for matrix indices - * - * @param[in] r row indices - * @param[in] c column indices - * @param[in] v values - * @param[in] m number of rows - * @param[in] n number of columns - */ - template - void COO_Matrix::resetEntries(std::vector r, std::vector c, std::vector v, IdxT m, IdxT n) - { - this->values_ = v; - this->row_indices_ = r; - this->column_indices_ = c; - this->rows_size_ = m; - this->columns_size_ = n; - this->sorted_ = false; // Set to false until explicitly sorted, though logically it is sorted. - } - - /** - * @brief Convert to CSR format with deduplication and index mapping. - * - * 1. Sorts the COO entries - * 2. Deduplicates entries by summing their values - * 3. Builds CSR row pointers - * 4. Stores the mapping from original COO indices to deduplicated CSR value indices - * - * @tparam RealT - Real type for Jacobian entries - * @tparam IdxT - Integer data type for matrix indices - * - * @return std::tuple, std::vector, std::vector> - */ - template - std::tuple, std::vector, std::vector> COO_Matrix::getCsrData() - { - const IdxT nnz_dup = static_cast(row_indices_.size()); - - // Sort the entries while preserving the mapping - std::vector map_to_sorted(nnz_dup); - sortSparseCOO(row_indices_, column_indices_, values_, map_to_sorted); - - // Deduplicate entries by summing values - std::vector row_ptrs(rows_size_ + 1, 0); - std::vector map_to_dedup(nnz_dup); - - map_to_dedup[0] = 0; - row_ptrs[row_indices_[0] + 1]++; - - // Write position - IdxT w = 0; - - for (IdxT i = 1; i < nnz_dup; ++i) - { - if (row_indices_[i] == row_indices_[w] && column_indices_[i] == column_indices_[w]) - { - values_[w] += values_[i]; - map_to_dedup[i] = w; - } - else - { - ++w; - row_indices_[w] = row_indices_[i]; - column_indices_[w] = column_indices_[i]; - values_[w] = values_[i]; - map_to_dedup[i] = w; - row_ptrs[row_indices_[w] + 1]++; - } - } - IdxT nnz_dedup = w + 1; - - // Cumulative sum for row pointers - for (IdxT i = 0; i < rows_size_; ++i) - { - row_ptrs[i + 1] += row_ptrs[i]; - } - - // map_to_csr_: original COO index -> deduplicated CSR index - map_to_csr_.resize(nnz_dup); for (IdxT k = 0; k < nnz_dup; ++k) { map_to_csr_[map_to_sorted[k]] = map_to_dedup[k]; @@ -738,12 +647,6 @@ namespace GridKit return std::tuple(this->rows_size_, this->columns_size_); } - template - inline std::vector& COO_Matrix::getMapToCsr() - { - return map_to_csr_; - } - /** * @brief Print matrix in sorted order * @@ -989,94 +892,31 @@ namespace GridKit // index based sort code // https://stackoverflow.com/questions/25921706/creating-a-vector-of-indices-of-a-sorted_-vector // cannot call sort since two arrays are used instead - std::vector order_vec(rows.size()); + std::vector ordervec(rows.size()); std::size_t n(0); - std::generate(std::begin(order_vec), std::end(order_vec), [&] + std::generate(std::begin(ordervec), std::end(ordervec), [&] { return n++; }); // Sort by row first then column. - std::sort(std::begin(order_vec), - std::end(order_vec), + std::sort(std::begin(ordervec), + std::end(ordervec), [&](auto i1, auto i2) { return (rows[i1] < rows[i2]) || (rows[i1] == rows[i2] && columns[i1] < columns[i2]); }); // reorder based of index-sorting. Only swap cost no extra memory. // @todo see if extra memory creation is fine // https://stackoverflow.com/a/22183350 - for (size_t i = 0; i < order_vec.size(); i++) - { - // permutation swap - while (order_vec[i] != order_vec[order_vec[i]]) - { - std::swap(rows[order_vec[i]], rows[order_vec[order_vec[i]]]); - std::swap(columns[order_vec[i]], columns[order_vec[order_vec[i]]]); - std::swap(values[order_vec[i]], values[order_vec[order_vec[i]]]); - - // swap orderings - std::swap(order_vec[i], order_vec[order_vec[i]]); - } - } - } - - /** - * @brief Sorts unordered COO matrix with mapping - * - * Matrix entries can appear in arbitrary order and will be sorted in - * row-major order before the method returns. - * Duplicate entries are allowed here. - * - * The mapping records where each sorted entry came from in the original - * arrays: sorted[k] comes from original index map[k]. - * - * @pre rows, columns, and values are of the same size and represent a COO matrix - * @post Matrix entries are sorted in row-major order and map stores original indices - * - * @todo simple setup. Should add stable sorting since lists are pre-sorted_ - * - * @tparam RealT - Real type for Jacobian entries - * @tparam IdxT - Integer data type for matrix indices - * - * @param rows - * @param columns - * @param values - * @param map - */ - template - inline void COO_Matrix::sortSparseCOO(std::vector& rows, std::vector& columns, std::vector& values, std::vector& map) - { - - // index based sort code - // https://stackoverflow.com/questions/25921706/creating-a-vector-of-indices-of-a-sorted_-vector - // cannot call sort since two arrays are used instead - std::vector order_vec(rows.size()); - std::size_t n(0); - std::generate(std::begin(order_vec), std::end(order_vec), [&] - { return n++; }); - - // Sort by row first then column. - std::sort(std::begin(order_vec), - std::end(order_vec), - [&](auto i1, auto i2) - { return (rows[i1] < rows[i2]) || (rows[i1] == rows[i2] && columns[i1] < columns[i2]); }); - - // Preserve the mapping - map.resize(order_vec.size()); - std::copy(order_vec.begin(), order_vec.end(), map.begin()); - - // reorder based of index-sorting. Only swap cost no extra memory. - // @todo see if extra memory creation is fine - // https://stackoverflow.com/a/22183350 - for (size_t i = 0; i < order_vec.size(); i++) + for (size_t i = 0; i < ordervec.size(); i++) { // permutation swap - while (order_vec[i] != order_vec[order_vec[i]]) + while (ordervec[i] != ordervec[ordervec[i]]) { - std::swap(rows[order_vec[i]], rows[order_vec[order_vec[i]]]); - std::swap(columns[order_vec[i]], columns[order_vec[order_vec[i]]]); - std::swap(values[order_vec[i]], values[order_vec[order_vec[i]]]); + std::swap(rows[ordervec[i]], rows[ordervec[ordervec[i]]]); + std::swap(columns[ordervec[i]], columns[ordervec[ordervec[i]]]); + std::swap(values[ordervec[i]], values[ordervec[ordervec[i]]]); // swap orderings - std::swap(order_vec[i], order_vec[order_vec[i]]); + std::swap(ordervec[i], ordervec[ordervec[i]]); } } } @@ -1158,4 +998,4 @@ namespace GridKit { } } // namespace LinearAlgebra -} // namespace GridKit +} // namespace GridKit \ No newline at end of file diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp new file mode 100644 index 000000000..0a1a4d96f --- /dev/null +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp @@ -0,0 +1,606 @@ + +#include "CooMatrix.hpp" + +#include +#include +#include +#include + +namespace GridKit +{ + namespace LinearAlgebra + { + template + CooMatrix::CooMatrix() + { + } + + /** + * @brief basic constructor. It DOES NOT allocate any memory! + * + * @param[in] n - number of rows + * @param[in] m - number of columns + * @param[in] nnz_ - number of non-zeros + */ + template + CooMatrix::CooMatrix(IdxT n, + IdxT m, + IdxT nnz) + : n_{n}, + m_{m}, + nnz_{nnz} + { + setNotUpdated(); + + // set everything to nullptr + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + } + + /** + * @brief Hijacking constructor + * + * @param[in] n + * @param[in] m + * @param[in] nnz_ + * @param[in,out] rows + * @param[in,out] cols + * @param[in,out] vals + * @param[in] memspace_src + * @param[in] memspace_dst + */ + template + CooMatrix::CooMatrix(IdxT n, + IdxT m, + IdxT nnz_, + IdxT** rows, + IdxT** cols, + RealT** vals, + memory::MemorySpace memspace_src, + memory::MemorySpace memspace_dst) + : CooMatrix(n, m, nnz_) + { + int control = -1; + if ((memspace_src == memory::HOST) && (memspace_dst == memory::HOST)) + { + control = 0; + } + if ((memspace_src == memory::HOST) && (memspace_dst == memory::DEVICE)) + { + control = 1; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::HOST)) + { + control = 2; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::DEVICE)) + { + control = 3; + } + + switch (control) + { + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + // Set device data to null + if (d_row_data_ || d_col_data_ || d_val_data_) + { + std::cerr << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + d_data_updated_ = false; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + syncData(memspace_dst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + syncData(memspace_dst); + + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + // Set host data to null + if (h_row_data_ || h_col_data_ || h_val_data_) + { + std::cerr << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + h_data_updated_ = false; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + std::cerr << "CooMatrix constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; + } + } + + template + CooMatrix::~CooMatrix() + { + this->destroyMatrixData(memory::HOST); + this->destroyMatrixData(memory::DEVICE); + delete[] map_to_sorted_; + delete[] map_to_dedup_; + } + + /** + * @brief set the matrix update flags to false (for both HOST and DEVICE). + */ + template + void CooMatrix::setNotUpdated() + { + h_data_updated_ = false; + d_data_updated_ = false; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + template + IdxT CooMatrix::getNumRows() const + { + return this->n_; + } + + /** + * @brief get number of matrix columns + * + * @return number of matrix columns. + */ + template + IdxT CooMatrix::getNumColumns() const + { + return this->m_; + } + + /** + * @brief get number of non-zeros in the matrix. + * + * @return number of non-zeros. + */ + template + IdxT CooMatrix::getNnz() const + { + return this->nnz_; + } + + /** + * @brief Set the pointers for matrix row, column, value data. + * + * Useful if interfacing with other codes - this function only assigns + * pointers, but it does not allocate nor copy anything. The data ownership + * flags would be set to false (default). + * + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] memspace - memory space (HOST or DEVICE) of incoming data + * + * @return 0 if successful, 1 if not. + */ + template + int CooMatrix::setDataPointers(IdxT* row_data, + IdxT* col_data, + RealT* val_data, + memory::MemorySpace memspace) + { + using namespace memory; + + setNotUpdated(); + + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_cpu_values_ && h_val_data_) + { + std::cerr << "Trying to set matrix host values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_row_data_ = row_data; + h_col_data_ = col_data; + h_val_data_ = val_data; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_gpu_values_ && d_val_data_) + { + std::cerr << "Trying to set matrix device values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_row_data_ = row_data; + d_col_data_ = col_data; + d_val_data_ = val_data; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + break; + } + return 0; + } + + /** + * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it + * (will attempt to destroy all three arrays). + * + * @param[in] memspace - memory space (HOST or DEVICE) of incoming data + * + * @return 0 if successful, -1 if not. + * + */ + template + int CooMatrix::destroyMatrixData(memory::MemorySpace memspace) + { + using namespace memory; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_) + { + delete[] h_row_data_; + delete[] h_col_data_; + h_row_data_ = nullptr; + h_col_data_ = nullptr; + } + if (owns_cpu_values_) + { + delete[] h_val_data_; + h_val_data_ = nullptr; + } + return 0; + case DEVICE: + if (owns_gpu_sparsity_pattern_) + { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + d_row_data_ = nullptr; + d_col_data_ = nullptr; + } + if (owns_gpu_values_) + { + mem_.deleteOnDevice(d_val_data_); + d_val_data_ = nullptr; + } + return 0; + default: + return -1; + } + } + + /** + * @brief Sort COO entries by (row, col), merge duplicate + * entries, and build CSR row pointers. + * + * @note This function operates on host (CPU) data only. + * + * @return Pointer to CSR row pointer array + */ + template + IdxT* CooMatrix::getCsrRowData() + { + if (!h_data_updated_) + { + std::cerr << "CooMatrix::getCsrRowData requires up-to-date host data, but host is out of date!\n"; + assert(h_data_updated_); + return nullptr; + } + + map_to_sorted_ = new IdxT[nnz_]; + + for (IdxT i = 0; i < nnz_; i++) + { + map_to_sorted_[i] = i; + } + + // Sort by (row, col) + std::sort(map_to_sorted_, map_to_sorted_ + nnz_, [this](IdxT a, IdxT b) + { if (h_row_data_[a] != h_row_data_[b]) return h_row_data_[a] < h_row_data_[b]; + return h_col_data_[a] < h_col_data_[b]; }); + + IdxT* row_data_tmp = new IdxT[nnz_]; + IdxT* col_data_tmp = new IdxT[nnz_]; + RealT* val_data_tmp = new RealT[nnz_]; + + for (IdxT i = 0; i < nnz_; i++) + { + row_data_tmp[i] = h_row_data_[map_to_sorted_[i]]; + col_data_tmp[i] = h_col_data_[map_to_sorted_[i]]; + val_data_tmp[i] = h_val_data_[map_to_sorted_[i]]; + } + for (IdxT i = 0; i < nnz_; i++) + { + h_row_data_[i] = row_data_tmp[i]; + h_col_data_[i] = col_data_tmp[i]; + h_val_data_[i] = val_data_tmp[i]; + } + delete[] row_data_tmp; + delete[] col_data_tmp; + delete[] val_data_tmp; + + map_to_dedup_ = new IdxT[nnz_]; + IdxT* csr_row_data = new IdxT[n_ + 1](); + + map_to_dedup_[0] = 0; + csr_row_data[h_row_data_[0] + 1]++; + + // Deduplicate sroted entries + IdxT w = 0; + for (IdxT i = 1; i < nnz_; i++) + { + if (h_row_data_[i] == h_row_data_[w] && h_col_data_[i] == h_col_data_[w]) + { + h_val_data_[w] += h_val_data_[i]; + map_to_dedup_[i] = w; + } + else + { + ++w; + h_row_data_[w] = h_row_data_[i]; + h_col_data_[w] = h_col_data_[i]; + h_val_data_[w] = h_val_data_[i]; + map_to_dedup_[i] = w; + csr_row_data[h_row_data_[w] + 1]++; + } + } + // nnz after deduplication + nnz_ = w + 1; + + for (IdxT i = 0; i < n_; i++) + { + csr_row_data[i + 1] += csr_row_data[i]; + } + + return csr_row_data; + } + + template + const IdxT* CooMatrix::getMapToSorted() const + { + return map_to_sorted_; + } + + template + const IdxT* CooMatrix::getMapToDeduplicated() const + { + return map_to_dedup_; + } + + template + IdxT* CooMatrix::getRowData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; + } + } + + template + IdxT* CooMatrix::getColData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; + } + } + + template + RealT* CooMatrix::getValues(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; + } + } + + /** + * @brief Sync data in memspace with the updated memory space. + * + * @param memspace - memory space to be synced up (HOST or DEVICE) + * @return int - 0 if successful, error code otherwise + * + * @pre The memory space other than `memspace` must be up-to-date. Otherwise, + * this function will return an error. + * + * @see CooMatrix::setUpdated + */ + template + int CooMatrix::syncData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + // check if we need to copy or not + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CooMatrix::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + std::cerr << "CooMatrix::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + std::cerr << "CooMatrix::syncData is trying to sync host with device, but device is out of date!\n" + << "See CooMatrix::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new IdxT[static_cast(n_ + 1)]; + h_col_data_ = new IdxT[static_cast(nnz_)]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new RealT[static_cast(nnz_)]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CooMatrix::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + std::cerr << "CooMatrix::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); + return 1; + } + if (!h_data_updated_) + { + std::cerr << "CooMatrix::syncData is trying to sync device with host, but host is out of date!\n" + << "See CooMatrix::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; + } // switch + } + + /** + * @brief Prints matrix data. + * + * @param out - Output stream where the matrix data is printed + */ + template + void CooMatrix::print(std::ostream& out, IdxT indexing_base) + { + out << std::scientific << std::setprecision(std::numeric_limits::digits10); + for (IdxT i = 0; i < n_; ++i) + { + for (IdxT j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) + { + out << i + indexing_base << " " + << h_col_data_[j] + indexing_base << " " + << h_val_data_[j] << "\n"; + } + } + } + + template class CooMatrix; + template class CooMatrix; + template class CooMatrix; + } // namespace LinearAlgebra +} // namespace GridKit \ No newline at end of file diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp new file mode 100644 index 000000000..fb65e376a --- /dev/null +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include + +#include + +namespace GridKit +{ + namespace LinearAlgebra + { + + template + class CooMatrix + { + public: + CooMatrix(); + + CooMatrix(IdxT n, IdxT m, IdxT nnz); + + CooMatrix(IdxT n, + IdxT m, + IdxT nnz, + IdxT** rows, + IdxT** cols, + RealT** vals, + memory::MemorySpace memspace_src = memory::HOST, + memory::MemorySpace memspace_dst = memory::HOST); + + ~CooMatrix(); + + // accessors + IdxT getNumRows() const; + IdxT getNumColumns() const; + IdxT getNnz() const; + + const IdxT* getMapToSorted() const; + const IdxT* getMapToDeduplicated() const; + + IdxT* getRowData(memory::MemorySpace memspace = memory::HOST); + IdxT* getColData(memory::MemorySpace memspace = memory::HOST); + RealT* getValues(memory::MemorySpace memspace = memory::HOST); + + int setDataPointers(IdxT* row_data, + IdxT* col_data, + RealT* val_data, + memory::MemorySpace memspace); + + int destroyMatrixData(memory::MemorySpace memspace); + + void print(std::ostream& file_out = std::cout, IdxT indexing_base = 0); + + int syncData(memory::MemorySpace memspace_out); + + IdxT* getCsrRowData(); + + private: + IdxT n_{0}; ///< number of rows + IdxT m_{0}; ///< number of columns + IdxT nnz_{0}; ///< number of non-zeros + + // host data + IdxT* h_row_data_{nullptr}; ///< row data (HOST) + IdxT* h_col_data_{nullptr}; ///< column data (HOST) + RealT* h_val_data_{nullptr}; ///< value data (HOST) + bool h_data_updated_{false}; ///< HOST update flag + + // gpu data + IdxT* d_row_data_{nullptr}; ///< row data (DEVICE) + IdxT* d_col_data_{nullptr}; ///< column data (DEVICE) + RealT* d_val_data_{nullptr}; ///< value data (DEVICE) + bool d_data_updated_{false}; ///< DEVICE update flag + + void setNotUpdated(); + + // Data ownership flags + bool owns_cpu_sparsity_pattern_{false}; ///< for row/col data + bool owns_cpu_values_{false}; ///< for nonzero values + + bool owns_gpu_sparsity_pattern_{false}; ///< for row/col data + bool owns_gpu_values_{false}; ///< for nonzero values + + IdxT* map_to_sorted_ = {nullptr}; ///< map from orginal to sorted + IdxT* map_to_dedup_ = {nullptr}; ///< map from sorted to deduplicated + + MemoryHandler mem_; ///< Device memory manager object + }; + } // namespace LinearAlgebra +} // namespace GridKit \ No newline at end of file diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp new file mode 100644 index 000000000..d798949ab --- /dev/null +++ b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp @@ -0,0 +1,825 @@ +#include "CsrMatrix.hpp" + +#include +#include +#include + +namespace GridKit +{ + namespace LinearAlgebra + { + template + CsrMatrix::CsrMatrix() + { + } + + /** + * @brief basic constructor. It DOES NOT allocate any memory! + * + * @param[in] n - number of rows + * @param[in] m - number of columns + * @param[in] nnz - number of non-zeros + */ + template + CsrMatrix::CsrMatrix(IdxT n, + IdxT m, + IdxT nnz) + : n_{n}, + m_{m}, + nnz_{nnz} + { + setNotUpdated(); + + // set everything to nullptr + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + } + + /** + * @brief Hijacking constructor + * + * @param[in] n + * @param[in] m + * @param[in] nnz + * @param[in,out] rows + * @param[in,out] cols + * @param[in,out] vals + * @param[in] memspace_src + * @param[in] memspace_dst + */ + template + CsrMatrix::CsrMatrix(IdxT n, + IdxT m, + IdxT nnz, + IdxT** rows, + IdxT** cols, + RealT** vals, + memory::MemorySpace memspace_src, + memory::MemorySpace memspace_dst) + : CsrMatrix(n, m, nnz) + { + int control = -1; + if ((memspace_src == memory::HOST) && (memspace_dst == memory::HOST)) + { + control = 0; + } + if ((memspace_src == memory::HOST) && (memspace_dst == memory::DEVICE)) + { + control = 1; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::HOST)) + { + control = 2; + } + if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::DEVICE)) + { + control = 3; + } + + switch (control) + { + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + // Set device data to null + if (d_row_data_ || d_col_data_ || d_val_data_) + { + std::cerr << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + d_data_updated_ = false; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + syncData(memspace_dst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + syncData(memspace_dst); + + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + // Set host data to null + if (h_row_data_ || h_col_data_ || h_val_data_) + { + std::cerr << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + h_data_updated_ = false; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + std::cerr << "CsrMatrix constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; + } + } + + template + CsrMatrix::~CsrMatrix() + { + this->destroyMatrixData(memory::HOST); + this->destroyMatrixData(memory::DEVICE); + } + + /** + * @brief set the matrix update flags to false (for both HOST and DEVICE). + */ + template + void CsrMatrix::setNotUpdated() + { + h_data_updated_ = false; + d_data_updated_ = false; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + template + IdxT CsrMatrix::getNumRows() + { + return this->n_; + } + + /** + * @brief get number of matrix columns + * + * @return number of matrix columns. + */ + template + IdxT CsrMatrix::getNumColumns() + { + return this->m_; + } + + /** + * @brief get number of non-zeros in the matrix. + * + * @return number of non-zeros. + */ + template + IdxT CsrMatrix::getNnz() + { + return this->nnz_; + } + + /** + * @brief Set number of non-zeros. + * + * @param[in] nnz_new - new number of non-zeros + */ + template + void CsrMatrix::setNnz(IdxT nnz_new) + { + this->nnz_ = nnz_new; + } + + /** + * @brief Tags `memspace` as updated. + * + * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" + * + * @return 0 if successful, -1 if not. + * + * The method sets the boolean flag indicating that the `memspace` is updated. + * It automatically sets the other data mirror to non-updated. You would + * use this function if you update matrix data by accessing its raw pointers. + * In such case, the matrix has no way of knowing which data is most recent, so + * you have to tell it. + * + * @warning This is an expert-level function. Use only if you know what you are + * doing. + * + * @note If you want to set both DEVICE and HOST memory to the same value + * use syncData function. + */ + template + int CsrMatrix::setUpdated(memory::MemorySpace memspace) + { + using namespace memory; + switch (memspace) + { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: + d_data_updated_ = true; + h_data_updated_ = false; + break; + } + return 0; + } + + /** + * @brief Set the pointers for matrix row, column, value data. + * + * Useful if interfacing with other codes - this function only assigns + * pointers, but it does not allocate nor copy anything. The data ownership + * flags would be set to false (default). + * + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] memspace - memory space (HOST or DEVICE) of incoming data + * + * @return 0 if successful, 1 if not. + */ + template + int CsrMatrix::setDataPointers(IdxT* row_data, + IdxT* col_data, + RealT* val_data, + memory::MemorySpace memspace) + { + using namespace memory; + + setNotUpdated(); + + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_cpu_values_ && h_val_data_) + { + std::cerr << "Trying to set matrix host values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_row_data_ = row_data; + h_col_data_ = col_data; + h_val_data_ = val_data; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) + { + std::cerr << "Trying to set matrix host data, but the data already set!\n"; + std::cerr << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_gpu_values_ && d_val_data_) + { + std::cerr << "Trying to set matrix device values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_row_data_ = row_data; + d_col_data_ = col_data; + d_val_data_ = val_data; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + break; + } + return 0; + } + + /** + * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it + * (will attempt to destroy all three arrays). + * + * @param[in] memspace - memory space (HOST or DEVICE) of incoming data + * + * @return 0 if successful, -1 if not. + * + */ + template + int CsrMatrix::destroyMatrixData(memory::MemorySpace memspace) + { + using namespace memory; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_) + { + delete[] h_row_data_; + delete[] h_col_data_; + h_row_data_ = nullptr; + h_col_data_ = nullptr; + } + if (owns_cpu_values_) + { + delete[] h_val_data_; + h_val_data_ = nullptr; + } + return 0; + case DEVICE: + if (owns_gpu_sparsity_pattern_) + { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + d_row_data_ = nullptr; + d_col_data_ = nullptr; + } + if (owns_gpu_values_) + { + mem_.deleteOnDevice(d_val_data_); + d_val_data_ = nullptr; + } + return 0; + default: + return -1; + } + } + + /** + * @brief updata matrix values using the _new_values_ provided either as HOST or as DEVICE array. + * + * This function will copy the data (not just assign a pointer) and allocate if needed. + * It also sets ownership and update flags. + * + * @param[in] new_vals - pointer to new values data (array of real numbers) + * @param[in] memspace_in - memory space (HOST or DEVICE) of _new_vals_ + * @param[in] memspace_out - memory space (HOST or DEVICE) of matrix values to be updated. + * + * @return 0 if successful, -1 if not. + */ + template + int CsrMatrix::copyValues(const RealT* new_vals, + memory::MemorySpace memspace_in, + memory::MemorySpace memspace_out) + { + + IdxT nnz_current = nnz_; + // four cases (for now) + setNotUpdated(); + int control = -1; + if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) + { + control = 0; + } + if ((memspace_in == memory::HOST) && (memspace_out == memory::DEVICE)) + { + control = 1; + } + if ((memspace_in == memory::DEVICE) && (memspace_out == memory::HOST)) + { + control = 2; + } + if ((memspace_in == memory::DEVICE) && (memspace_out == memory::DEVICE)) + { + control = 3; + } + + if (memspace_out == memory::HOST) + { + // check if cpu data allocated + if (h_val_data_ == nullptr) + { + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + owns_cpu_values_ = true; + } + } + + if (memspace_out == memory::DEVICE) + { + // check if cuda data allocated + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_values_ = true; + } + } + + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 2: // cuda->cpu + mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->cuda + mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + case 3: // cuda->cuda + mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; + } + return 0; + } + + /** + * @brief updata matrix values using the _new_values_ provided either as + * HOST or as DEVICE array. + * + * This function only assigns a pointer, but does not copy. It sets update + * flags. + * + * @param[in] new_vals - pointer to new values data (array of real numbers) + * @param[in] memspace - memory space (HOST or DEVICE) of _new_vals_ + * + * @return 0 if successful, -1 if not. + */ + template + int CsrMatrix::setValuesPointer(RealT* new_vals, + memory::MemorySpace memspace) + { + using namespace memory; + setNotUpdated(); + + switch (memspace) + { + case HOST: + if (owns_cpu_values_ && h_val_data_) + { + std::cerr << "Trying to set matrix host values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_val_data_ = new_vals; + h_data_updated_ = true; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_values_ && d_val_data_) + { + std::cerr << "Trying to set matrix device values, but the values already set!\n"; + std::cerr << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_val_data_ = new_vals; + d_data_updated_ = true; + owns_gpu_values_ = false; + break; + default: + return -1; + } + return 0; + } + + template + IdxT* CsrMatrix::getRowData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; + } + } + + template + IdxT* CsrMatrix::getColData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; + } + } + + template + RealT* CsrMatrix::getValues(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; + } + } + + template + int CsrMatrix::copyDataFrom(const IdxT* row_data, + const IdxT* col_data, + const RealT* val_data, + memory::MemorySpace memspace_in, + memory::MemorySpace memspace_out) + { + // four cases (for now) + IdxT nnz_current = nnz_; + setNotUpdated(); + int control = -1; + if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) + { + control = 0; + } + if ((memspace_in == memory::HOST) && ((memspace_out == memory::DEVICE))) + { + control = 1; + } + if (((memspace_in == memory::DEVICE)) && (memspace_out == memory::HOST)) + { + control = 2; + } + if (((memspace_in == memory::DEVICE)) && ((memspace_out == memory::DEVICE))) + { + control = 3; + } + + if (memspace_out == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; + this->h_col_data_ = new IdxT[static_cast(nnz_current)]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + owns_cpu_values_ = true; + } + } + + if (memspace_out == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of device row or column data is null!\n"); + + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + owns_gpu_values_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_sparsity_pattern_ = true; + } + } + + // copy + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; + } + return 0; + } + + template + int CsrMatrix::copyDataFrom(const IdxT* row_data, + const IdxT* col_data, + const RealT* val_data, + IdxT new_nnz, + memory::MemorySpace memspace_in, + memory::MemorySpace memspace_out) + { + destroyMatrixData(memspace_out); + nnz_ = new_nnz; + return copyDataFrom(row_data, col_data, val_data, memspace_in, memspace_out); + } + + template + int CsrMatrix::allocateMatrixData(memory::MemorySpace memspace) + { + IdxT nnz_current = nnz_; + destroyMatrixData(memspace); // just in case + + if (memspace == memory::HOST) + { + this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; + std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); + this->h_col_data_ = new IdxT[static_cast(nnz_current)]; + std::fill(h_col_data_, h_col_data_ + nnz_current, 0); + this->h_val_data_ = new RealT[static_cast(nnz_current)]; + std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + return 0; + } + + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + return 0; + } + return -1; + } + + /** + * @brief Sync data in memspace with the updated memory space. + * + * @param memspace - memory space to be synced up (HOST or DEVICE) + * @return int - 0 if successful, error code otherwise + * + * @pre The memory space other than `memspace` must be up-to-date. Otherwise, + * this function will return an error. + * + * @see CsrMatrix::setUpdated + */ + template + int CsrMatrix::syncData(memory::MemorySpace memspace) + { + using namespace memory; + + switch (memspace) + { + case HOST: + // check if we need to copy or not + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync host with device, but device is out of date!\n" + << "See CsrMatrix::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new IdxT[static_cast(n_ + 1)]; + h_col_data_ = new IdxT[static_cast(nnz_)]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new RealT[static_cast(nnz_)]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); + return 1; + } + if (!h_data_updated_) + { + std::cerr << "CsrMatrix::syncData is trying to sync device with host, but host is out of date!\n" + << "See CsrMatrix::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; + } // switch + } + + /** + * @brief Prints matrix data. + * + * @param out - Output stream where the matrix data is printed + */ + template + void CsrMatrix::print(std::ostream& out, IdxT indexing_base) + { + out << std::scientific << std::setprecision(std::numeric_limits::digits10); + for (IdxT i = 0; i < n_; ++i) + { + for (IdxT j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) + { + out << i + indexing_base << " " + << h_col_data_[j] + indexing_base << " " + << h_val_data_[j] << "\n"; + } + } + } + + template class CsrMatrix; + template class CsrMatrix; + template class CsrMatrix; + } // namespace LinearAlgebra +} // namespace GridKit \ No newline at end of file diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp index 04a808b25..909b09158 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp @@ -1,11 +1,8 @@ #pragma once -#include #include #include -#include #include -#include #include @@ -18,51 +15,10 @@ namespace GridKit class CsrMatrix { public: - CsrMatrix() - { - } + CsrMatrix(); - /** - * @brief basic constructor. It DOES NOT allocate any memory! - * - * @param[in] n - number of rows - * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros - */ - CsrMatrix(IdxT n, IdxT m, IdxT nnz) - : n_{n}, - m_{m}, - nnz_{nnz} - { - setNotUpdated(); + CsrMatrix(IdxT n, IdxT m, IdxT nnz); - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - } - - /** - * @brief Hijacking constructor - * - * @param[in] n - * @param[in] m - * @param[in] nnz - * @param[in,out] rows - * @param[in,out] cols - * @param[in,out] vals - * @param[in] memspace_src - * @param[in] memspace_dst - */ CsrMatrix(IdxT n, IdxT m, IdxT nnz, @@ -70,719 +26,55 @@ namespace GridKit IdxT** cols, RealT** vals, memory::MemorySpace memspace_src = memory::HOST, - memory::MemorySpace memspace_dst = memory::HOST) - : CsrMatrix(n, m, nnz) - { - int control = -1; - if ((memspace_src == memory::HOST) && (memspace_dst == memory::HOST)) - { - control = 0; - } - if ((memspace_src == memory::HOST) && (memspace_dst == memory::DEVICE)) - { - control = 1; - } - if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::HOST)) - { - control = 2; - } - if ((memspace_src == memory::DEVICE) && (memspace_dst == memory::DEVICE)) - { - control = 3; - } - - switch (control) - { - case 0: // cpu->cpu - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - if (d_row_data_ || d_col_data_ || d_val_data_) - { - std::cerr << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - d_data_updated_ = false; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - syncData(memspace_dst); - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - syncData(memspace_dst); - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - if (h_row_data_ || h_col_data_ || h_val_data_) - { - std::cerr << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - h_data_updated_ = false; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - std::cerr << "CsrMatrix constructor failed! " - << "Possible bug in memory spaces setting.\n"; - break; - } - } + memory::MemorySpace memspace_dst = memory::HOST); - ~CsrMatrix() - { - this->destroyMatrixData(memory::HOST); - this->destroyMatrixData(memory::DEVICE); - } + ~CsrMatrix(); // accessors + IdxT getNumRows(); + IdxT getNumColumns(); + IdxT getNnz(); - IdxT getNumRows() const - { - return n_; - } - - IdxT getNumColumns() const - { - return m_; - } - - IdxT getNnz() const - { - return nnz_; - } - - IdxT getRows() const - { - return n_; - } - - IdxT getCols() const - { - return m_; - } + void setNnz(IdxT nnz_new); // for resetting when removing duplicates + int setUpdated(memory::MemorySpace what); - const IdxT* getRowPtr() const - { - return h_row_data_; - } - - const IdxT* getColInd() const - { - return h_col_data_; - } - - const RealT* getValues() const - { - return h_val_data_; - } - - void setNnz(IdxT nnz_new) - { - nnz_ = nnz_new; - } - - /** - * @brief Tags `memspace` as updated. - * - * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" - * - * @return 0 if successful, -1 if not. - * - * @warning This is an expert-level function. Use only if you know what you - * are doing. - */ - int setUpdated(memory::MemorySpace memspace) - { - using namespace memory; - switch (memspace) - { - case HOST: - h_data_updated_ = true; - d_data_updated_ = false; - break; - case DEVICE: - d_data_updated_ = true; - h_data_updated_ = false; - break; - } - return 0; - } - - IdxT* getRowData(memory::MemorySpace memspace = memory::HOST) - { - using namespace memory; - switch (memspace) - { - case HOST: - return h_row_data_; - case DEVICE: - return d_row_data_; - default: - return nullptr; - } - } - - IdxT* getColData(memory::MemorySpace memspace = memory::HOST) - { - using namespace memory; - switch (memspace) - { - case HOST: - return h_col_data_; - case DEVICE: - return d_col_data_; - default: - return nullptr; - } - } - - RealT* getValues(memory::MemorySpace memspace = memory::HOST) - { - using namespace memory; - switch (memspace) - { - case HOST: - return h_val_data_; - case DEVICE: - return d_val_data_; - default: - return nullptr; - } - } + IdxT* getRowData(memory::MemorySpace memspace = memory::HOST); + IdxT* getColData(memory::MemorySpace memspace = memory::HOST); + RealT* getValues(memory::MemorySpace memspace = memory::HOST); int copyDataFrom(const IdxT* row_data, const IdxT* col_data, const RealT* val_data, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - IdxT nnz_current = nnz_; - setNotUpdated(); - int control = -1; - if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) - { - control = 0; - } - if ((memspace_in == memory::HOST) && ((memspace_out == memory::DEVICE))) - { - control = 1; - } - if (((memspace_in == memory::DEVICE)) && (memspace_out == memory::HOST)) - { - control = 2; - } - if (((memspace_in == memory::DEVICE)) && ((memspace_out == memory::DEVICE))) - { - control = 3; - } - - if (memspace_out == memory::HOST) - { - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; - this->h_col_data_ = new IdxT[static_cast(nnz_current)]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - owns_cpu_values_ = true; - } - } - - if (memspace_out == memory::DEVICE) - { - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::copyDataFrom one of device row or column data is null!\n"); - - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - owns_gpu_values_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - } - } - - switch (control) - { - case 0: // cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2: // gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1: // cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3: // gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; - } - return 0; - } - + memory::MemorySpace memspace_out); int copyDataFrom(const IdxT* row_data, const IdxT* col_data, const RealT* val_data, IdxT new_nnz, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - destroyMatrixData(memspace_out); - nnz_ = new_nnz; - return copyDataFrom(row_data, col_data, val_data, memspace_in, memspace_out); - } - - /** - * @brief Build CSR from sorted/deduplicated COO entries. - * - * Copies the provided row pointers, column indices, and values - * into the host arrays of this matrix. Allocates host data if needed. - * - * @param[in] row_data - Row pointers (size n_ + 1) - * @param[in] col_data - Column indices (size nnz_) - * @param[in] val_data - Values (size nnz_) - */ - void addEntries(const IdxT* row_data, const IdxT* col_data, const RealT* val_data) - { - for (IdxT i = 0; i < n_ + 1; ++i) - { - h_row_data_[i] = row_data[i]; - } - for (IdxT k = 0; k < nnz_; ++k) - { - h_col_data_[k] = col_data[k]; - h_val_data_[k] = val_data[k]; - } - h_data_updated_ = true; - d_data_updated_ = false; - } + memory::MemorySpace memspace_out); - int allocateMatrixData(memory::MemorySpace memspace) - { - IdxT nnz_current = nnz_; - destroyMatrixData(memspace); - - if (memspace == memory::HOST) - { - this->h_row_data_ = new IdxT[static_cast(n_ + 1)]; - std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); - this->h_col_data_ = new IdxT[static_cast(nnz_current)]; - std::fill(h_col_data_, h_col_data_ + nnz_current, 0); - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - return 0; - } - - if (memspace == memory::DEVICE) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - return 0; - } - return -1; - } - - /** - * @brief Set the pointers for matrix row, column, value data. - * - * Useful if interfacing with other codes - this function only assigns - * pointers, but it does not allocate nor copy anything. - * - * @param[in] row_data - pointer to row data - * @param[in] col_data - pointer to column data - * @param[in] val_data - pointer to value data - * @param[in] memspace - memory space (HOST or DEVICE) of incoming data - * - * @return 0 if successful, 1 if not. - */ + int allocateMatrixData(memory::MemorySpace memspace); int setDataPointers(IdxT* row_data, IdxT* col_data, RealT* val_data, - memory::MemorySpace memspace) - { - using namespace memory; - - setNotUpdated(); + memory::MemorySpace memspace); - switch (memspace) - { - case HOST: - if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) - { - std::cerr << "Trying to set matrix host data, but the data already set!\n"; - std::cerr << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_cpu_values_ && h_val_data_) - { - std::cerr << "Trying to set matrix host values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_row_data_ = row_data; - h_col_data_ = col_data; - h_val_data_ = val_data; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) - { - std::cerr << "Trying to set matrix host data, but the data already set!\n"; - std::cerr << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_gpu_values_ && d_val_data_) - { - std::cerr << "Trying to set matrix device values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_row_data_ = row_data; - d_col_data_ = col_data; - d_val_data_ = val_data; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - break; - } - return 0; - } + int destroyMatrixData(memory::MemorySpace memspace); - /** - * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it. - * - * @param[in] memspace - memory space (HOST or DEVICE) - * - * @return 0 if successful, -1 if not. - */ - int destroyMatrixData(memory::MemorySpace memspace) - { - using namespace memory; - switch (memspace) - { - case HOST: - if (owns_cpu_sparsity_pattern_) - { - delete[] h_row_data_; - delete[] h_col_data_; - h_row_data_ = nullptr; - h_col_data_ = nullptr; - } - if (owns_cpu_values_) - { - delete[] h_val_data_; - h_val_data_ = nullptr; - } - return 0; - case DEVICE: - if (owns_gpu_sparsity_pattern_) - { - mem_.deleteOnDevice(d_row_data_); - mem_.deleteOnDevice(d_col_data_); - d_row_data_ = nullptr; - d_col_data_ = nullptr; - } - if (owns_gpu_values_) - { - mem_.deleteOnDevice(d_val_data_); - d_val_data_ = nullptr; - } - return 0; - default: - return -1; - } - } + void print(std::ostream& file_out = std::cout, IdxT indexing_base = 0); - /** - * @brief Prints matrix data. - * - * @param out - Output stream where the matrix data is printed - * @param indexing_base - base index for printing (default 0) - */ - void print(std::ostream& out = std::cout, IdxT indexing_base = 0) - { - out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for (IdxT i = 0; i < n_; ++i) - { - for (IdxT j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) - { - out << i + indexing_base << " " - << h_col_data_[j] + indexing_base << " " - << h_val_data_[j] << "\n"; - } - } - } + int syncData(memory::MemorySpace memspace_out); - /** - * @brief Sync data in memspace with the updated memory space. - * - * @param memspace - memory space to be synced up (HOST or DEVICE) - * @return int - 0 if successful, error code otherwise - * - * @pre The memory space other than `memspace` must be up-to-date. - */ - int syncData(memory::MemorySpace memspace) - { - using namespace memory; - - switch (memspace) - { - case HOST: - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In CsrMatrix::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync host with device, but device is out of date!\n" - << "See CsrMatrix::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - h_row_data_ = new IdxT[static_cast(n_ + 1)]; - h_col_data_ = new IdxT[static_cast(nnz_)]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - h_val_data_ = new RealT[static_cast(nnz_)]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In CsrMatrix::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) - { - std::cerr << "CsrMatrix::syncData is trying to sync device with host, but host is out of date!\n" - << "See CsrMatrix::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: - return 1; - } - } - - /** - * @brief Update matrix values by copying from new_vals. - * - * Allocates if needed. Sets ownership and update flags. - * - * @param[in] new_vals - pointer to new values data - * @param[in] memspace_in - memory space of new_vals - * @param[in] memspace_out - memory space of matrix values to update - * - * @return 0 if successful, -1 if not. - */ + // update Values just updates values; it allocates if necessary. + // values have the same dimensions between different formats int copyValues(const RealT* new_vals, memory::MemorySpace memspace_in, - memory::MemorySpace memspace_out) - { - IdxT nnz_current = nnz_; - setNotUpdated(); - int control = -1; - if ((memspace_in == memory::HOST) && (memspace_out == memory::HOST)) - { - control = 0; - } - if ((memspace_in == memory::HOST) && (memspace_out == memory::DEVICE)) - { - control = 1; - } - if ((memspace_in == memory::DEVICE) && (memspace_out == memory::HOST)) - { - control = 2; - } - if ((memspace_in == memory::DEVICE) && (memspace_out == memory::DEVICE)) - { - control = 3; - } + memory::MemorySpace memspace_out); - if (memspace_out == memory::HOST) - { - if (h_val_data_ == nullptr) - { - this->h_val_data_ = new RealT[static_cast(nnz_current)]; - owns_cpu_values_ = true; - } - } - - if (memspace_out == memory::DEVICE) - { - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_values_ = true; - } - } - - switch (control) - { - case 0: // cpu->cpu - mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 2: // cuda->cpu - mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 1: // cpu->cuda - mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - case 3: // cuda->cuda - mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; - } - return 0; - } - - /** - * @brief Set values pointer (does not copy). Use caution. - * - * @param[in] new_vals - pointer to new values data - * @param[in] memspace - memory space (HOST or DEVICE) of new_vals - * - * @return 0 if successful, -1 if not. - */ + // set new values just sets the pointer, use caution. int setValuesPointer(RealT* new_vals, - memory::MemorySpace memspace) - { - using namespace memory; - setNotUpdated(); - - switch (memspace) - { - case HOST: - if (owns_cpu_values_ && h_val_data_) - { - std::cerr << "Trying to set matrix host values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_val_data_ = new_vals; - h_data_updated_ = true; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_values_ && d_val_data_) - { - std::cerr << "Trying to set matrix device values, but the values already set!\n"; - std::cerr << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_val_data_ = new_vals; - d_data_updated_ = true; - owns_gpu_values_ = false; - break; - default: - return -1; - } - return 0; - } + memory::MemorySpace memspace); private: IdxT n_{0}; ///< number of rows @@ -801,11 +93,7 @@ namespace GridKit RealT* d_val_data_{nullptr}; ///< value data (DEVICE) bool d_data_updated_{false}; ///< DEVICE update flag - void setNotUpdated() - { - h_data_updated_ = false; - d_data_updated_ = false; - } + void setNotUpdated(); // Data ownership flags bool owns_cpu_sparsity_pattern_{false}; ///< for row/col data @@ -817,4 +105,4 @@ namespace GridKit MemoryHandler mem_; ///< Device memory manager object }; } // namespace LinearAlgebra -} // namespace GridKit +} // namespace GridKit \ No newline at end of file diff --git a/GridKit/Model/Evaluator.hpp b/GridKit/Model/Evaluator.hpp index 1a60dbe84..7bd55cd5d 100644 --- a/GridKit/Model/Evaluator.hpp +++ b/GridKit/Model/Evaluator.hpp @@ -84,7 +84,7 @@ namespace GridKit { } - virtual const CsrMatrix* getCsrJacobian() const + virtual CsrMatrix* getCsrJacobian() const { return nullptr; } diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index 3d16f7d20..425821890 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -133,6 +134,7 @@ namespace GridKit delete comp; } delete csr_jac_; + delete[] map_to_csr_; } /** @@ -176,8 +178,8 @@ namespace GridKit * @param[in] s size of the vector (total number of unknowns) * * @post System model vectors allocated with size s - * @post CSR Jacobian sparsity pattern computed - * @post COO->CSR mapping computed + * @post CSR Jacobian sparsity pattern is computed + * @post COO->CSR mapping is computed * * @return int 0 if successful, positive if there's a recoverable error, negative if unrecoverable */ @@ -218,10 +220,10 @@ namespace GridKit } } - // Allocate COO entries - std::vector rows(static_cast(nnz_dup)); - std::vector cols(static_cast(nnz_dup)); - std::vector vals(static_cast(nnz_dup)); + // Allocate COO triplet arrays (we own these until we hand off to CsrMatrix) + IdxT* rows_dup = new IdxT[nnz_dup]; + IdxT* cols_dup = new IdxT[nnz_dup]; + RealT* vals_dup = new RealT[nnz_dup]; IdxT counter = 0; for (const auto& component : components_) @@ -233,27 +235,42 @@ namespace GridKit { if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) { - rows[counter] = component->getNodeConnection(r[i]); - cols[counter] = component->getNodeConnection(c[i]); - vals[counter] = v[i]; + rows_dup[counter] = component->getNodeConnection(r[i]); + cols_dup[counter] = component->getNodeConnection(c[i]); + vals_dup[counter] = v[i]; counter++; } } } - // Build a COO from the collected entries - jac_.resetEntries(rows, cols, vals, size_, size_); + // Build the system COO Jacobian + LinearAlgebra::CooMatrix jac(size_, size_, nnz_dup, &rows_dup, &cols_dup, &vals_dup); - // Extract CSR data - std::tuple, std::vector, std::vector> csrdata = jac_.getCsrData(); - const auto& [p, c, v] = csrdata; + // Populate CSR data with sort and deduplicate + IdxT* row_ptrs = jac.getCsrRowData(); - nnz_ = static_cast(v.size()); + // Deduplicated nnz + nnz_ = jac.getNnz(); - // Instantiate CSR Jacobian - csr_jac_ = new CsrMatrix(size_, size_, nnz_); - csr_jac_->allocateMatrixData(GridKit::LinearAlgebra::memory::HOST); - csr_jac_->addEntries(p.data(), c.data(), v.data()); + // Allocate cols/vals with deduplicated nnz + IdxT* cols = new IdxT[nnz_]; + RealT* vals = new RealT[nnz_]; + + std::copy(jac.getColData(), jac.getColData() + nnz_, cols); + std::copy(jac.getValues(), jac.getValues() + nnz_, vals); + + // Create the CSR Jacobian + csr_jac_ = new CsrMatrix(size_, size_, nnz_, &row_ptrs, &cols, &vals); + + const IdxT* map_to_sorted = jac.getMapToSorted(); + const IdxT* map_to_dedup = jac.getMapToDeduplicated(); + + // Build a mappping from original COO index to CSR index + map_to_csr_ = new IdxT[nnz_dup]; + for (IdxT i = 0; i < nnz_dup; ++i) + { + map_to_csr_[map_to_sorted[i]] = map_to_dedup[i]; + } return 0; } @@ -370,8 +387,6 @@ namespace GridKit vals[i] = 0.0; } - std::vector& map_to_csr = jac_.getMapToCsr(); - // Update CSR values from component Jacobians IdxT counter = 0; for (const auto& component : components_) @@ -385,7 +400,7 @@ namespace GridKit { if (component->getNodeConnection(r[i]) != neg1_ && component->getNodeConnection(c[i]) != neg1_) { - vals[map_to_csr[counter]] += v[i]; + vals[map_to_csr_[counter]] += v[i]; ++counter; } } @@ -472,7 +487,7 @@ namespace GridKit jac_.printMatrixMarket(filename, title); } - const CsrMatrix* getCsrJacobian() const override + CsrMatrix* getCsrJacobian() const override { return csr_jac_; } @@ -487,6 +502,7 @@ namespace GridKit std::vector components_; + IdxT* map_to_csr_{nullptr}; CsrMatrix* csr_jac_{nullptr}; int jac_call_count_{0}; diff --git a/GridKit/Solver/Dynamic/CMakeLists.txt b/GridKit/Solver/Dynamic/CMakeLists.txt index 0cceed1fd..bc114afd6 100644 --- a/GridKit/Solver/Dynamic/CMakeLists.txt +++ b/GridKit/Solver/Dynamic/CMakeLists.txt @@ -20,7 +20,8 @@ if (GRIDKIT_ENABLE_SUNDIALS_SPARSE) PUBLIC SUNDIALS::idas PUBLIC SUNDIALS::sunlinsolklu PUBLIC GridKit::definitions - PUBLIC GridKit::utilities_logger) + PUBLIC GridKit::utilities_logger + PUBLIC GridKit::sparse) else() gridkit_add_library(solvers_dyn SOURCES @@ -31,5 +32,6 @@ else() PUBLIC SUNDIALS::nvecserial PUBLIC SUNDIALS::idas PUBLIC GridKit::definitions - PUBLIC GridKit::utilities_logger) + PUBLIC GridKit::utilities_logger + PUBLIC GridKit::sparse) endif() diff --git a/GridKit/Solver/Dynamic/Ida.cpp b/GridKit/Solver/Dynamic/Ida.cpp index 22e3560a8..cac99195c 100644 --- a/GridKit/Solver/Dynamic/Ida.cpp +++ b/GridKit/Solver/Dynamic/Ida.cpp @@ -172,9 +172,9 @@ namespace AnalysisManager sunindextype nnz; /// @todo Remove this flag and require all apps to configure CsrJac - bool useCsrJac = (model_->getCsrJacobian() != nullptr); + bool use_csr_jac = (model_->getCsrJacobian() != nullptr); - if (useCsrJac) + if (use_csr_jac) { nnz = static_cast(model_->getCsrJacobian()->getNnz()); } @@ -196,7 +196,7 @@ namespace AnalysisManager retval = IDASetLinearSolver(solver_, linearSolver_, JacobianMat_); checkOutput(retval, "IDASetLinearSolver"); - if (useCsrJac) + if (use_csr_jac) { retval = IDASetJacFn(solver_, this->CsrJac); } @@ -844,27 +844,27 @@ namespace AnalysisManager model->evaluateJacobian(); - using CsrMatrix = GridKit::LinearAlgebra::CsrMatrix; - const CsrMatrix* Jac = model->getCsrJacobian(); + using CsrMatrix = GridKit::LinearAlgebra::CsrMatrix; + CsrMatrix* Jac = model->getCsrJacobian(); SUNMatZero(J); - sunindextype* sun_row_ptrs = SUNSparseMatrix_IndexPointers(J); - sunindextype* sun_colunm_indices = SUNSparseMatrix_IndexValues(J); - RealT* sun_values = SUNSparseMatrix_Data(J); + sunindextype* sun_row_ptrs = SUNSparseMatrix_IndexPointers(J); + sunindextype* sun_cols = SUNSparseMatrix_IndexValues(J); + RealT* sun_vals = SUNSparseMatrix_Data(J); - IdxT n = Jac->getRows(); + IdxT n = Jac->getNumRows(); IdxT nnz = Jac->getNnz(); // Get reference to the jacobian entries - const IdxT* row_ptrs = Jac->getRowPtr(); - const IdxT* colunm_indices = Jac->getColInd(); - const RealT* values = Jac->getValues(); + IdxT* row_ptrs = Jac->getRowData(); + IdxT* cols = Jac->getColData(); + RealT* vals = Jac->getValues(); // Copy data from model jac to sundials std::copy(row_ptrs, row_ptrs + n + 1, sun_row_ptrs); - std::copy(colunm_indices, colunm_indices + nnz, sun_colunm_indices); - std::copy(values, values + nnz, sun_values); + std::copy(cols, cols + nnz, sun_cols); + std::copy(vals, vals + nnz, sun_vals); return 0; } diff --git a/examples/LinearAlgebra/SparseTest/CMakeLists.txt b/examples/LinearAlgebra/SparseTest/CMakeLists.txt index 3ef32ea66..99e82d729 100644 --- a/examples/LinearAlgebra/SparseTest/CMakeLists.txt +++ b/examples/LinearAlgebra/SparseTest/CMakeLists.txt @@ -1,7 +1,7 @@ add_executable(spmattest SparseTest.cpp) -target_link_libraries(spmattest GridKit::CsrMatrix) +target_link_libraries(spmattest GridKit::sparse) add_test(NAME SparseMatrixTest COMMAND spmattest) install(TARGETS spmattest RUNTIME DESTINATION bin) diff --git a/examples/LinearAlgebra/SparseTest/SparseTest.cpp b/examples/LinearAlgebra/SparseTest/SparseTest.cpp index 444c93bd5..28f72c495 100644 --- a/examples/LinearAlgebra/SparseTest/SparseTest.cpp +++ b/examples/LinearAlgebra/SparseTest/SparseTest.cpp @@ -98,79 +98,5 @@ int main() std::cout << "Failed!" << std::endl; } - std::vector v3 = {1.2, 1.8, 1.0, 1.6, 1.4, 1.7, 1.1, 1.5, 1.3}; - std::vector x3 = {1, 2, 0, 0, 2, 1, 0, 2, 1}; - std::vector y3 = {1, 2, 0, 0, 0, 1, 1, 2, 2}; - size_t m3 = 3; - size_t n3 = 3; - - GridKit::LinearAlgebra::COO_Matrix C; - C.resetEntries(x3, y3, v3, m3, n3); - - std::vector csr_r; - std::vector csr_c; - std::vector csr_v; - std::tie(csr_r, csr_c, csr_v) = C.getCsrData(); - - std::cout << "C after getCsrData:\n"; - C.printMatrix(); - - for (size_t i = 0; i < csr_r.size() - 1; i++) - { - std::cout << csr_r[i] << std::endl; - size_t rdiff = csr_r[i + 1] - csr_r[i]; - for (size_t j = 0; j < rdiff; j++) - { - std::cout << csr_c[j + csr_r[i]] << ", " << csr_v[j + csr_r[i]] << std::endl; - } - } - std::cout << csr_r[csr_r.size() - 1] << std::endl; - - // Basic Verification test - std::vector csr_rtest = {0, 2, 4, 6}; - std::vector csr_ctest = {0, 1, 1, 2, 0, 2}; - std::vector csr_valtest = {2.6, 1.1, 2.9, 1.3, 1.4, 3.3}; - std::vector maptest = {2, 5, 0, 0, 4, 2, 1, 5, 3}; - - std::vector& map_to_csr = C.getMapToCsr(); - - assert(csr_rtest.size() == csr_r.size()); - assert(csr_ctest.size() == csr_c.size()); - assert(csr_valtest.size() == csr_v.size()); - assert(maptest.size() == map_to_csr.size()); - - for (size_t i = 0; i < csr_rtest.size(); i++) - { - if (csr_r[i] != csr_rtest[i]) - { - failval--; - } - } - for (size_t i = 0; i < csr_ctest.size(); i++) - { - double vdiff = csr_v[i] - csr_valtest[i]; - if (csr_c[i] != csr_ctest[i] || -1e-14 > vdiff || vdiff > 1e-14) - { - failval--; - } - } - - for (size_t i = 0; i < maptest.size(); i++) - { - if (map_to_csr[i] != maptest[i]) - { - failval--; - } - } - - if (failval == 0) - { - std::cout << "Success!" << std::endl; - } - else - { - std::cout << "Failed!" << std::endl; - } - return failval; -} +} \ No newline at end of file diff --git a/examples/PowerElectronics/Microgrid/Microgrid.cpp b/examples/PowerElectronics/Microgrid/Microgrid.cpp index fc2f379ef..1604f0d6b 100644 --- a/examples/PowerElectronics/Microgrid/Microgrid.cpp +++ b/examples/PowerElectronics/Microgrid/Microgrid.cpp @@ -1,6 +1,7 @@ #define _USE_MATH_DEFINES +#include #include #include #include @@ -340,7 +341,12 @@ int main(int /* argc */, char const** /* argv */) idas->getDefaultInitialCondition(); idas->initializeSimulation(t_init); + auto t_start = std::chrono::high_resolution_clock::now(); idas->runSimulation(t_final); + auto t_end = std::chrono::high_resolution_clock::now(); + std::cout << "runSimulation: " + << std::chrono::duration(t_end - t_start).count() + << " s\n"; std::vector& yfinial = sysmodel->y(); diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt b/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt index f94f273bd..8cf88f1c7 100644 --- a/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -1,8 +1,12 @@ add_executable(test_sparse runSparseTests.cpp) -target_link_libraries(test_sparse GridKit::CsrMatrix) - +target_link_libraries(test_sparse GridKit::sparse) + add_test(NAME SparseTest COMMAND $) +add_executable(test_sparse_coo runSparseCooTests.cpp) +target_link_libraries(test_sparse_coo GridKit::sparse) + +add_test(NAME SparseCooTest COMMAND $) -install(TARGETS test_sparse +install(TARGETS test_sparse test_sparse_coo RUNTIME DESTINATION bin) diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/SparseCooTests.hpp b/tests/UnitTests/LinearAlgebra/SparseMatrix/SparseCooTests.hpp new file mode 100644 index 000000000..d784029a6 --- /dev/null +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/SparseCooTests.hpp @@ -0,0 +1,178 @@ +#include +#include + +namespace GridKit +{ + namespace Testing + { + using namespace LinearAlgebra; + + template + class SparseCooTests + { + + using CooMatrix = LinearAlgebra::CooMatrix; + + public: + SparseCooTests(memory::MemorySpace memspace = memory::HOST) + : memspace_(memspace) + { + } + + ~SparseCooTests() + { + } + + /** + * @brief Constructor test for COO sparse matrix + * + * Constructs a COO matrix with the given parameters and confirms that the + * parameters are stored correctly. + * + * @param[in] n - number of rows + * @param[in] m - number of columns + * @param[in] nnz - number of non-zeros + * + * @return TestOutcome indicating success or failure of the test + */ + TestOutcome constructor(IdxT n, IdxT m, IdxT nnz) + { + TestStatus success; + success = true; + + CooMatrix A(n, m, nnz); + + if (A.getNumRows() != n || A.getNumColumns() != m || A.getNnz() != nnz) + { + std::cout << "Matrix dimensions do not match expected values.\n"; + success = false; + } + + return success.report(__func__); + } + + /** + * @brief Test set data pointers for the COO sparse matrix + * + * Sets the row, column, and value data pointers for the COO matrix and + * checks if the pointers are set correctly. + * + * @param[in] n - number of rows + * @param[in] m - number of columns + * @param[in] row_density - number of non-zeros per row + * + * @return TestOutcome indicating success or failure of the test + */ + TestOutcome setDataPointers(IdxT n, IdxT m, IdxT row_density) + { + TestStatus success; + success = true; + + IdxT nnz = n * row_density; + + CooMatrix A(n, m, nnz); + + IdxT* h_row_data = new IdxT[static_cast(nnz)]; + for (IdxT i = 0; i < nnz; ++i) + { + h_row_data[i] = i / row_density; + } + IdxT* h_col_data = new IdxT[static_cast(nnz)]; + for (IdxT i = 0; i < nnz; ++i) + { + h_col_data[i] = i % m; + } + ScalarT* h_val_data = new ScalarT[static_cast(nnz)]; + for (IdxT i = 0; i < nnz; ++i) + { + h_val_data[i] = static_cast(i + 1); + } + + if (A.setDataPointers(h_row_data, h_col_data, h_val_data, memory::HOST) != 0) + { + std::cout << "Failed to set data pointers.\n"; + success = false; + } + else if (A.getRowData(memory::HOST) != h_row_data || A.getColData(memory::HOST) != h_col_data || A.getValues(memory::HOST) != h_val_data) + { + std::cout << "Data pointers do not point to expected values.\n"; + success = false; + } + + delete[] h_row_data; + delete[] h_col_data; + delete[] h_val_data; + + return success.report(__func__); + } + + /** + * @brief Test getCsrRowData on a small COO matrix with duplicates + * + * Builds a 3x3 COO matrix with 5 entries (including one duplicate pair), + * calls getCsrRowData(), and verifies that the returned CSR row pointers + * and the updated nnz are correct. + * + * @return TestOutcome indicating success or failure of the test + */ + TestOutcome getCsrRowData() + { + TestStatus success; + success = true; + + const IdxT n = 3; + const IdxT m = 3; + const IdxT nnz_dup = 5; + + CooMatrix A(n, m, nnz_dup); + + IdxT* rows = new IdxT[nnz_dup]{2, 0, 0, 1, 1}; + IdxT* cols = new IdxT[nnz_dup]{2, 0, 1, 1, 1}; + ScalarT* vals = new ScalarT[nnz_dup]{5, 1, 2, 3, 4}; + + if (A.setDataPointers(rows, cols, vals, memory::HOST) != 0) + { + std::cout << "Failed to set data pointers.\n"; + success = false; + } + else + { + IdxT* row_ptrs = A.getCsrRowData(); + + const IdxT expected_nnz = 4; + const IdxT expected_row_ptrs[4] = {0, 2, 3, 4}; + + if (A.getNnz() != expected_nnz) + { + std::cout << "nnz after deduplication is " << A.getNnz() + << ", expected " << expected_nnz << ".\n"; + success = false; + } + + for (IdxT i = 0; i <= n; ++i) + { + if (row_ptrs[i] != expected_row_ptrs[i]) + { + std::cout << "row_ptrs[" << i << "] = " << row_ptrs[i] + << ", expected " << expected_row_ptrs[i] << ".\n"; + success = false; + break; + } + } + + delete[] row_ptrs; + } + + delete[] rows; + delete[] cols; + delete[] vals; + + return success.report(__func__); + } + + private: + memory::MemorySpace memspace_; + MemoryHandler mem_; + }; // class SparseCooTests + } // namespace Testing +} // namespace GridKit diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp new file mode 100644 index 000000000..bdfe583a3 --- /dev/null +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp @@ -0,0 +1,37 @@ +#include "SparseCooTests.hpp" + +using namespace GridKit; +using namespace LinearAlgebra; +using namespace Testing; + +/** + * @brief Run COO sparse matrix tests with a given backend + * + * @param[in] backend - name of the hardware backend + * @param[in] memspace - memory space for the tests + * @param[out] result - test results + */ +template +void runTests(const std::string& backend, memory::MemorySpace memspace, TestingResults& result) +{ + std::cout << "Running tests on " << backend << ":\n"; + + SparseCooTests test(memspace); + + result += test.constructor(50, 50, 2); + result += test.constructor(50, 100, 2); + + result += test.setDataPointers(50, 50, 2); + result += test.setDataPointers(50, 100, 2); + + result += test.getCsrRowData(); +} + +int main(int, char**) +{ + TestingResults result; + runTests("CPU", memory::HOST, result); + runTests("CPU", memory::HOST, result); + + return result.summary(); +} From 23338c47404ad47dadf0d78ee40f49f29acd661b Mon Sep 17 00:00:00 2001 From: Kakeru Date: Wed, 18 Feb 2026 15:39:51 -0500 Subject: [PATCH 10/15] Apply pre-commit --- GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt | 2 +- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 2 +- GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp | 2 +- GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp | 2 +- GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp | 2 +- GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp | 2 +- examples/LinearAlgebra/SparseTest/SparseTest.cpp | 2 +- examples/PowerElectronics/Microgrid/Microgrid.cpp | 6 ------ 8 files changed, 7 insertions(+), 13 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt index b5149d74e..c8c447395 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -5,4 +5,4 @@ gridkit_add_library(sparse HEADERS COO_Matrix.hpp CooMatrix.hpp - CsrMatrix.hpp) \ No newline at end of file + CsrMatrix.hpp) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index daa703bfb..8a2dde4dd 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -998,4 +998,4 @@ namespace GridKit { } } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp index 0a1a4d96f..7964dee19 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp @@ -603,4 +603,4 @@ namespace GridKit template class CooMatrix; template class CooMatrix; } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp index fb65e376a..b167d2760 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.hpp @@ -87,4 +87,4 @@ namespace GridKit MemoryHandler mem_; ///< Device memory manager object }; } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp index d798949ab..87fa7433d 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.cpp @@ -822,4 +822,4 @@ namespace GridKit template class CsrMatrix; template class CsrMatrix; } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit diff --git a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp index 909b09158..44d35accb 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CsrMatrix.hpp @@ -105,4 +105,4 @@ namespace GridKit MemoryHandler mem_; ///< Device memory manager object }; } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit diff --git a/examples/LinearAlgebra/SparseTest/SparseTest.cpp b/examples/LinearAlgebra/SparseTest/SparseTest.cpp index 28f72c495..2e5224c7b 100644 --- a/examples/LinearAlgebra/SparseTest/SparseTest.cpp +++ b/examples/LinearAlgebra/SparseTest/SparseTest.cpp @@ -99,4 +99,4 @@ int main() } return failval; -} \ No newline at end of file +} diff --git a/examples/PowerElectronics/Microgrid/Microgrid.cpp b/examples/PowerElectronics/Microgrid/Microgrid.cpp index 1604f0d6b..fc2f379ef 100644 --- a/examples/PowerElectronics/Microgrid/Microgrid.cpp +++ b/examples/PowerElectronics/Microgrid/Microgrid.cpp @@ -1,7 +1,6 @@ #define _USE_MATH_DEFINES -#include #include #include #include @@ -341,12 +340,7 @@ int main(int /* argc */, char const** /* argv */) idas->getDefaultInitialCondition(); idas->initializeSimulation(t_init); - auto t_start = std::chrono::high_resolution_clock::now(); idas->runSimulation(t_final); - auto t_end = std::chrono::high_resolution_clock::now(); - std::cout << "runSimulation: " - << std::chrono::duration(t_end - t_start).count() - << " s\n"; std::vector& yfinial = sysmodel->y(); From 7ae52d136f6c0f4208805de9713b425b9b94ebaf Mon Sep 17 00:00:00 2001 From: KakeruUeda Date: Wed, 18 Feb 2026 20:54:17 +0000 Subject: [PATCH 11/15] Apply pre-commmit fixes --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 8a2dde4dd..7f3671a22 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -189,19 +189,6 @@ namespace GridKit return {row_size_vec, this->column_indices_, this->values_}; } - for (IdxT k = 0; k < nnz_dup; ++k) - { - map_to_csr_[map_to_sorted[k]] = map_to_dedup[k]; - } - - // Shrink internal arrays to deduplicated size - row_indices_.resize(nnz_dedup); - column_indices_.resize(nnz_dedup); - values_.resize(nnz_dedup); - - return {row_ptrs, column_indices_, values_}; - } - /** * @brief Only creates the row data * From 4238d15db31709a74c604889ebeaeafb06c3e90f Mon Sep 17 00:00:00 2001 From: Kakeru Date: Wed, 18 Feb 2026 16:05:31 -0500 Subject: [PATCH 12/15] Fix error in COO_Matrix.hpp --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 7f3671a22..07dcbeacc 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -985,4 +985,4 @@ namespace GridKit { } } // namespace LinearAlgebra -} // namespace GridKit +} // namespace GridKit \ No newline at end of file From cdb2fe1daf5b4c804f15f42688faa2d304487576 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Thu, 19 Feb 2026 15:40:16 -0500 Subject: [PATCH 13/15] Fix typo and naming --- GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt | 2 +- GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp | 2 +- GridKit/Model/Evaluator.hpp | 5 +++++ .../PowerElectronics/SystemModelPowerElectronics.hpp | 2 +- GridKit/Solver/Dynamic/CMakeLists.txt | 4 ++-- examples/LinearAlgebra/SparseTest/CMakeLists.txt | 2 +- examples/PowerElectronics/RLCircuit/RLCircuit.cpp | 2 +- .../LinearAlgebra/SparseMatrix/CMakeLists.txt | 10 +++++----- .../LinearAlgebra/SparseMatrix/runSparseCooTests.cpp | 1 + .../LinearAlgebra/SparseMatrix/runSparseTests.cpp | 1 + 10 files changed, 19 insertions(+), 12 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt index c8c447395..14c11bebf 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/GridKit/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -1,4 +1,4 @@ -gridkit_add_library(sparse +gridkit_add_library(SparseMatrix SOURCES CooMatrix.cpp CsrMatrix.cpp diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp index 7964dee19..20e575c25 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp @@ -403,7 +403,7 @@ namespace GridKit map_to_dedup_[0] = 0; csr_row_data[h_row_data_[0] + 1]++; - // Deduplicate sroted entries + // Deduplicate sorted entries IdxT w = 0; for (IdxT i = 1; i < nnz_; i++) { diff --git a/GridKit/Model/Evaluator.hpp b/GridKit/Model/Evaluator.hpp index 7bd55cd5d..ffec10cf2 100644 --- a/GridKit/Model/Evaluator.hpp +++ b/GridKit/Model/Evaluator.hpp @@ -84,6 +84,11 @@ namespace GridKit { } + /** + * @brief Return a pointer to the CSR Jacobian + * + * @todo Remove this and use CsrMatirx for jac_ + */ virtual CsrMatrix* getCsrJacobian() const { return nullptr; diff --git a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp index 425821890..634409227 100644 --- a/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp +++ b/GridKit/Model/PowerElectronics/SystemModelPowerElectronics.hpp @@ -324,7 +324,7 @@ namespace GridKit return 0; } - int tagDifferentiable() + int tagDifferentiable() final { return 0; } diff --git a/GridKit/Solver/Dynamic/CMakeLists.txt b/GridKit/Solver/Dynamic/CMakeLists.txt index bc114afd6..4e4abd1e4 100644 --- a/GridKit/Solver/Dynamic/CMakeLists.txt +++ b/GridKit/Solver/Dynamic/CMakeLists.txt @@ -21,7 +21,7 @@ if (GRIDKIT_ENABLE_SUNDIALS_SPARSE) PUBLIC SUNDIALS::sunlinsolklu PUBLIC GridKit::definitions PUBLIC GridKit::utilities_logger - PUBLIC GridKit::sparse) + PUBLIC GridKit::SparseMatrix) else() gridkit_add_library(solvers_dyn SOURCES @@ -33,5 +33,5 @@ else() PUBLIC SUNDIALS::idas PUBLIC GridKit::definitions PUBLIC GridKit::utilities_logger - PUBLIC GridKit::sparse) + PUBLIC GridKit::SparseMatrix) endif() diff --git a/examples/LinearAlgebra/SparseTest/CMakeLists.txt b/examples/LinearAlgebra/SparseTest/CMakeLists.txt index 99e82d729..c4c664d5b 100644 --- a/examples/LinearAlgebra/SparseTest/CMakeLists.txt +++ b/examples/LinearAlgebra/SparseTest/CMakeLists.txt @@ -1,7 +1,7 @@ add_executable(spmattest SparseTest.cpp) -target_link_libraries(spmattest GridKit::sparse) +target_link_libraries(spmattest GridKit::SparseMatrix) add_test(NAME SparseMatrixTest COMMAND spmattest) install(TARGETS spmattest RUNTIME DESTINATION bin) diff --git a/examples/PowerElectronics/RLCircuit/RLCircuit.cpp b/examples/PowerElectronics/RLCircuit/RLCircuit.cpp index 44804aa7a..c8f970cc9 100644 --- a/examples/PowerElectronics/RLCircuit/RLCircuit.cpp +++ b/examples/PowerElectronics/RLCircuit/RLCircuit.cpp @@ -97,7 +97,7 @@ int main(int /* argc */, char const** /* argv */) sysmodel.updateTime(0.0, 1.0); sysmodel.evaluateJacobian(); std::cout << "Intial Jacobian with alpha = 1:\n"; - sysmodel.getJacobian().printMatrix(); + sysmodel.getCsrJacobian()->print(); // Create numerical integrator and configure it for the generator model AnalysisManager::Sundials::Ida idas(&sysmodel); diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt b/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt index 8cf88f1c7..25f371579 100644 --- a/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/CMakeLists.txt @@ -1,12 +1,12 @@ -add_executable(test_sparse runSparseTests.cpp) -target_link_libraries(test_sparse GridKit::sparse) +add_executable(test_sparse_csr runSparseTests.cpp) +target_link_libraries(test_sparse_csr GridKit::SparseMatrix) -add_test(NAME SparseTest COMMAND $) +add_test(NAME SparseTest COMMAND $) add_executable(test_sparse_coo runSparseCooTests.cpp) -target_link_libraries(test_sparse_coo GridKit::sparse) +target_link_libraries(test_sparse_coo GridKit::SparseMatrix) add_test(NAME SparseCooTest COMMAND $) -install(TARGETS test_sparse test_sparse_coo +install(TARGETS test_sparse_csr test_sparse_coo RUNTIME DESTINATION bin) diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp index bdfe583a3..7d74afa69 100644 --- a/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseCooTests.cpp @@ -32,6 +32,7 @@ int main(int, char**) TestingResults result; runTests("CPU", memory::HOST, result); runTests("CPU", memory::HOST, result); + runTests("CPU", memory::HOST, result); return result.summary(); } diff --git a/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseTests.cpp b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseTests.cpp index 44ec49195..393e9ad0b 100644 --- a/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseTests.cpp +++ b/tests/UnitTests/LinearAlgebra/SparseMatrix/runSparseTests.cpp @@ -43,6 +43,7 @@ int main(int, char**) TestingResults result; runTests("CPU", memory::HOST, result); runTests("CPU", memory::HOST, result); + runTests("CPU", memory::HOST, result); return result.summary(); } From 51c0f46670c1f9b65da420a95dedeffe1fd71f96 Mon Sep 17 00:00:00 2001 From: Kakeru Date: Thu, 19 Feb 2026 16:17:56 -0500 Subject: [PATCH 14/15] Resolve rebase conflicts --- GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp index 07dcbeacc..7f3671a22 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp +++ b/GridKit/LinearAlgebra/SparseMatrix/COO_Matrix.hpp @@ -985,4 +985,4 @@ namespace GridKit { } } // namespace LinearAlgebra -} // namespace GridKit \ No newline at end of file +} // namespace GridKit From 6bf260c17857355ef397303fb33b6bca98b13d2d Mon Sep 17 00:00:00 2001 From: Nicholson Koukpaizan Date: Thu, 19 Feb 2026 16:39:27 -0500 Subject: [PATCH 15/15] Fix conversion warnings. --- GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp index 20e575c25..f99521df3 100644 --- a/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp +++ b/GridKit/LinearAlgebra/SparseMatrix/CooMatrix.cpp @@ -365,7 +365,7 @@ namespace GridKit return nullptr; } - map_to_sorted_ = new IdxT[nnz_]; + map_to_sorted_ = new IdxT[static_cast(nnz_)]; for (IdxT i = 0; i < nnz_; i++) { @@ -377,9 +377,9 @@ namespace GridKit { if (h_row_data_[a] != h_row_data_[b]) return h_row_data_[a] < h_row_data_[b]; return h_col_data_[a] < h_col_data_[b]; }); - IdxT* row_data_tmp = new IdxT[nnz_]; - IdxT* col_data_tmp = new IdxT[nnz_]; - RealT* val_data_tmp = new RealT[nnz_]; + IdxT* row_data_tmp = new IdxT[static_cast(nnz_)]; + IdxT* col_data_tmp = new IdxT[static_cast(nnz_)]; + RealT* val_data_tmp = new RealT[static_cast(nnz_)]; for (IdxT i = 0; i < nnz_; i++) { @@ -397,8 +397,8 @@ namespace GridKit delete[] col_data_tmp; delete[] val_data_tmp; - map_to_dedup_ = new IdxT[nnz_]; - IdxT* csr_row_data = new IdxT[n_ + 1](); + map_to_dedup_ = new IdxT[static_cast(nnz_)]; + IdxT* csr_row_data = new IdxT[static_cast(n_ + 1)](); map_to_dedup_[0] = 0; csr_row_data[h_row_data_[0] + 1]++;