deepmodeling · maorz1998 · Apr 5, 2023 · Apr 5, 2023 · Apr 5, 2023
diff --git a/applications/solvers/dfLowMachFoam/Make/options b/applications/solvers/dfLowMachFoam/Make/options
@@ -23,7 +23,9 @@ EXE_INC = -std=c++14 \
     -I$(CANTERA_ROOT)/include \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include,) \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include/torch/csrc/api/include,) \
-    $(PYTHON_INC_DIR)
+    $(PYTHON_INC_DIR) \
+    -I/home/runze/deepflame-dev/src_gpu \
+    -I/usr/local/cuda-11.6/include
 
 EXE_LIBS = \
     -lcompressibleTransportModels \
@@ -44,4 +46,7 @@ EXE_LIBS = \
     $(if $(LIBTORCH_ROOT),-lpthread,) \
     $(if $(LIBTORCH_ROOT),$(DF_SRC)/dfChemistryModel/DNNInferencer/build/libDNNInferencer.so,) \
     $(if $(PYTHON_LIB_DIR),-L$(PYTHON_LIB_DIR),) \
-    $(if $(PYTHON_LIB_DIR),-lpython3.8,)
+    $(if $(PYTHON_LIB_DIR),-lpython3.8,) \
+    /home/runze/deepflame-dev/src_gpu/build/libdfMatrix.so \
+    /usr/local/cuda-11.6/lib64/libcudart.so
+
diff --git a/applications/solvers/dfLowMachFoam/UEqn.H b/applications/solvers/dfLowMachFoam/UEqn.H
@@ -15,3 +15,42 @@ if (pimple.momentumPredictor())
 
     K = 0.5*magSqr(U);
 }
+
+
+int offset = 0;
+forAll(U.boundaryField(), patchi)
+{
+    const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
+    const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
+
+    const scalarField& patchP = p.boundaryField()[patchi];
+    const vectorField& pSf = mesh.Sf().boundaryField()[patchi];
+
+    int patchSize = pw.size();
+
+    Field<vector> ueqn_internalCoeffs_vec = patchFlux*U.boundaryField()[patchi].valueInternalCoeffs(pw);
+    Field<vector> ueqn_boundaryCoeffs_vec = -patchFlux*U.boundaryField()[patchi].valueBoundaryCoeffs(pw); 
+
+    // only need to construct once
+    std::copy(&ueqn_internalCoeffs_vec[0][0], &ueqn_internalCoeffs_vec[0][0]+3*patchSize, ueqn_internalCoeffs_init.begin() + 3*offset);
+
+    // need to construct every time step
+    std::copy(&ueqn_boundaryCoeffs_vec[0][0], &ueqn_boundaryCoeffs_vec[0][0]+3*patchSize, ueqn_boundaryCoeffs_init.begin() + 3*offset);
+
+    // boundary pressure
+    std::copy(&patchP[0], &patchP[0]+patchSize, boundary_pressure_init.begin()+offset);
+    // boundary face vector
+    std::copy(&pSf[0][0], &pSf[0][0]+3*patchSize, boundary_face_vector_init.begin()+3*offset);
+
+    offset += patchSize;
+}
+fvVectorMatrix turb_source
+(
+    turbulence->divDevRhoReff(U)
+);
+
+UEqn_GPU.fvm_ddt(&rho.oldTime()[0], &rho[0], &mesh.V()[0], &U.oldTime()[0][0]);
+UEqn_GPU.fvm_div(&mesh.surfaceInterpolation::weights()[0], &phi[0], ueqn_internalCoeffs_init, ueqn_boundaryCoeffs_init);
+UEqn_GPU.fvc_grad(&mesh.Sf()[0][0], &p[0], boundary_face_vector_init, boundary_pressure_init);
+UEqn_GPU.add_fvMatrix(&turb_source.lower()[0], &turb_source.diag()[0], &turb_source.upper()[0], &turb_source.source()[0][0]);
+UEqn_GPU.print();
diff --git a/applications/solvers/dfLowMachFoam/createdfSolver.H b/applications/solvers/dfLowMachFoam/createdfSolver.H
@@ -0,0 +1,21 @@
+const labelUList& owner = mesh.owner();
+const labelUList& neighbour = mesh.neighbour();
+int num_cells = mesh.nCells();
+int num_surfaces = neighbour.size();
+
+std::vector<int> boundaryCellIndex;
+int num_boundary_faces = 0;
+int patchSize;
+forAll(mesh.boundary(), patchi)
+{
+    labelUList sub_boundary = mesh.boundary()[patchi].faceCells();
+    patchSize = sub_boundary.size();
+    boundaryCellIndex.insert(boundaryCellIndex.end(), &sub_boundary[0], &sub_boundary[0]+patchSize);
+    num_boundary_faces += patchSize;
+}
+int num_boundary_cells;
+
+dfMatrix UEqn_GPU(num_surfaces, num_cells, num_boundary_faces, num_boundary_cells, &neighbour[0], &owner[0], boundaryCellIndex);
+
+std::vector<double> ueqn_internalCoeffs_init(3*num_boundary_faces), ueqn_boundaryCoeffs_init(3*num_boundary_faces);
+std::vector<double> boundary_pressure_init(num_boundary_faces), boundary_face_vector_init(3*num_boundary_faces);
diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
@@ -59,6 +59,8 @@ Description
 #include "basicThermo.H"
 #include "CombustionModel.H"
 
+#include "dfMatrix.H"
+
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
 int main(int argc, char *argv[])
@@ -97,6 +99,8 @@ int main(int argc, char *argv[])
         #include "setInitialDeltaT.H"
     }
 
+    #include "createdfSolver.H"
+
     // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
     Info<< "\nStarting time loop\n" << endl;

diff --git a/src_gpu/CMakeLists.txt b/src_gpu/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.5)
+project(dfMatrix)
+FIND_PACKAGE(CUDA REQUIRED)
+enable_language(CUDA)
+
+include_directories(
+    ${CUDA_INCLUDE_DIRS}
+)
+
+SET(CMAKE_C_COMPILER g++)
+add_compile_options(-std=c++17)
+
+add_library(${PROJECT_NAME} SHARED dfMatrix.cu)
+
+target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
diff --git a/src_gpu/dfMatrix.H b/src_gpu/dfMatrix.H
@@ -0,0 +1,163 @@
+#ifndef dfMatrix_H
+#define dfMatrix_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <cuda_runtime.h>
+#include <vector>
+#include <numeric>
+#include <algorithm>
+#include <map>
+#include <iostream>
+
+static const char *_cudaGetErrorEnum(cudaError_t error) {
+  return cudaGetErrorName(error);
+}
+
+template <typename T>
+void check(T result, char const *const func, const char *const file,
+        int const line) {
+  if (result) {
+    fprintf(stderr, "cuda error at %s:%d code=%d(%s) \"%s\" \n", file, line,
+            static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
+    exit(EXIT_FAILURE);
+  }
+}
+
+#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
+
+class dfMatrix
+{
+private:
+    /**
+     * @brief data size
+     */
+    // - number of cell size
+    int num_cells;
+    // - number of face size
+    int num_surfaces;
+    // - number of offdiagnal entry size (2*num_surfaces)
+    int num_faces;
+    // - number of boundary cells
+    int num_boundary_cells;
+    // - number of boundary faces
+    int num_boundary_faces;
+
+    /**
+     * @brief input data (now these values are named according to UEqn)
+     */
+    // - mesh variables
+    // - csr_row_index
+    int *h_A_csr_row_index=nullptr, *d_A_csr_row_index=nullptr;
+    // - csr_col_index
+    int *h_A_csr_col_index=nullptr, *d_A_csr_col_index=nullptr;
+    // - csr_diag_index
+    int *h_A_csr_diag_index=nullptr, *d_A_csr_diag_index=nullptr;
+
+    // - the pre-permutated and post-permutated interpolation weight list
+    std::vector<double> h_weight_vec_init, h_weight_vec;
+    // - the pre-permutated and post-permutated flux (phi) list
+    std::vector<double> h_phi_vec_init, h_phi_vec;
+    // - the pre-permutated and post-permutated cell face vector list
+    std::vector<double> h_face_vector_vec_init, h_face_vector_vec;
+    // - the host pointer to rho_new, rho_old, velocity_old, pressure and volume list
+    double *h_rho_new = nullptr, *h_rho_old = nullptr, *h_velocity_old = nullptr, 
+    *h_pressure = nullptr;
+    const double *h_volume = nullptr;
+    // - the host pointer to the pre-permutated and post-permutated interpolation weight list
+    double *h_weight_init = nullptr, *h_weight = nullptr;
+    // - the host pointer to the pre-permutated and post-permutated flux (phi) list
+    double *h_phi_init = nullptr, *h_phi = nullptr;
+    // - the host pointer to the pre-permutated and post-permutated cell face vector list
+    double *h_face_vector_init = nullptr, *h_face_vector = nullptr;
+    // - the device pointer to rho_new, rho_old, velocity_old, pressure and volume list
+    double *d_rho_new = nullptr, *d_rho_old = nullptr, *d_velocity_old = nullptr, 
+    *d_pressure = nullptr, *d_volume = nullptr;
+    // - the device pointer to the pre-permutated and post-permutated interpolation weight list
+    double *d_weight_init = nullptr, *d_weight = nullptr;
+    // - the device pointer to the pre-permutated and post-permutated flux (phi) list
+    double *d_phi_init = nullptr, *d_phi = nullptr;
+    // - the device pointer to the pre-permutated and post-permutated cell face vector list
+    double *d_face_vector_init = nullptr, *d_face_vector = nullptr;
+
+    double rdelta_t = 1/1e-6;
+
+    /**
+     * @brief boundary related variables
+     */
+    int *h_boundary_cell_offset = nullptr, *d_boundary_cell_offset=nullptr;
+    int *h_boundary_cell_id = nullptr, *d_boundary_cell_id = nullptr;
+    double *h_internal_coeffs = nullptr, *h_boundary_coeffs = nullptr,
+    *h_boundary_pressure = nullptr, *h_boundary_face_vector = nullptr,
+    *d_internal_coeffs = nullptr, *d_boundary_coeffs = nullptr,
+    *d_boundary_pressure = nullptr, *d_boundary_face_vector = nullptr;
+    std::vector<int> boundPermutationList;
+    std::vector<double> ueqn_internalCoeffs, ueqn_boundaryCoeffs;
+    std::vector<double> boundary_face_vector;
+    std::vector<double> boundary_pressure;
+
+    // - the device pointer to the permutated index list
+    std::vector<int> permedIndex;
+    int *d_permedIndex=nullptr;
+
+    // bytesize
+    // - bytes of diagnal entries
+    size_t cell_bytes;
+    // - bytes of diagnal entries (vector)
+    size_t cell_vec_bytes;
+    // - bytes of diagnal index
+    size_t cell_index_bytes;
+     // - bytes of diagnal index
+    size_t face_bytes;
+    size_t face_vec_bytes;
+
+    size_t boundary_cell_bytes;
+    size_t boundary_cell_vec_bytes;
+    size_t boundary_cell_index_bytes;
+
+    size_t boundary_face_bytes;
+    size_t boundary_face_vec_bytes;
+    size_t boundary_face_index_bytes;
+
+    // A_csr has one more element in each row: itself
+    size_t csr_row_index_bytes;
+    size_t csr_col_index_bytes;
+    size_t csr_value_bytes;
+    size_t csr_value_vec_bytes;
+
+    // extra matrix information
+    double *h_A_csr = nullptr, *h_b = nullptr;
+    double *d_A_csr = nullptr, *d_b = nullptr;
+    double *d_turbSrc_A = nullptr, *d_turbSrc_b = nullptr;
+    std::vector<double> h_turbSrc_init_mtx_vec, h_turbSrc_init_1mtx;
+    std::vector<double> h_turbSrc_init_src_vec, h_turbSrc_src_vec;
+    std::vector<int> tmpPermutatedList;
+
+    /**
+     * @brief cuda functions
+     */
+    cudaStream_t stream;
+
+
+public:
+    dfMatrix();
+    dfMatrix(int num_surfaces, int num_cells, int num_boundary_faces, int &num_boundary_cells_output,
+    const int *neighbour, const int *owner, std::vector<int> boundary_cell_id_init);
+    ~dfMatrix();
+
+    void print();
+
+    void fvm_ddt(double *rho_old, double *rho_new, const double* volume, double* vector_old);
+
+    void fvm_div(const double* weight, double* phi, std::vector<double> ueqn_internalCoeffs_init,
+    std::vector<double> ueqn_boundaryCoeffs_init);
+
+    void fvc_grad(const double* face_vector, double* pressure, std::vector<double> boundary_face_vector_init,
+    std::vector<double> boundary_pressure_init);
+
+    void add_fvMatrix(double* turbSrc_low, double* turbSrc_diag, double* turbSrc_upp, double* turbSrc_source);
+
+    void solve();
+};
+
+#endif