deepmodeling · maorz1998 · Jun 5, 2023 · Jun 2, 2023 · Jun 5, 2023 · Jun 5, 2023
diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H
@@ -1,24 +1,18 @@
 {
     volScalarField& he = thermo.he();
-    #ifdef GPUSolver_
-        // start1 = std::clock();
-        // t.join();
-        // UEqn_GPU.updatePsi(&U[0][0]);
-        // K = 0.5*magSqr(U);
-        // end1 = std::clock();
-        // time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
-        // time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
-        // time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
-        // time_monitor_UinE += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+#ifdef GPUSolver_
         start1 = std::clock();
-        // // t.join();
         UEqn_GPU.updatePsi(&U[0][0]);
         K = 0.5*magSqr(U);
         end1 = std::clock();
         time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
         time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
         time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
-    #endif
+#endif
+
+#ifdef CPUSolver_
+    start1 = std::clock();
     fvScalarMatrix EEqn
     (
         fvm::ddt(rho, he) + fvm::div(phi, he)
@@ -33,4 +27,91 @@
     EEqn.relax();
 
     EEqn.solve();
+
+    end1 = std::clock();
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+#endif
+
+#ifdef GPUSolver_
+    // prepare data on CPU
+    start1 = std::clock();
+    start2 = std::clock();
+    const tmp<volScalarField> alphaEff_tmp(turbulence->alphaEff());
+    const volScalarField& alphaEff = alphaEff_tmp();
+    int eeqn_offset = 0;
+    forAll(he.boundaryField(), patchi)
+    {
+        const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
+        const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
+        int patchSize = pw.size();
+
+        const scalarField& patchK = K.boundaryField()[patchi];
+        const vectorField& patchhDiffCorrFlux = hDiffCorrFlux.boundaryField()[patchi];
+        const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi];
+        memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
+        memcpy(boundary_hDiffCorrFlux + eeqn_offset * 3, &patchhDiffCorrFlux[0][0], 3 * patchSize*sizeof(double));
+        memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));
+
+        Field<scalar> valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw);
+        Field<scalar> valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw);
+        Field<scalar> gradientInternalCoeffs = he.boundaryField()[patchi].gradientInternalCoeffs();
+        Field<scalar> gradientBoundaryCoeffs = he.boundaryField()[patchi].gradientBoundaryCoeffs();
+        memcpy(eeqn_valueInternalCoeffs + eeqn_offset, &valueInternalCoeffs[0], patchSize*sizeof(double));
+        memcpy(eeqn_valueBoundaryCoeffs + eeqn_offset, &valueBoundaryCoeffs[0], patchSize*sizeof(double));
+        memcpy(eeqn_gradientInternalCoeffs + eeqn_offset, &gradientInternalCoeffs[0], patchSize*sizeof(double));
+        memcpy(eeqn_gradientBoundaryCoeffs + eeqn_offset, &gradientBoundaryCoeffs[0], patchSize*sizeof(double));
+
+        eeqn_offset += patchSize;
+    }
+    eeqn_offset = 0;
+    end1 = std::clock();
+    time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    // prepare data on GPU
+    start1 = std::clock();
+    EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
+            &dpdt[0], &diffAlphaD[0], &hDiffCorrFlux[0][0],
+            boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff,
+            eeqn_valueInternalCoeffs, eeqn_valueBoundaryCoeffs,
+            eeqn_gradientInternalCoeffs, eeqn_gradientBoundaryCoeffs);
+    if (doSync) EEqn_GPU.sync();
+    end1 = std::clock();
+    time_monitor_EEqn_mtxAssembly_GPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    start1 = std::clock();
+    EEqn_GPU.resetAb();
+    EEqn_GPU.fvm_ddt();
+    EEqn_GPU.fvm_div();
+    EEqn_GPU.fvm_laplacian();
+    EEqn_GPU.fvc_ddt();
+    EEqn_GPU.fvc_div_phi_scalar();
+    EEqn_GPU.fvc_div_vector();
+    EEqn_GPU.add_to_source();
+    if (doSync) EEqn_GPU.sync();
+    end1 = std::clock();
+    time_monitor_EEqn_mtxAssembly_GPU_Run += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    EEqn_GPU.sync();
+    end2 = std::clock();
+    time_monitor_EEqn += double(end2 - start2) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_mtxAssembly += double(end2 - start2) / double(CLOCKS_PER_SEC);
+
+    // check value of mtxAssembly, no time monitor
+    EEqn_GPU.checkValue(false);
+
+    start1 = std::clock();
+    EEqn_GPU.solve();
+    if (doSync) EEqn_GPU.sync();
+    end1 = std::clock();
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    start1 = std::clock();
+    EEqn_GPU.updatePsi(&he[0]);
+    end1 = std::clock();
+    time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+#endif
 }
diff --git a/applications/solvers/dfLowMachFoam/createdfSolver.H b/applications/solvers/dfLowMachFoam/createdfSolver.H
@@ -31,9 +31,11 @@ settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UE
 dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0], 
 &mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex);
 
+#ifdef GPUSolver_
 dfRhoEqn rhoEqn_GPU(dfDataBase);
 dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath);
 dfYEqn YEqn_GPU(dfDataBase, "dDDI", settingPath, inertIndex);
+dfEEqn EEqn_GPU(dfDataBase, "dDDI", settingPath);
 
 double *ueqn_internalCoeffs_init, *ueqn_boundaryCoeffs_init, *boundary_pressure_init, *boundary_velocity_init,
     *boundary_nuEff_init, *boundary_rho_init, *ueqn_laplac_internalCoeffs_init, *ueqn_laplac_boundaryCoeffs_init, *boundary_phi_init;
@@ -45,4 +47,15 @@ cudaMallocHost(&boundary_velocity_init, 3*num_boundary_faces*sizeof(double));
 cudaMallocHost(&boundary_pressure_init, num_boundary_faces*sizeof(double));
 cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double));
 cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double));
-cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double));
+cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double));
+
+double *boundary_alphaEff, *boundary_K, *boundary_hDiffCorrFlux;
+double *eeqn_valueInternalCoeffs, *eeqn_valueBoundaryCoeffs, *eeqn_gradientInternalCoeffs, *eeqn_gradientBoundaryCoeffs;
+cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double));
+cudaMallocHost(&boundary_hDiffCorrFlux, 3 * num_boundary_faces*sizeof(double));
+cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double));
+cudaMallocHost(&eeqn_valueInternalCoeffs, num_boundary_faces*sizeof(double));
+cudaMallocHost(&eeqn_valueBoundaryCoeffs, num_boundary_faces*sizeof(double));
+cudaMallocHost(&eeqn_gradientInternalCoeffs, num_boundary_faces*sizeof(double));
+cudaMallocHost(&eeqn_gradientBoundaryCoeffs, num_boundary_faces*sizeof(double));
+#endif
diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
@@ -62,11 +62,13 @@ Description
 #include "dfUEqn.H"
 #include "dfYEqn.H"
 #include "dfRhoEqn.H"
+#include "dfEEqn.H"
 #include <cuda_runtime.h>
 #include <thread>
 #include "upwind.H"
 
 #define GPUSolver_
+//#define CPUSolver_
 
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
@@ -94,11 +96,19 @@ int main(int argc, char *argv[])
     #include "createFields.H"
     #include "createRhoUfIfPresent.H"
 
+    bool doSync = true;
     double time_monitor_flow=0;
     double time_monitor_UEqn=0;
     double time_monitor_UEqn_mtxAssembly=0;
     double time_monitor_UEqn_Solve=0;
     double time_monitor_UEqn_sum=0;
+    double time_monitor_EEqn=0;
+    double time_monitor_EEqn_mtxAssembly=0;
+    double time_monitor_EEqn_mtxAssembly_CPU_Prepare;
+    double time_monitor_EEqn_mtxAssembly_GPU_Prepare;
+    double time_monitor_EEqn_mtxAssembly_GPU_Run;
+    double time_monitor_EEqn_Solve=0;
+    double time_monitor_EEqn_sum=0;
     double time_monitor_chem=0;
     double time_monitor_Y=0;
     double time_monitor_E=0;
@@ -223,6 +233,7 @@ int main(int argc, char *argv[])
 
         runTime.write();
         time_monitor_UEqn_sum += time_monitor_UEqn;
+        time_monitor_EEqn_sum += time_monitor_EEqn;
 
         Info << "output time index " << runTime.timeIndex() << endl;
 
@@ -238,6 +249,12 @@ int main(int argc, char *argv[])
         Info<< "UEqn Time solve            = " << time_monitor_UEqn_Solve << " s" << endl;
         // Info<< "UEqn sum Time              = " << time_monitor_UEqn_sum << " s" << endl;
         // Info<< "UEqn sum Time - overhead   = " << time_monitor_UEqn_sum - time_UEqn_initial << " s" << endl;
+        Info<< "EEqn Time                  = " << time_monitor_EEqn << " s" << endl;
+        Info<< "EEqn Time assamble Mtx     = " << time_monitor_EEqn_mtxAssembly << " s" << endl;
+        Info<< "EEqn assamble(CPU prepare) = " << time_monitor_EEqn_mtxAssembly_CPU_Prepare << " s" << endl;
+        Info<< "EEqn assamble(GPU prepare) = " << time_monitor_EEqn_mtxAssembly_GPU_Prepare << " s" << endl;
+        Info<< "EEqn assamble(GPU run)     = " << time_monitor_EEqn_mtxAssembly_GPU_Run << " s" << endl;
+        Info<< "EEqn Time solve            = " << time_monitor_EEqn_Solve << " s" << endl;
         Info<< "sum Time                   = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
         Info<< "CPU Time (get turb souce)  = " << time_monitor_CPU << " s" << endl;
         Info<< "UEqn time in EEqn          = " << time_monitor_UinE << " s" << endl;
@@ -248,6 +265,12 @@ int main(int argc, char *argv[])
         time_monitor_UEqn = 0;
         time_monitor_UEqn_mtxAssembly = 0;
         time_monitor_UEqn_Solve = 0;
+        time_monitor_EEqn = 0;
+        time_monitor_EEqn_mtxAssembly = 0;
+        time_monitor_EEqn_mtxAssembly_CPU_Prepare = 0;
+        time_monitor_EEqn_mtxAssembly_GPU_Prepare = 0;
+        time_monitor_EEqn_mtxAssembly_GPU_Run = 0;
+        time_monitor_EEqn_Solve = 0;
         time_monitor_chem = 0;
         time_monitor_Y = 0;
         time_monitor_E = 0;

diff --git a/examples/dfLowMachFoam/threeD_reactingTGV/H2/cvodeIntegrator_gpu/system/fvSchemes b/examples/dfLowMachFoam/threeD_reactingTGV/H2/cvodeIntegrator_gpu/system/fvSchemes
@@ -30,15 +30,15 @@ divSchemes
     default             none;
 
     div(phi,U)          Gauss linear;
-    div(phi,Yi)         Gauss limitedLinear01 1;
-    div(phi,h)          Gauss limitedLinear 1;
-    div(phi,ha)          Gauss limitedLinear 1;
-    div(phi,K)          Gauss limitedLinear 1;
-    div(phid,p)         Gauss limitedLinear 1;
-    div(phi,epsilon)    Gauss limitedLinear 1;
-    div(phi,Yi_h)       Gauss limitedLinear01 1;
-    div(phi,k)          Gauss limitedLinear 1;
-    div(hDiffCorrFlux)             Gauss cubic;
+    div(phi,Yi)         Gauss linear;
+    div(phi,h)          Gauss linear;
+    div(phi,ha)          Gauss linear;
+    div(phi,K)          Gauss linear;
+    div(phid,p)         Gauss linear;
+    div(phi,epsilon)    Gauss linear;
+    div(phi,Yi_h)       Gauss upwind;
+    div(phi,k)          Gauss linear;
+    div(hDiffCorrFlux)             Gauss linear;
     div(((rho*nuEff)*dev2(T(grad(U)))))     Gauss linear;
 }
 

diff --git a/src_gpu/CMakeLists.txt b/src_gpu/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(${PROJECT_NAME}
         dfUEqn.cu 
         dfRhoEqn.cu 
         dfYEqn.cu
+        dfEEqn.cu
         AmgXSolver.cu)
 
 target_link_libraries(${PROJECT_NAME}

diff --git a/src_gpu/dfEEqn.H b/src_gpu/dfEEqn.H
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "AmgXSolver.H"
+#include <amgx_c.h>
+#include "dfMatrixDataBase.H"
+
+class dfEEqn
+{
+private:
+    dfMatrixDataBase &dataBase_;
+    cudaStream_t stream;
+    AmgXSolver *ESolver = nullptr;
+    int num_iteration;
+
+    // common variables
+    int num_cells, cell_bytes, num_faces, num_surfaces, cell_vec_bytes, csr_value_vec_bytes, num_boundary_cells;
+    int num_boundary_faces, boundary_face_bytes;
+    int *d_A_csr_row_index, *d_A_csr_diag_index, *d_A_csr_col_index;
+
+    // Matrix variables
+    double *d_A_csr, *d_b = nullptr;
+    double *h_A_csr, *h_b = nullptr;
+    double *d_he_old = nullptr;
+    double *h_he_new = nullptr;
+
+    // fields used by EEqn
+    double *d_alphaEff = nullptr;
+    double *d_K = nullptr;
+    double *d_K_old = nullptr;
+    double *d_dpdt = nullptr;
+    double *d_diffAlphaD = nullptr;
+    double *d_hDiffCorrFlux = nullptr;
+    double *d_boundary_K_init = nullptr;
+    double *d_boundary_K = nullptr;
+    double *d_boundary_hDiffCorrFlux_init = nullptr;
+    double *d_boundary_hDiffCorrFlux = nullptr;
+    double *d_boundary_alphaEff_init = nullptr;
+    double *d_boundary_alphaEff = nullptr;
+    double *d_value_internal_coeffs_init = nullptr;
+    double *d_value_boundary_coeffs_init = nullptr;
+    double *d_gradient_internal_coeffs_init = nullptr;
+    double *d_gradient_boundary_coeffs_init = nullptr;
+    double *d_value_internal_coeffs = nullptr;
+    double *d_value_boundary_coeffs = nullptr;
+    double *d_gradient_internal_coeffs = nullptr;
+    double *d_gradient_boundary_coeffs = nullptr;
+
+public:
+    dfEEqn();
+    dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile);
+    ~dfEEqn();
+
+    void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
+            const double *hDiffCorrFlux, const double *dpdt, const double *diffAlphaD,
+            const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff,
+            const double *valueInternalCoeffs, const double *valueBoundaryCoeffs,
+            const double *gradientInternalCoeffs, const double *gradientBoundaryCoeffs);
+
+    void resetAb();
+
+    void fvm_ddt();
+    void fvm_div();
+    void fvm_laplacian();
+
+    void fvc_ddt();
+    void fvc_div_phi_scalar();
+    void fvc_div_vector();
+    void add_to_source();
+
+    void solve();
+    void checkValue(bool print);
+    void updatePsi(double *Psi);
+
+    void sync();
+};