deepmodeling · maorz1998 · Jun 7, 2023 · Jun 7, 2023
diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H
@@ -23,13 +23,16 @@
      ==
         fvc::div(hDiffCorrFlux)
     );
+    end1 = std::clock();
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
-    EEqn.relax();
-
+    // EEqn.relax();
+    start1 = std::clock();
     EEqn.solve();
-
     end1 = std::clock();
     time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 #endif
 
 #ifdef GPUSolver_
@@ -52,35 +55,22 @@
         memcpy(boundary_hDiffCorrFlux + eeqn_offset * 3, &patchhDiffCorrFlux[0][0], 3 * patchSize*sizeof(double));
         memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));
 
-        Field<scalar> valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw);
-        Field<scalar> valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw);
-        Field<scalar> gradientInternalCoeffs = he.boundaryField()[patchi].gradientInternalCoeffs();
-        Field<scalar> gradientBoundaryCoeffs = he.boundaryField()[patchi].gradientBoundaryCoeffs();
-        memcpy(eeqn_valueInternalCoeffs + eeqn_offset, &valueInternalCoeffs[0], patchSize*sizeof(double));
-        memcpy(eeqn_valueBoundaryCoeffs + eeqn_offset, &valueBoundaryCoeffs[0], patchSize*sizeof(double));
-        memcpy(eeqn_gradientInternalCoeffs + eeqn_offset, &gradientInternalCoeffs[0], patchSize*sizeof(double));
-        memcpy(eeqn_gradientBoundaryCoeffs + eeqn_offset, &gradientBoundaryCoeffs[0], patchSize*sizeof(double));
-
         eeqn_offset += patchSize;
     }
-    eeqn_offset = 0;
     end1 = std::clock();
-    time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_EEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
     // prepare data on GPU
     start1 = std::clock();
     EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
             &dpdt[0], &diffAlphaD[0], &hDiffCorrFlux[0][0],
-            boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff,
-            eeqn_valueInternalCoeffs, eeqn_valueBoundaryCoeffs,
-            eeqn_gradientInternalCoeffs, eeqn_gradientBoundaryCoeffs);
+            boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff);
     if (doSync) EEqn_GPU.sync();
     end1 = std::clock();
     time_monitor_EEqn_mtxAssembly_GPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
     start1 = std::clock();
-    EEqn_GPU.resetAb();
+    EEqn_GPU.initializeTimeStep();
     EEqn_GPU.fvm_ddt();
     EEqn_GPU.fvm_div();
     EEqn_GPU.fvm_laplacian();
@@ -98,7 +88,7 @@
     time_monitor_EEqn_mtxAssembly += double(end2 - start2) / double(CLOCKS_PER_SEC);
 
     // check value of mtxAssembly, no time monitor
-    EEqn_GPU.checkValue(false);
+    // EEqn_GPU.checkValue(false);
 
     start1 = std::clock();
     EEqn_GPU.solve();
@@ -109,8 +99,8 @@
 
     start1 = std::clock();
     EEqn_GPU.updatePsi(&he[0]);
+    he.correctBoundaryConditions();
     end1 = std::clock();
-    time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
 #endif

diff --git a/applications/solvers/dfLowMachFoam/UEqn.H b/applications/solvers/dfLowMachFoam/UEqn.H
@@ -1,37 +1,21 @@
 // Solve the Momentum equation
 #ifdef GPUSolver_
     start1 = std::clock();
+    UEqn_GPU.initializeTimeStep();
     UEqn_GPU.fvm_ddt(&U.oldTime()[0][0]);
     start2 = std::clock();
     int offset = 0;
     const tmp<volScalarField> nuEff_tmp(turbulence->nuEff());
     const volScalarField& nuEff = nuEff_tmp();
     forAll(U.boundaryField(), patchi)
     {
-        const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
-        const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
-
         const scalarField& patchP = p.boundaryField()[patchi];
-        const vectorField& pSf = mesh.Sf().boundaryField()[patchi];
         const vectorField& patchU = U.boundaryField()[patchi];
         const scalarField& patchRho = rho.boundaryField()[patchi];
         const scalarField& patchNuEff = nuEff.boundaryField()[patchi];
 
-        int patchSize = pw.size();
-
-        Field<vector> ueqn_internalCoeffs_vec = patchFlux*U.boundaryField()[patchi].valueInternalCoeffs(pw);
-        Field<vector> ueqn_boundaryCoeffs_vec = -patchFlux*U.boundaryField()[patchi].valueBoundaryCoeffs(pw); 
-        Field<vector> ueqn_laplac_internalCoeffs_vec = U.boundaryField()[patchi].gradientInternalCoeffs();
-        Field<vector> ueqn_laplac_boundaryCoeffs_vec = U.boundaryField()[patchi].gradientBoundaryCoeffs();
+        int patchSize = patchP.size();
 
-        // only need to construct once
-        memcpy(ueqn_internalCoeffs_init + 3*offset, &ueqn_internalCoeffs_vec[0][0], 3*patchSize*sizeof(double));
-        // need to construct every time step
-        memcpy(ueqn_boundaryCoeffs_init + 3*offset, &ueqn_boundaryCoeffs_vec[0][0], 3*patchSize*sizeof(double));
-        // laplacian internalCoeffs
-        memcpy(ueqn_laplac_internalCoeffs_init + 3*offset, &ueqn_laplac_internalCoeffs_vec[0][0], 3*patchSize*sizeof(double));
-        // laplacian boundaryCoeffs
-        memcpy(ueqn_laplac_boundaryCoeffs_init + 3*offset, &ueqn_laplac_boundaryCoeffs_vec[0][0], 3*patchSize*sizeof(double));
         // boundary pressure
         memcpy(boundary_pressure_init+offset, &patchP[0], patchSize*sizeof(double));
         // boundary velocity
@@ -44,14 +28,12 @@
     }
     end1 = std::clock();
     end2 = std::clock();
-    time_monitor_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);
+    time_monitor_UEqn_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);
     time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
     start1 = std::clock();
-    UEqn_GPU.fvm_div(&phi[0], ueqn_internalCoeffs_init, ueqn_boundaryCoeffs_init, 
-        ueqn_laplac_internalCoeffs_init, ueqn_laplac_boundaryCoeffs_init, 
-        boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
+    UEqn_GPU.fvm_div(boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
     UEqn_GPU.fvc_grad(&p[0]);
     UEqn_GPU.fvc_grad_vector();
     UEqn_GPU.dev2T();

diff --git a/applications/solvers/dfLowMachFoam/YEqn.H b/applications/solvers/dfLowMachFoam/YEqn.H
@@ -1,5 +1,4 @@
 start = std::clock();
-cudaSetDevice(0);
 hDiffCorrFlux = Zero;
 diffAlphaD = Zero;
 sumYDiffError = Zero;
@@ -15,12 +14,15 @@ tmp<fv::convectionScheme<scalar>> mvConvection
     )
 );
 
-
+start1 = std::clock();
 forAll(Y, i)
 {
     sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]);
 }
 const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
+start1 = std::clock();
+time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
 #ifdef GPUSolver_
     start1 = std::clock();
@@ -32,13 +34,14 @@ const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
 #endif
 
 #ifdef GPUSolver_
-    std::vector<double*> Y_new(Y.size()), Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
+    start1 = std::clock();
+    start2 = std::clock();
+    std::vector<double*> Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
     std::vector<const double*> rhoD_GPU(Y.size());
     for (size_t i = 0; i < Y.size(); ++i)
     {
         volScalarField& Yi = Y[i];
         const volScalarField& rhoDi = chemistry->rhoD(i);
-        Y_new[i] = &Yi[0];
         Y_old[i] = &Yi.oldTime()[0];
         rhoD_GPU[i] = &chemistry->rhoD(i)[0];
         cudaMallocHost(&boundary_Y_init[i], num_boundary_faces*sizeof(double));
@@ -64,15 +67,25 @@ const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
         int patchSize = patchMut_sct.size();
         boundary_mutsct.insert(boundary_mutsct.end(), &patchMut_sct[0], &patchMut_sct[0] + patchSize);
     }
+    end2 = std::clock();
+    time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end2 - start2) / double(CLOCKS_PER_SEC);
 
     YEqn_GPU.upwindWeight();
-    YEqn_GPU.correctVelocity(Y_new, boundary_Y_init, rhoD_GPU);
-    YEqn_GPU.fvm_ddt(Y_old);
+    YEqn_GPU.correctVelocity(Y_old, boundary_Y_init, rhoD_GPU);
+    YEqn_GPU.fvm_ddt();
     YEqn_GPU.fvm_div_phi();
     YEqn_GPU.fvm_div_phiUc();
     YEqn_GPU.fvm_laplacian(&mut_sct[0], boundary_mutsct.data(), boundary_rhoD_init);
 
+    end1 = std::clock();
+    time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    start1 = std::clock();
     YEqn_GPU.solve();
+    end1 = std::clock();
+    time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 #endif
 
 //MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
@@ -108,9 +121,15 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
         if (i != inertIndex)
         {
             #ifdef GPUSolver_
+                start1 = std::clock();
                 YEqn_GPU.updatePsi(&Yi[0], speciesIndex);
                 Yi.correctBoundaryConditions();
+                end1 = std::clock();
+                time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+                time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+                time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
             #else
+                start1 = std::clock();
                 tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
                 fvScalarMatrix YiEqn
                 (
@@ -124,10 +143,16 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
                     :  (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
                     )
                 );
+                end1 = std::clock();
+                time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+                time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+                // YiEqn.relax();
 
-                YiEqn.relax();
-
+                start1 = std::clock();
                 YiEqn.solve("Yi");
+                end1 = std::clock();
+                time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+                time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
             #endif
 
             Yi.max(0.0);
@@ -141,5 +166,4 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
 
     end = std::clock();
     time_monitor_Y += double(end - start) / double(CLOCKS_PER_SEC);
-    cudaSetDevice(0);
 }
diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
@@ -102,13 +102,18 @@ int main(int argc, char *argv[])
     double time_monitor_UEqn_mtxAssembly=0;
     double time_monitor_UEqn_Solve=0;
     double time_monitor_UEqn_sum=0;
+    double time_monitor_UEqn_CPU=0;
     double time_monitor_EEqn=0;
     double time_monitor_EEqn_mtxAssembly=0;
     double time_monitor_EEqn_mtxAssembly_CPU_Prepare;
     double time_monitor_EEqn_mtxAssembly_GPU_Prepare;
     double time_monitor_EEqn_mtxAssembly_GPU_Run;
     double time_monitor_EEqn_Solve=0;
     double time_monitor_EEqn_sum=0;
+    double time_monitor_YEqn=0;
+    double time_monitor_YEqn_mtxAssembly=0;
+    double time_monitor_YEqn_mtxAssembly_CPU_Prepare=0;
+    double time_monitor_YEqn_Solve=0;
     double time_monitor_chem=0;
     double time_monitor_Y=0;
     double time_monitor_E=0;
@@ -213,7 +218,7 @@ int main(int argc, char *argv[])
             {
                 if (pimple.consistent())
                 {
-                    #include "pcEqn.H"
+                    // #include "pcEqn.H"
                 }
                 else
                 {
@@ -255,9 +260,12 @@ int main(int argc, char *argv[])
         Info<< "EEqn assamble(GPU prepare) = " << time_monitor_EEqn_mtxAssembly_GPU_Prepare << " s" << endl;
         Info<< "EEqn assamble(GPU run)     = " << time_monitor_EEqn_mtxAssembly_GPU_Run << " s" << endl;
         Info<< "EEqn Time solve            = " << time_monitor_EEqn_Solve << " s" << endl;
+        Info<< "YEqn Time                  = " << time_monitor_YEqn << " s" << endl;
+        Info<< "YEqn Time assamble Mtx     = " << time_monitor_YEqn_mtxAssembly << " s" << endl;
+        Info<< "YEqn assamble(CPU prepare) = " << time_monitor_YEqn_mtxAssembly_CPU_Prepare << " s" << endl;
+        Info<< "YEqn Time solve            = " << time_monitor_YEqn_Solve << " s" << endl;
         Info<< "sum Time                   = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
         Info<< "CPU Time (get turb souce)  = " << time_monitor_CPU << " s" << endl;
-        Info<< "UEqn time in EEqn          = " << time_monitor_UinE << " s" << endl;
         Info<< "============================================"<<nl<< endl;
 
         Info<< "ExecutionTime = " << runTime.elapsedCpuTime() << " s"
@@ -279,6 +287,10 @@ int main(int argc, char *argv[])
         time_monitor_corrDiff = 0;
         time_monitor_CPU = 0;
         time_monitor_UinE = 0;
+        time_monitor_YEqn=0;
+        time_monitor_YEqn_mtxAssembly=0;
+        time_monitor_YEqn_mtxAssembly_CPU_Prepare=0;
+        time_monitor_YEqn_Solve=0;
 
 #ifdef USE_PYTORCH
         if (log_ && torch_)

diff --git a/applications/solvers/dfLowMachFoam/pEqn.H b/applications/solvers/dfLowMachFoam/pEqn.H
@@ -53,12 +53,12 @@ const volScalarField psip0(psi*p);
     volScalarField rAU(1.0/UEqn.A());
     surfaceScalarField rhorAUf("rhorAUf", fvc::interpolate(rho*rAU));
     volVectorField HbyA(constrainHbyA(rAU*UEqn.H(), U, p));
-#endif
 
-if (pimple.nCorrPiso() <= 1)
-{
-    tUEqn.clear();
-}
+    if (pimple.nCorrPiso() <= 1)
+    {
+        tUEqn.clear();
+    }
+#endif
 
 surfaceScalarField phiHbyA
 (
@@ -143,7 +143,10 @@ p.relax();
 U = HbyA - rAU*fvc::grad(p);
 U.correctBoundaryConditions();
 K = 0.5*magSqr(U);
-UEqn_GPU.correctPsi(&U[0][0]);
+
+#ifdef GPUSolver_
+    UEqn_GPU.correctPsi(&U[0][0]);
+#endif
 
 if (pimple.simpleRho())
 {

diff --git a/applications/solvers/dfLowMachFoam/rhoEqn.H b/applications/solvers/dfLowMachFoam/rhoEqn.H
@@ -41,7 +41,7 @@ Description
         offset += patchSize;
     }
     rhoEqn_GPU.fvc_div(&phi[0], boundary_phi_init);
-    rhoEqn_GPU.fvm_ddt(&rho.oldTime()[0], &rho[0]);
+    rhoEqn_GPU.fvm_ddt(&rho.oldTime()[0]);
     rhoEqn_GPU.updatePsi(&rho.primitiveFieldRef()[0]);
     rho.correctBoundaryConditions();
 }

diff --git a/src_gpu/dfEEqn.H b/src_gpu/dfEEqn.H
@@ -52,11 +52,9 @@ public:
 
     void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
             const double *hDiffCorrFlux, const double *dpdt, const double *diffAlphaD,
-            const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff,
-            const double *valueInternalCoeffs, const double *valueBoundaryCoeffs,
-            const double *gradientInternalCoeffs, const double *gradientBoundaryCoeffs);
+            const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff);
 
-    void resetAb();
+    void initializeTimeStep();
 
     void fvm_ddt();
     void fvm_div();