deepmodeling · maorz1998 · May 4, 2023 · May 4, 2023
diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H
@@ -1,5 +1,14 @@
 {
     volScalarField& he = thermo.he();
+    // start1 = std::clock();
+    // t.join();
+    // UEqn_GPU.updatePsi(&U[0][0]);
+    // K = 0.5*magSqr(U);
+    // end1 = std::clock();
+    // time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    // time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    // time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    // time_monitor_UinE += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
     fvScalarMatrix EEqn
     (

diff --git a/applications/solvers/dfLowMachFoam/Make/options b/applications/solvers/dfLowMachFoam/Make/options
@@ -25,9 +25,9 @@ EXE_INC = -std=c++14 \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include,) \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include/torch/csrc/api/include,) \
     $(PYTHON_INC_DIR) \
-    -I/home/runze/deepflame-dev/src_gpu \
+    -I$(DF_ROOT)/src_gpu \
     -I/usr/local/cuda-11.6/include \
-    -I/home/runze/AmgX/AMGX/include
+    -I$(AMGX_DIR)/include
 
 EXE_LIBS = \
     -lcompressibleTransportModels \
@@ -49,7 +49,7 @@ EXE_LIBS = \
     $(if $(LIBTORCH_ROOT),$(DF_SRC)/dfChemistryModel/DNNInferencer/build/libDNNInferencer.so,) \
     $(if $(PYTHON_LIB_DIR),-L$(PYTHON_LIB_DIR),) \
     $(if $(PYTHON_LIB_DIR),-lpython3.8,) \
-    /home/runze/deepflame-dev/src_gpu/build/libdfMatrix.so \
+    $(DF_ROOT)/src_gpu/build/libdfMatrix.so \
     /usr/local/cuda-11.6/lib64/libcudart.so \
-    /home/runze/AmgX/AMGX/build/libamgxsh.so
+    $(AMGX_DIR)/build/libamgxsh.so
 
diff --git a/applications/solvers/dfLowMachFoam/UEqn.H b/applications/solvers/dfLowMachFoam/UEqn.H
@@ -55,13 +55,13 @@ forAll(U.boundaryField(), patchi)
     Field<vector> ueqn_boundaryCoeffs_vec = -patchFlux*U.boundaryField()[patchi].valueBoundaryCoeffs(pw); 
 
     // only need to construct once
-    std::copy(&ueqn_internalCoeffs_vec[0][0], &ueqn_internalCoeffs_vec[0][0]+3*patchSize, ueqn_internalCoeffs_init.begin() + 3*offset);
+    std::copy(&ueqn_internalCoeffs_vec[0][0], &ueqn_internalCoeffs_vec[0][0]+3*patchSize, ueqn_internalCoeffs_init + 3*offset);
 
     // need to construct every time step
-    std::copy(&ueqn_boundaryCoeffs_vec[0][0], &ueqn_boundaryCoeffs_vec[0][0]+3*patchSize, ueqn_boundaryCoeffs_init.begin() + 3*offset);
+    std::copy(&ueqn_boundaryCoeffs_vec[0][0], &ueqn_boundaryCoeffs_vec[0][0]+3*patchSize, ueqn_boundaryCoeffs_init + 3*offset);
 
     // boundary pressure
-    std::copy(&patchP[0], &patchP[0]+patchSize, boundary_pressure_init.begin()+offset);
+    std::copy(&patchP[0], &patchP[0]+patchSize, boundary_pressure_init+offset);
 
     offset += patchSize;
 }
@@ -87,12 +87,14 @@ time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
 // UEqn_GPU.checkValue(false);
 
 start1 = std::clock();
+// std::thread t(&dfMatrix::solve, &UEqn_GPU);
 UEqn_GPU.solve();
 end1 = std::clock();
 time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
 time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
 start1 = std::clock();
+// t.join();
 UEqn_GPU.updatePsi(&U[0][0]);
 K = 0.5*magSqr(U);
 end1 = std::clock();

diff --git a/applications/solvers/dfLowMachFoam/YEqn.H b/applications/solvers/dfLowMachFoam/YEqn.H
@@ -1,5 +1,5 @@
 start = std::clock();
-
+checkCudaErrors(cudaSetDevice(0)); // fail loudly if the device cannot be selected; NOTE(review): device index is hard-coded to 0
 hDiffCorrFlux = Zero;
 diffAlphaD = Zero;
 sumYDiffError = Zero;
@@ -14,6 +14,11 @@ tmp<fv::convectionScheme<scalar>> mvConvection
         mesh.divScheme("div(phi,Yi_h)")
     )
 );
+// start1 = std::clock();
+// std::thread t(&dfMatrix::solve, &UEqn_GPU);
+// end1 = std::clock();
+// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+// time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
 forAll(Y, i)
 {
@@ -80,4 +85,5 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
 
     end = std::clock();
     time_monitor_Y += double(end - start) / double(CLOCKS_PER_SEC);
+    checkCudaErrors(cudaSetDevice(0)); // re-select device 0 after the species block, with the return code checked; NOTE(review): index is hard-coded
 }
diff --git a/applications/solvers/dfLowMachFoam/createdfSolver.H b/applications/solvers/dfLowMachFoam/createdfSolver.H
@@ -20,9 +20,13 @@ forAll(mesh.boundary(), patchi)
 }
 int num_boundary_cells;
 
+string settingPath;
+settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UEqnSettingPath", string(""));
+
 dfMatrix UEqn_GPU(num_surfaces, num_cells, num_boundary_faces, num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0], 
-&mesh.Sf()[0][0], boundary_face_vector_init, boundaryCellIndex, "dDDI", "/home/runze/deepflame-dev/examples/dfLowMachFoam/threeD_reactingTGV/H2/cvodeIntegrator/system/amgxUOptions");
+&mesh.Sf()[0][0], boundary_face_vector_init, boundaryCellIndex, "dDDI", settingPath);
 
-std::vector<double> ueqn_internalCoeffs_init(3*num_boundary_faces), ueqn_boundaryCoeffs_init(3*num_boundary_faces);
-std::vector<double> boundary_pressure_init(num_boundary_faces);
-// std::vector<double> boundary_face_vector_init(3*num_boundary_faces);
+double *ueqn_internalCoeffs_init = nullptr, *ueqn_boundaryCoeffs_init = nullptr, *boundary_pressure_init = nullptr; // page-locked host buffers, allocated on the following lines
+checkCudaErrors(cudaMallocHost(&ueqn_internalCoeffs_init, sizeof(double)*3*num_boundary_faces)); // 3 doubles per boundary face; sizeof first keeps the byte count in size_t
+checkCudaErrors(cudaMallocHost(&ueqn_boundaryCoeffs_init, sizeof(double)*3*num_boundary_faces)); // same layout as the internal coefficients
+checkCudaErrors(cudaMallocHost(&boundary_pressure_init, sizeof(double)*num_boundary_faces)); // one double per boundary face; NOTE(review): no matching cudaFreeHost is visible here - confirm the buffers are released
diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
@@ -60,6 +60,8 @@ Description
 #include "CombustionModel.H"
 
 #include "dfMatrix.H"
+#include <cuda_runtime.h>
+#include <thread>
 
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
@@ -70,6 +72,11 @@ int main(int argc, char *argv[])
 #endif
     #include "postProcess.H"
 
+    unsigned int flags = 0;
+    checkCudaErrors(cudaGetDeviceFlags(&flags)); // read the current flags so bits outside the scheduling field are preserved
+    flags = (flags & ~static_cast<unsigned int>(cudaDeviceScheduleMask)) | cudaDeviceScheduleYield; // scheduling modes are mutually exclusive: clear the old mode before selecting yield
+    checkCudaErrors(cudaSetDeviceFlags(flags));
+
     // #include "setRootCaseLists.H"
     #include "listOptions.H"
     #include "setRootCase2.H"
@@ -93,6 +100,7 @@ int main(int argc, char *argv[])
     double time_monitor_corrThermo=0;
     double time_monitor_corrDiff=0;
     double time_monitor_CPU=0;
+    double time_monitor_UinE=0;
     label timeIndex = 0;
     clock_t start, end, start1, end1, start2, end2;
 
@@ -227,6 +235,7 @@ int main(int argc, char *argv[])
         // Info<< "UEqn sum Time - overhead   = " << time_monitor_UEqn_sum - time_UEqn_initial << " s" << endl;
         Info<< "sum Time                   = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
         Info<< "CPU Time (get turb souce)  = " << time_monitor_CPU << " s" << endl;
+        Info<< "UEqn time in EEqn          = " << time_monitor_UinE << " s" << endl;
         Info<< "============================================"<<nl<< endl;
 
         Info<< "ExecutionTime = " << runTime.elapsedCpuTime() << " s"
@@ -241,6 +250,7 @@ int main(int argc, char *argv[])
         time_monitor_corrThermo = 0;
         time_monitor_corrDiff = 0;
         time_monitor_CPU = 0;
+        time_monitor_UinE = 0;
 
 #ifdef USE_PYTORCH
         if (log_ && torch_)

diff --git a/bashrc.in b/bashrc.in
@@ -14,6 +14,6 @@ export DF_LIBBIN=pwd/lib
 export PATH=$DF_APPBIN:$PATH
 export LD_LIBRARY_PATH=$DF_LIBBIN:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=/home/runze/AmgX/AMGX/build:$LD_LIBRARY_PATH
-export AMGX_DIR=/home/runze/AmgX/AMGX
+export AMGX_DIR=path-to-amgx  # replace path-to-amgx with the AmgX root directory
+export LD_LIBRARY_PATH=$AMGX_DIR/build:$LD_LIBRARY_PATH