Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 30 additions & 29 deletions applications/solvers/dfLowMachFoam/EEqn.H
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
{
// Energy-equation fragment: bind the thermo energy field to `he` by reference.
volScalarField& he = thermo.he();

#ifdef GPUSolver_
// GPU build only: hand U's storage to UEqn_GPU.updatePsi, recompute K as
// 0.5*magSqr(U), and add the elapsed std::clock() time to three timers.
// NOTE(review): this text is a rendered diff without +/- markers. The hunk
// header shrinks this region from 16 lines to 6 and the same updatePsi/K
// sequence appears again further down, so this copy is presumably the removed
// side - confirm against the repository.
start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef CPUSolver_
start1 = std::clock();
fvScalarMatrix EEqn
Expand All @@ -32,7 +22,16 @@
// Solve the EEqn matrix and add the time elapsed since start1 to the EEqn
// timers.
// NOTE(review): `time_monitor_EEqn_Solve` and `time_monitor_EEqn_solve` are the
// before/after spellings shown by the rendered diff; presumably only one of
// the two lines exists in the real file - confirm.
EEqn.solve();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef GPUSolver_
// GPU build only: pass U's storage to UEqn_GPU.updatePsi, recompute K from U,
// and bill the elapsed time to the UEqn total and its correctBC timer.
start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

// Opens the GPU-only energy-equation path that follows.
#ifdef GPUSolver_
Expand All @@ -41,33 +40,39 @@
// Evaluate turbulence->alphaEff() once. alphaEff is a reference obtained from
// alphaEff_tmp, so alphaEff_tmp has to stay in scope while alphaEff is used.
// start2/end2 bracket only this evaluation.
start2 = std::clock();
const tmp<volScalarField> alphaEff_tmp(turbulence->alphaEff());
const volScalarField& alphaEff = alphaEff_tmp();
end2 = std::clock();
// Pack the per-patch boundary values of K, hDiffCorrFlux and alphaEff into the
// flat buffers boundary_K / boundary_hDiffCorrFlux / boundary_alphaEff.
// eeqn_offset counts the faces already written; patchNum counts the patches.
// NOTE(review): the copies use sizeof(double) - assumes the field scalar type
// is double; confirm for the build configuration.
int eeqn_offset = 0;
int patchNum = 0;
forAll(he.boundaryField(), patchi)
{
// NOTE(review): patchFlux is not referenced anywhere in the visible loop body.
const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
patchNum++;
// The patch length is taken from the interpolation-weights patch.
const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
int patchSize = pw.size();

const scalarField& patchK = K.boundaryField()[patchi];
const vectorField& patchhDiffCorrFlux = hDiffCorrFlux.boundaryField()[patchi];
const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi];
// Scalars are copied one double per face; the vector field is copied as three
// doubles per face, hence the factor 3 on both its offset and its byte count.
memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
memcpy(boundary_hDiffCorrFlux + eeqn_offset * 3, &patchhDiffCorrFlux[0][0], 3 * patchSize*sizeof(double));
memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
// NOTE(review): the start1 that this interval is measured from is set in lines
// that are collapsed in this view. The `_CPU_Prepare` / `_CPU_prepare` lines
// are the before/after spellings from the rendered diff.
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
// Unconditionally prints the accumulated CPU-prepare time, this call's
// alphaEff evaluation time (end2 - start2) and the patch count to stderr.
fprintf(stderr, "time_monitor_EEqn_mtxAssembly_CPU_prepare: %lf, build alphaEff time: %lf, patchNum: %d\n",
time_monitor_EEqn_mtxAssembly_CPU_prepare,
double(end2 - start2) / double(CLOCKS_PER_SEC), patchNum);

// prepare data on GPU
// NOTE(review): the two argument-list endings and the two sync forms below are
// the removed and added sides of the diff shown together; as rendered here
// they do not form a single well-formed call. Confirm the intended argument
// list against the repository.
start1 = std::clock();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
&dpdt[0], &diffAlphaD[0], &hDiffCorrFlux[0][0],
boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff);
if (doSync) EEqn_GPU.sync();
&dpdt[0], boundary_K, boundary_alphaEff);
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_GPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

// Start of the timed GPU assembly sequence; the remaining calls of the
// sequence are partly collapsed in this view.
start1 = std::clock();
EEqn_GPU.initializeTimeStep();
Expand All @@ -78,30 +83,26 @@
// Tail of the GPU assembly sequence for the energy equation, followed by the
// timer bookkeeping for that sequence.
// NOTE(review): removed and added diff lines are interleaved here. The
// `if (doSync)` sync, the `_GPU_Run` timer and the end2/start2 pair look like
// the removed side, while the unconditional sync and the `_GPU_run` timer look
// like the added side - confirm against the repository.
EEqn_GPU.fvc_div_phi_scalar();
EEqn_GPU.fvc_div_vector();
EEqn_GPU.add_to_source();
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_GPU_Run += double(end1 - start1) / double(CLOCKS_PER_SEC);

EEqn_GPU.sync();
end2 = std::clock();
time_monitor_EEqn += double(end2 - start2) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end2 - start2) / double(CLOCKS_PER_SEC);
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// check value of mtxAssembly, no time monitor
// EEqn_GPU.checkValue(false);

// Timed GPU solve of the energy equation.
start1 = std::clock();
EEqn_GPU.solve();
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

// Pass he's storage to EEqn_GPU.updatePsi (presumably to receive the GPU
// solution - confirm), then re-apply he's boundary conditions on the host.
start1 = std::clock();
EEqn_GPU.updatePsi(&he[0]);
he.correctBoundaryConditions();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
}
14 changes: 7 additions & 7 deletions applications/solvers/dfLowMachFoam/UEqn.H
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
// Solve the Momentum equation
#ifdef GPUSolver_
// NOTE(review): rendered diff without +/- markers. The hunk header shrinks
// this region from 9 lines to 6, and initializeTimeStep/fvm_ddt appear again
// later in the GPU run sequence, so those two calls and `start2` are
// presumably the removed lines here - confirm against the repository.
start1 = std::clock();
UEqn_GPU.initializeTimeStep();
UEqn_GPU.fvm_ddt(&U.oldTime()[0][0]);
start2 = std::clock();
// offset is the running write position used by the boundary-packing loop that
// follows (its body is collapsed in this view).
int offset = 0;
// nuEff is a reference obtained from nuEff_tmp, so nuEff_tmp has to stay in
// scope while nuEff is used.
const tmp<volScalarField> nuEff_tmp(turbulence->nuEff());
const volScalarField& nuEff = nuEff_tmp();
Expand All @@ -27,21 +24,24 @@
offset += patchSize;
}
// Close the host-side preparation interval and add it to the UEqn timers.
// NOTE(review): the end2/start2 pair feeding time_monitor_UEqn_CPU looks like
// the removed side of the diff, the `_CPU_prepare` timer like the added side -
// confirm against the repository.
end1 = std::clock();
end2 = std::clock();
time_monitor_UEqn_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

// Timed GPU assembly sequence for the momentum equation. sync() is called
// before the clock is read, so the interval ends after the sync returns.
start1 = std::clock();
UEqn_GPU.initializeTimeStep();
UEqn_GPU.fvm_ddt(&U.oldTime()[0][0]);
UEqn_GPU.fvm_div(boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
UEqn_GPU.fvc_grad(&p[0]);
UEqn_GPU.fvc_grad_vector();
UEqn_GPU.dev2T();
UEqn_GPU.fvc_div_tensor(&nuEff[0]);
UEqn_GPU.fvm_laplacian();
UEqn_GPU.sync();
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// start2 = std::clock();
// fvVectorMatrix turb_source
Expand Down Expand Up @@ -88,15 +88,15 @@
}
// Close the interval opened in lines collapsed above and add it to the UEqn
// total and solve timers.
// NOTE(review): `_Solve` / `_solve` are the before/after spellings from the
// rendered diff; presumably only one of the two lines exists in the real file.
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

// start1 = std::clock();
// // // std::thread t(&dfMatrix::solve, &UEqn_GPU);
// UEqn_GPU.solve();
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

// start1 = std::clock();
// // // t.join();
Expand Down
132 changes: 69 additions & 63 deletions applications/solvers/dfLowMachFoam/YEqn.H
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// Species-equation fragment: reset the three accumulator fields to Zero before
// they are rebuilt.
// NOTE(review): the hunk header shrinks this region from 4 lines to 3, so the
// `start` assignment is presumably the removed line - confirm.
start = std::clock();
hDiffCorrFlux = Zero;
diffAlphaD = Zero;
sumYDiffError = Zero;
Expand All @@ -14,6 +13,7 @@ tmp<fv::convectionScheme<scalar>> mvConvection
)
);

#ifdef CPUSolver_
start1 = std::clock();
forAll(Y, i)
{
Expand All @@ -22,78 +22,86 @@ forAll(Y, i)
const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
start1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef GPUSolver_
// Timed call to UEqn_GPU.solve(), issued from the species-equation fragment.
// NOTE(review): the `_Solve` line followed by `#endif`, and the `_solve` line
// after it, are the removed and added sides of the diff shown together; as
// rendered, the last timer line sits outside the #ifdef. Confirm against the
// repository.
start1 = std::clock();
// // std::thread t(&dfMatrix::solve, &UEqn_GPU);
UEqn_GPU.solve();
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

#ifdef GPUSolver_
// Host-side preparation for the GPU species solver: collect per-species raw
// pointers and pack per-patch boundary values into flat buffers obtained from
// cudaMallocHost.
// NOTE(review): removed and added diff lines are interleaved (for example
// Y_old, rhoDi, offset and boundary_mutsct are each declared twice), so this
// span is not compilable as rendered. The `*_init` / `rhoD_GPU` names look
// like the removed side - confirm against the repository.
// NOTE(review): the cudaMallocHost return values are discarded, and no
// matching cudaFreeHost is visible in this view - confirm the buffers are
// released (or reused) elsewhere.
start1 = std::clock();
start2 = std::clock();
std::vector<double*> Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
std::vector<const double*> rhoD_GPU(Y.size());
std::vector<double*> Y_old(Y.size()), boundary_Y(Y.size()), boundary_hai(Y.size()), boundary_rhoD(Y.size());
std::vector<const double*> hai(Y.size()), rhoD(Y.size());
for (size_t i = 0; i < Y.size(); ++i)
{
volScalarField& Yi = Y[i];
const volScalarField& rhoDi = chemistry->rhoD(i);
// Pointer to the first value of species i's old-time field.
Y_old[i] = &Yi.oldTime()[0];
rhoD_GPU[i] = &chemistry->rhoD(i)[0];
// Each per-species buffer is sized for num_boundary_faces doubles.
cudaMallocHost(&boundary_Y_init[i], num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_rhoD_init[i], num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double));
const volScalarField& haii = chemistry->hai(i);
const volScalarField& rhoDi = chemistry->rhoD(i);
hai[i] = &haii[0];
rhoD[i] = &rhoDi[0];
cudaMallocHost(&boundary_hai[i], num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_rhoD[i], num_boundary_faces*sizeof(double));
// offset is the running write position within this species' buffers.
int offset = 0;
forAll(Yi.boundaryField(), patchi)
{
const scalarField& patchYi = Yi.boundaryField()[patchi];
const scalarField& patchHaii = haii.boundaryField()[patchi];
const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi];
// The patch length is taken from the Yi patch and reused for hai and rhoD.
int patchSize = patchYi.size();

memcpy(boundary_Y_init[i]+offset, &patchYi[0], patchSize*sizeof(double));
memcpy(boundary_rhoD_init[i]+offset, &patchRhoDi[0], patchSize*sizeof(double));
memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double));
memcpy(boundary_hai[i] + offset, &patchHaii[0], patchSize*sizeof(double));
memcpy(boundary_rhoD[i] + offset, &patchRhoDi[0], patchSize*sizeof(double));
offset += patchSize;
}
}

// mut_sct is turbulence->mut() divided by Sct; its boundary values are packed
// into boundary_mutsct.
// NOTE(review): the loop walks p's patch list while indexing mut_sct's patches
// - assumes both fields share the same patch ordering; confirm.
volScalarField mut_sct = turbulence->mut().ref()/Sct;
std::vector<double> boundary_mutsct;
double *boundary_mutsct = nullptr;
cudaMallocHost(&boundary_mutsct, num_boundary_faces*sizeof(double));
int offset = 0;
forAll(p.boundaryField(), patchi)
{
const scalarField& patchMut_sct = mut_sct.boundaryField()[patchi];
int patchSize = patchMut_sct.size();
boundary_mutsct.insert(boundary_mutsct.end(), &patchMut_sct[0], &patchMut_sct[0] + patchSize);
memcpy(boundary_mutsct + offset, &patchMut_sct[0], patchSize*sizeof(double));
offset += patchSize;
}
// Close the preparation interval and add it to the YEqn timers.
end2 = std::clock();
time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end2 - start2) / double(CLOCKS_PER_SEC);
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
// Unconditionally prints the accumulated CPU-prepare time to stderr.
fprintf(stderr, "time_monitor_YEqn_mtxAssembly_CPU_prepare: %lf\n", time_monitor_YEqn_mtxAssembly_CPU_prepare);

// Timed GPU assembly sequence for the species equations, then a timed solve.
// NOTE(review): removed and added diff lines are interleaved. correctVelocity
// and the three-argument fvm_laplacian use the `*_init` / `rhoD_GPU` names and
// look like the removed side; the combined
// fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux call looks like the
// added side - confirm against the repository.
start1 = std::clock();
YEqn_GPU.initializeTimeStep();
YEqn_GPU.upwindWeight();
YEqn_GPU.correctVelocity(Y_old, boundary_Y_init, rhoD_GPU);
YEqn_GPU.fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(Y_old, boundary_Y,
hai, boundary_hai, rhoD, boundary_rhoD, &mut_sct[0], boundary_mutsct, &thermo.alpha()[0]);
YEqn_GPU.fvm_ddt();
YEqn_GPU.fvm_div_phi();
YEqn_GPU.fvm_div_phiUc();
YEqn_GPU.fvm_laplacian(&mut_sct[0], boundary_mutsct.data(), boundary_rhoD_init);

// sync() is called before the clock is read, so the assembly interval ends
// after the sync returns.
YEqn_GPU.sync();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// Timed GPU solve; no sync() is visible between solve() and the clock read.
start1 = std::clock();
YEqn_GPU.solve();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

//MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
// Issue the barrier only when MPI has actually been initialised.
// MPI_Initialized() writes its result through an int*, so the flag is a plain,
// zero-initialised int rather than an OpenFOAM label, whose width depends on
// the build's label size.
int flag_mpi_init = 0;
MPI_Initialized(&flag_mpi_init);
if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
// NOTE(review): the `start` this interval is measured from appears to be a
// removed line in the rendered diff; confirm these two lines are still wanted.
end = std::clock();
time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);

{
if (!splitting)
Expand All @@ -108,52 +116,50 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
time_monitor_chem += processingTime.count();
}

#ifdef GPUSolver_
// GPU build: for every species, pass the field's storage and its index to
// YEqn_GPU.updatePsi (presumably to receive the GPU solution - confirm), then
// re-apply that field's boundary conditions on the host. The whole loop is
// timed as one interval.
start1 = std::clock();
forAll(Y, i)
{
volScalarField& Yi = Y[i];
YEqn_GPU.updatePsi(&Yi[0], i);
Yi.correctBoundaryConditions();
}
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
#else
// Non-GPU branch: loop over all species, accumulate hDiffCorrFlux and
// diffAlphaD, and for every species except inertIndex assemble and solve a
// transport equation, clip the result at zero and add it to Yt.
// NOTE(review): removed and added diff lines are heavily interleaved below
// (the DEff/YiEqn declaration and the matrix expression each appear twice, and
// a nested `#ifdef GPUSolver_` block from the old layout is shown inside this
// branch), so the span is not compilable as rendered. Read it as a
// side-by-side of the old and new text and confirm against the repository.
start2 = std::clock();
// Yt starts as a zero-valued field built from Y[0] and accumulates the solved
// species below.
volScalarField Yt(0.0*Y[0]);

start = std::clock();
int speciesIndex = 0;
forAll(Y, i)
{
volScalarField& Yi = Y[i];
hDiffCorrFlux += chemistry->hai(i)*(chemistry->rhoD(i)*fvc::grad(Yi) - Yi*sumYDiffError);
diffAlphaD += fvc::laplacian(thermo.alpha()*chemistry->hai(i), Yi);

if (i != inertIndex)
{
#ifdef GPUSolver_
start1 = std::clock();
YEqn_GPU.updatePsi(&Yi[0], speciesIndex);
Yi.correctBoundaryConditions();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
#else
start1 = std::clock();
tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
fvScalarMatrix YiEqn
start1 = std::clock();
tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
fvScalarMatrix YiEqn
(
fvm::ddt(rho, Yi)
+ mvConvection->fvmDiv(phi, Yi)
+ mvConvection->fvmDiv(phiUc, Yi)
==
(
splitting
? fvm::laplacian(DEff(), Yi)
: (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
)
fvm::ddt(rho, Yi)
+ mvConvection->fvmDiv(phi, Yi)
+ mvConvection->fvmDiv(phiUc, Yi)
==
(
splitting
? fvm::laplacian(DEff(), Yi)
: (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
)
);
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// YiEqn.relax();
end1 = std::clock();
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// YiEqn.relax();

start1 = std::clock();
YiEqn.solve("Yi");
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
start1 = std::clock();
YiEqn.solve("Yi");
end1 = std::clock();
time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

// Clip negative values to zero, then add this species to the running sum.
Yi.max(0.0);
Yt += Yi;
Expand All @@ -163,7 +169,7 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);

// The inert species is set to one minus the sum of the solved species and then
// clipped at zero.
Y[inertIndex] = scalar(1) - Yt;
Y[inertIndex].max(0.0);

// NOTE(review): the end/start pair feeding time_monitor_Y and the end2/start2
// pair feeding time_monitor_YEqn are presumably the removed and added sides of
// the rendered diff - confirm against the repository.
end = std::clock();
time_monitor_Y += double(end - start) / double(CLOCKS_PER_SEC);
end2 = std::clock();
time_monitor_YEqn += double(end2 - start2) / double(CLOCKS_PER_SEC);
#endif
}
Loading