Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 10 additions & 20 deletions applications/solvers/dfLowMachFoam/EEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@
==
fvc::div(hDiffCorrFlux)
);
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

EEqn.relax();

// EEqn.relax();
start1 = std::clock();
EEqn.solve();

end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef GPUSolver_
Expand All @@ -52,35 +55,22 @@
memcpy(boundary_hDiffCorrFlux + eeqn_offset * 3, &patchhDiffCorrFlux[0][0], 3 * patchSize*sizeof(double));
memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));

Field<scalar> valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw);
Field<scalar> valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw);
Field<scalar> gradientInternalCoeffs = he.boundaryField()[patchi].gradientInternalCoeffs();
Field<scalar> gradientBoundaryCoeffs = he.boundaryField()[patchi].gradientBoundaryCoeffs();
memcpy(eeqn_valueInternalCoeffs + eeqn_offset, &valueInternalCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_valueBoundaryCoeffs + eeqn_offset, &valueBoundaryCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_gradientInternalCoeffs + eeqn_offset, &gradientInternalCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_gradientBoundaryCoeffs + eeqn_offset, &gradientBoundaryCoeffs[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
eeqn_offset = 0;
end1 = std::clock();
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

// prepare data on GPU
start1 = std::clock();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
&dpdt[0], &diffAlphaD[0], &hDiffCorrFlux[0][0],
boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff,
eeqn_valueInternalCoeffs, eeqn_valueBoundaryCoeffs,
eeqn_gradientInternalCoeffs, eeqn_gradientBoundaryCoeffs);
boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff);
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_GPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.resetAb();
EEqn_GPU.initializeTimeStep();
EEqn_GPU.fvm_ddt();
EEqn_GPU.fvm_div();
EEqn_GPU.fvm_laplacian();
Expand All @@ -98,7 +88,7 @@
time_monitor_EEqn_mtxAssembly += double(end2 - start2) / double(CLOCKS_PER_SEC);

// check value of mtxAssembly, no time monitor
EEqn_GPU.checkValue(false);
// EEqn_GPU.checkValue(false);

start1 = std::clock();
EEqn_GPU.solve();
Expand All @@ -109,8 +99,8 @@

start1 = std::clock();
EEqn_GPU.updatePsi(&he[0]);
he.correctBoundaryConditions();
end1 = std::clock();
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
Expand Down
26 changes: 4 additions & 22 deletions applications/solvers/dfLowMachFoam/UEqn.H
Original file line number Diff line number Diff line change
@@ -1,37 +1,21 @@
// Solve the Momentum equation
#ifdef GPUSolver_
start1 = std::clock();
UEqn_GPU.initializeTimeStep();
UEqn_GPU.fvm_ddt(&U.oldTime()[0][0]);
start2 = std::clock();
int offset = 0;
const tmp<volScalarField> nuEff_tmp(turbulence->nuEff());
const volScalarField& nuEff = nuEff_tmp();
forAll(U.boundaryField(), patchi)
{
const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];

const scalarField& patchP = p.boundaryField()[patchi];
const vectorField& pSf = mesh.Sf().boundaryField()[patchi];
const vectorField& patchU = U.boundaryField()[patchi];
const scalarField& patchRho = rho.boundaryField()[patchi];
const scalarField& patchNuEff = nuEff.boundaryField()[patchi];

int patchSize = pw.size();

Field<vector> ueqn_internalCoeffs_vec = patchFlux*U.boundaryField()[patchi].valueInternalCoeffs(pw);
Field<vector> ueqn_boundaryCoeffs_vec = -patchFlux*U.boundaryField()[patchi].valueBoundaryCoeffs(pw);
Field<vector> ueqn_laplac_internalCoeffs_vec = U.boundaryField()[patchi].gradientInternalCoeffs();
Field<vector> ueqn_laplac_boundaryCoeffs_vec = U.boundaryField()[patchi].gradientBoundaryCoeffs();
int patchSize = patchP.size();

// only need to construct once
memcpy(ueqn_internalCoeffs_init + 3*offset, &ueqn_internalCoeffs_vec[0][0], 3*patchSize*sizeof(double));
// need to construct every time step
memcpy(ueqn_boundaryCoeffs_init + 3*offset, &ueqn_boundaryCoeffs_vec[0][0], 3*patchSize*sizeof(double));
// laplacian internalCoeffs
memcpy(ueqn_laplac_internalCoeffs_init + 3*offset, &ueqn_laplac_internalCoeffs_vec[0][0], 3*patchSize*sizeof(double));
// laplacian boundaryCoeffs
memcpy(ueqn_laplac_boundaryCoeffs_init + 3*offset, &ueqn_laplac_boundaryCoeffs_vec[0][0], 3*patchSize*sizeof(double));
// boundary pressure
memcpy(boundary_pressure_init+offset, &patchP[0], patchSize*sizeof(double));
// boundary velocity
Expand All @@ -44,14 +28,12 @@
}
end1 = std::clock();
end2 = std::clock();
time_monitor_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
UEqn_GPU.fvm_div(&phi[0], ueqn_internalCoeffs_init, ueqn_boundaryCoeffs_init,
ueqn_laplac_internalCoeffs_init, ueqn_laplac_boundaryCoeffs_init,
boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
UEqn_GPU.fvm_div(boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
UEqn_GPU.fvc_grad(&p[0]);
UEqn_GPU.fvc_grad_vector();
UEqn_GPU.dev2T();
Expand Down
42 changes: 33 additions & 9 deletions applications/solvers/dfLowMachFoam/YEqn.H
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
start = std::clock();
cudaSetDevice(0);
hDiffCorrFlux = Zero;
diffAlphaD = Zero;
sumYDiffError = Zero;
Expand All @@ -15,12 +14,15 @@ tmp<fv::convectionScheme<scalar>> mvConvection
)
);


start1 = std::clock();
forAll(Y, i)
{
sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]);
}
const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
start1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

#ifdef GPUSolver_
start1 = std::clock();
Expand All @@ -32,13 +34,14 @@ const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
#endif

#ifdef GPUSolver_
std::vector<double*> Y_new(Y.size()), Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
start1 = std::clock();
start2 = std::clock();
std::vector<double*> Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
std::vector<const double*> rhoD_GPU(Y.size());
for (size_t i = 0; i < Y.size(); ++i)
{
volScalarField& Yi = Y[i];
const volScalarField& rhoDi = chemistry->rhoD(i);
Y_new[i] = &Yi[0];
Y_old[i] = &Yi.oldTime()[0];
rhoD_GPU[i] = &chemistry->rhoD(i)[0];
cudaMallocHost(&boundary_Y_init[i], num_boundary_faces*sizeof(double));
Expand All @@ -64,15 +67,25 @@ const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
int patchSize = patchMut_sct.size();
boundary_mutsct.insert(boundary_mutsct.end(), &patchMut_sct[0], &patchMut_sct[0] + patchSize);
}
end2 = std::clock();
time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end2 - start2) / double(CLOCKS_PER_SEC);

YEqn_GPU.upwindWeight();
YEqn_GPU.correctVelocity(Y_new, boundary_Y_init, rhoD_GPU);
YEqn_GPU.fvm_ddt(Y_old);
YEqn_GPU.correctVelocity(Y_old, boundary_Y_init, rhoD_GPU);
YEqn_GPU.fvm_ddt();
YEqn_GPU.fvm_div_phi();
YEqn_GPU.fvm_div_phiUc();
YEqn_GPU.fvm_laplacian(&mut_sct[0], boundary_mutsct.data(), boundary_rhoD_init);

end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
YEqn_GPU.solve();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

//MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
Expand Down Expand Up @@ -108,9 +121,15 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
if (i != inertIndex)
{
#ifdef GPUSolver_
start1 = std::clock();
YEqn_GPU.updatePsi(&Yi[0], speciesIndex);
Yi.correctBoundaryConditions();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
#else
start1 = std::clock();
tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
fvScalarMatrix YiEqn
(
Expand All @@ -124,10 +143,16 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
: (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
)
);
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// YiEqn.relax();

YiEqn.relax();

start1 = std::clock();
YiEqn.solve("Yi");
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

Yi.max(0.0);
Expand All @@ -141,5 +166,4 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);

end = std::clock();
time_monitor_Y += double(end - start) / double(CLOCKS_PER_SEC);
cudaSetDevice(0);
}
16 changes: 14 additions & 2 deletions applications/solvers/dfLowMachFoam/dfLowMachFoam.C
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,18 @@ int main(int argc, char *argv[])
double time_monitor_UEqn_mtxAssembly=0;
double time_monitor_UEqn_Solve=0;
double time_monitor_UEqn_sum=0;
double time_monitor_UEqn_CPU=0;
double time_monitor_EEqn=0;
double time_monitor_EEqn_mtxAssembly=0;
double time_monitor_EEqn_mtxAssembly_CPU_Prepare;
double time_monitor_EEqn_mtxAssembly_GPU_Prepare;
double time_monitor_EEqn_mtxAssembly_GPU_Run;
double time_monitor_EEqn_Solve=0;
double time_monitor_EEqn_sum=0;
double time_monitor_YEqn=0;
double time_monitor_YEqn_mtxAssembly=0;
double time_monitor_YEqn_mtxAssembly_CPU_Prepare=0;
double time_monitor_YEqn_Solve=0;
double time_monitor_chem=0;
double time_monitor_Y=0;
double time_monitor_E=0;
Expand Down Expand Up @@ -213,7 +218,7 @@ int main(int argc, char *argv[])
{
if (pimple.consistent())
{
#include "pcEqn.H"
// #include "pcEqn.H"
}
else
{
Expand Down Expand Up @@ -255,9 +260,12 @@ int main(int argc, char *argv[])
Info<< "EEqn assamble(GPU prepare) = " << time_monitor_EEqn_mtxAssembly_GPU_Prepare << " s" << endl;
Info<< "EEqn assamble(GPU run) = " << time_monitor_EEqn_mtxAssembly_GPU_Run << " s" << endl;
Info<< "EEqn Time solve = " << time_monitor_EEqn_Solve << " s" << endl;
Info<< "YEqn Time = " << time_monitor_YEqn << " s" << endl;
Info<< "YEqn Time assamble Mtx = " << time_monitor_YEqn_mtxAssembly << " s" << endl;
Info<< "YEqn assamble(CPU prepare) = " << time_monitor_YEqn_mtxAssembly_CPU_Prepare << " s" << endl;
Info<< "YEqn Time solve = " << time_monitor_YEqn_Solve << " s" << endl;
Info<< "sum Time = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
Info<< "CPU Time (get turb souce) = " << time_monitor_CPU << " s" << endl;
Info<< "UEqn time in EEqn = " << time_monitor_UinE << " s" << endl;
Info<< "============================================"<<nl<< endl;

Info<< "ExecutionTime = " << runTime.elapsedCpuTime() << " s"
Expand All @@ -279,6 +287,10 @@ int main(int argc, char *argv[])
time_monitor_corrDiff = 0;
time_monitor_CPU = 0;
time_monitor_UinE = 0;
time_monitor_YEqn=0;
time_monitor_YEqn_mtxAssembly=0;
time_monitor_YEqn_mtxAssembly_CPU_Prepare=0;
time_monitor_YEqn_Solve=0;

#ifdef USE_PYTORCH
if (log_ && torch_)
Expand Down
15 changes: 9 additions & 6 deletions applications/solvers/dfLowMachFoam/pEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ const volScalarField psip0(psi*p);
volScalarField rAU(1.0/UEqn.A());
surfaceScalarField rhorAUf("rhorAUf", fvc::interpolate(rho*rAU));
volVectorField HbyA(constrainHbyA(rAU*UEqn.H(), U, p));
#endif

if (pimple.nCorrPiso() <= 1)
{
tUEqn.clear();
}
if (pimple.nCorrPiso() <= 1)
{
tUEqn.clear();
}
#endif

surfaceScalarField phiHbyA
(
Expand Down Expand Up @@ -143,7 +143,10 @@ p.relax();
U = HbyA - rAU*fvc::grad(p);
U.correctBoundaryConditions();
K = 0.5*magSqr(U);
UEqn_GPU.correctPsi(&U[0][0]);

#ifdef GPUSolver_
UEqn_GPU.correctPsi(&U[0][0]);
#endif

if (pimple.simpleRho())
{
Expand Down
2 changes: 1 addition & 1 deletion applications/solvers/dfLowMachFoam/rhoEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Description
offset += patchSize;
}
rhoEqn_GPU.fvc_div(&phi[0], boundary_phi_init);
rhoEqn_GPU.fvm_ddt(&rho.oldTime()[0], &rho[0]);
rhoEqn_GPU.fvm_ddt(&rho.oldTime()[0]);
rhoEqn_GPU.updatePsi(&rho.primitiveFieldRef()[0]);
rho.correctBoundaryConditions();
}
Expand Down
6 changes: 2 additions & 4 deletions src_gpu/dfEEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,9 @@ public:

void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
const double *hDiffCorrFlux, const double *dpdt, const double *diffAlphaD,
const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff,
const double *valueInternalCoeffs, const double *valueBoundaryCoeffs,
const double *gradientInternalCoeffs, const double *gradientBoundaryCoeffs);
const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff);

void resetAb();
void initializeTimeStep();

void fvm_ddt();
void fvm_div();
Expand Down
Loading