Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 1 addition & 106 deletions applications/solvers/dfLowMachFoam/EEqn.H
Original file line number Diff line number Diff line change
@@ -1,110 +1,6 @@
{
volScalarField& he = thermo.he();
#ifdef GPUSolver_
start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
UEqn_GPU.correctBoundaryConditions();
U.correctBoundaryConditions();
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);

// prepare data on CPU
start1 = std::clock();
start2 = std::clock();
// const tmp<volScalarField> alphaEff_tmp(thermo.alpha());
// const volScalarField& alphaEff = alphaEff_tmp();
double *alphaEff = nullptr; // tmp
end2 = std::clock();
int eeqn_offset = 0;
int patchNum = 0;

forAll(he.boundaryField(), patchi)
{
patchNum++;
const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
int patchSize = pw.size();

// construct gradient manually
const fvPatchScalarField& hew = he.boundaryField()[patchi];
const basicThermo& bThermo = basicThermo::lookupThermo(hew);
const scalarField& ppw = bThermo.p().boundaryField()[patchi];
fvPatchScalarField& Tw =
const_cast<fvPatchScalarField&>(bThermo.T().boundaryField()[patchi]);
scalarField& Tw_v = Tw;

Tw.evaluate();
const scalarField& patchDeltaCoeff = mesh.boundary()[patchi].deltaCoeffs();
const scalarField heInternal = bThermo.he(ppw, Tw, patchi)();
const scalarField heBoundary = bThermo.he(ppw, Tw, mesh.boundary()[patchi].faceCells())();
const scalarField patchGradMau = patchDeltaCoeff * (heInternal - heBoundary);

const scalarField& patchK = K.boundaryField()[patchi];
// const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi]; // not H2Dcopy when use UnityLewis
// const scalarField& patchGrad = he.boundaryField()[patchi].gradientBoundaryCoeffs(); // gradient_

// const DimensionedField<scalar, volMesh>& patchHa_ = he.boundaryField()[patchi];
// const gradientEnergyFvPatchScalarField patchHa(mesh.boundary()[patchi], patchHa_);
// const scalarField& patchGrad = patchHa.gradient(); // gradient_
memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
// memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));
memcpy(boundary_gradient + eeqn_offset, &patchGradMau[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
fprintf(stderr, "time_monitor_EEqn_mtxAssembly_CPU_prepare: %lf, build alphaEff time: %lf, patchNum: %d\n",
time_monitor_EEqn_mtxAssembly_CPU_prepare,
double(end2 - start2) / double(CLOCKS_PER_SEC), patchNum);

// prepare data on GPU
start1 = std::clock();
he.oldTime();
K.oldTime();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], alphaEff,
&dpdt[0], boundary_K, boundary_alphaEff, boundary_gradient);
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.initializeTimeStep();
EEqn_GPU.fvm_ddt();
EEqn_GPU.fvm_div();
EEqn_GPU.fvm_laplacian();
EEqn_GPU.fvc_ddt();
EEqn_GPU.fvc_div_phi_scalar();
EEqn_GPU.fvc_div_vector();
EEqn_GPU.add_to_source();
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// check value of mtxAssembly, no time monitor
// EEqn_GPU.checkValue(true);

start1 = std::clock();
EEqn_GPU.solve();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.updatePsi(&he[0]);
he.correctBoundaryConditions();
he.write();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
#else

start1 = std::clock();
fvScalarMatrix EEqn
(
Expand Down Expand Up @@ -137,5 +33,4 @@
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
}
106 changes: 106 additions & 0 deletions applications/solvers/dfLowMachFoam/EEqn_GPU.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
{
volScalarField& he = thermo.he();
start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
UEqn_GPU.correctBoundaryConditions();
U.correctBoundaryConditions();
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);

// prepare data on CPU
start1 = std::clock();
start2 = std::clock();
// const tmp<volScalarField> alphaEff_tmp(thermo.alpha());
// const volScalarField& alphaEff = alphaEff_tmp();
double *alphaEff = nullptr; // tmp
end2 = std::clock();
int eeqn_offset = 0;
int patchNum = 0;

forAll(he.boundaryField(), patchi)
{
patchNum++;
const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
int patchSize = pw.size();

// construct gradient manually
const fvPatchScalarField& hew = he.boundaryField()[patchi];
const basicThermo& bThermo = basicThermo::lookupThermo(hew);
const scalarField& ppw = bThermo.p().boundaryField()[patchi];
fvPatchScalarField& Tw =
const_cast<fvPatchScalarField&>(bThermo.T().boundaryField()[patchi]);
scalarField& Tw_v = Tw;

Tw.evaluate();
const scalarField& patchDeltaCoeff = mesh.boundary()[patchi].deltaCoeffs();
const scalarField heInternal = bThermo.he(ppw, Tw, patchi)();
const scalarField heBoundary = bThermo.he(ppw, Tw, mesh.boundary()[patchi].faceCells())();
const scalarField patchGradMau = patchDeltaCoeff * (heInternal - heBoundary);

const scalarField& patchK = K.boundaryField()[patchi];
// const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi]; // not H2Dcopy when use UnityLewis
// const scalarField& patchGrad = he.boundaryField()[patchi].gradientBoundaryCoeffs(); // gradient_

// const DimensionedField<scalar, volMesh>& patchHa_ = he.boundaryField()[patchi];
// const gradientEnergyFvPatchScalarField patchHa(mesh.boundary()[patchi], patchHa_);
// const scalarField& patchGrad = patchHa.gradient(); // gradient_
memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
// memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));
memcpy(boundary_gradient + eeqn_offset, &patchGradMau[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);
fprintf(stderr, "time_monitor_EEqn_mtxAssembly_CPU_prepare: %lf, build alphaEff time: %lf, patchNum: %d\n",
time_monitor_EEqn_mtxAssembly_CPU_prepare,
double(end2 - start2) / double(CLOCKS_PER_SEC), patchNum);

// prepare data on GPU
start1 = std::clock();
he.oldTime();
K.oldTime();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], alphaEff,
&dpdt[0], boundary_K, boundary_alphaEff, boundary_gradient);
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.initializeTimeStep();
EEqn_GPU.fvm_ddt();
EEqn_GPU.fvm_div();
EEqn_GPU.fvm_laplacian();
EEqn_GPU.fvc_ddt();
EEqn_GPU.fvc_div_phi_scalar();
EEqn_GPU.fvc_div_vector();
EEqn_GPU.add_to_source();
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// check value of mtxAssembly, no time monitor
// EEqn_GPU.checkValue(true);

start1 = std::clock();
EEqn_GPU.solve();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.updatePsi(&he[0]);
he.correctBoundaryConditions();
he.write();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
}
154 changes: 23 additions & 131 deletions applications/solvers/dfLowMachFoam/UEqn.H
Original file line number Diff line number Diff line change
@@ -1,132 +1,24 @@
// Solve the Momentum equation
#ifdef GPUSolver_
start1 = std::clock();
int offset = 0;
const tmp<volScalarField> nuEff_tmp(turbulence->nuEff());
const volScalarField& nuEff = nuEff_tmp();
forAll(U.boundaryField(), patchi)
{
const scalarField& patchP = p.boundaryField()[patchi];
const vectorField& patchU = U.boundaryField()[patchi];
const scalarField& patchRho = rho.boundaryField()[patchi];
const scalarField& patchNuEff = nuEff.boundaryField()[patchi];

int patchSize = patchP.size();

// boundary pressure
memcpy(boundary_pressure_init+offset, &patchP[0], patchSize*sizeof(double));
// boundary velocity
memcpy(boundary_velocity_init+3*offset, &patchU[0][0], 3*patchSize*sizeof(double));
// boundary nuEff
memcpy(boundary_nuEff_init+offset, &patchNuEff[0], patchSize*sizeof(double));
// boundary rho
memcpy(boundary_rho_init+offset, &patchRho[0], patchSize*sizeof(double));
offset += patchSize;
}
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly_CPU_prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
UEqn_GPU.initializeTimeStep();
U.oldTime();
UEqn_GPU.fvm_ddt(&U.oldTime()[0][0]);
UEqn_GPU.fvm_div(boundary_pressure_init, boundary_velocity_init, boundary_nuEff_init, boundary_rho_init);
UEqn_GPU.fvc_grad(&p[0]);
UEqn_GPU.fvc_grad_vector();
UEqn_GPU.dev2T();
UEqn_GPU.fvc_div_tensor(&nuEff[0]);
UEqn_GPU.fvm_laplacian();
UEqn_GPU.sync();
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC);

// start2 = std::clock();
// fvVectorMatrix turb_source
// (
// turbulence->divDevRhoReff(U)
// );
// end2 = std::clock();
// time_monitor_CPU += double(end2 - start2) / double(CLOCKS_PER_SEC);

// UEqn_GPU.add_fvMatrix(&turb_source.lower()[0], &turb_source.diag()[0], &turb_source.upper()[0], &turb_source.source()[0][0]);
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

// check value
// U.oldTime();
// tmp<fvVectorMatrix> tUEqn
// (
// fvm::ddt(rho, U)
// +
// fvm::div(phi, U)
// +
// turbulence->divDevRhoReff(U)
// == -fvc::grad(p)
// );
// fvVectorMatrix& UEqn = tUEqn.ref();
// printf("b_cpu = %e\n", UEqn.source()[1][1]);
// forAll(U.boundaryField(), patchi){
// labelUList sub_boundary = mesh.boundary()[patchi].faceCells();
// forAll(sub_boundary, i){
// if (sub_boundary[i] == 1){
// printf("b_cpu_bou = %e\n", UEqn.boundaryCoeffs()[patchi][i][1]);
// printf("patchi = %d, i = %d\n", patchi, i);
// }
// }
// }
// if (pimple.momentumPredictor())
// {
// solve(UEqn);
// Info << "U_CPU\n" << U << endl;
// K = 0.5*magSqr(U);
// }
// UEqn_GPU.checkValue(true);
#else
start1 = std::clock();
tmp<fvVectorMatrix> tUEqn
(
fvm::ddt(rho, U) + fvm::div(phi, U)
+ turbulence->divDevRhoReff(U)
== -fvc::grad(p)
);
fvVectorMatrix& UEqn = tUEqn.ref();

end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

UEqn.relax();
start1 = std::clock();
if (pimple.momentumPredictor())
{
solve(UEqn);

K = 0.5*magSqr(U);
}
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

// start1 = std::clock();
// // // std::thread t(&dfMatrix::solve, &UEqn_GPU);
// UEqn_GPU.solve();
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

// start1 = std::clock();
// // // t.join();
// // UEqn_GPU.updatePsi(&U[0][0]);
// K = 0.5*magSqr(U);
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
// // Info << "U_amgx = " << U << endl;
start1 = std::clock();
tmp<fvVectorMatrix> tUEqn
(
fvm::ddt(rho, U) + fvm::div(phi, U)
+ turbulence->divDevRhoReff(U)
);
fvVectorMatrix& UEqn = tUEqn.ref();

end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);

UEqn.relax();
start1 = std::clock();
if (pimple.momentumPredictor())
{
solve(UEqn == -fvc::grad(p));

K = 0.5*magSqr(U);
}
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

Loading