Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 93 additions & 12 deletions applications/solvers/dfLowMachFoam/EEqn.H
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
{
volScalarField& he = thermo.he();
#ifdef GPUSolver_
// start1 = std::clock();
// t.join();
// UEqn_GPU.updatePsi(&U[0][0]);
// K = 0.5*magSqr(U);
// end1 = std::clock();
// time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
// time_monitor_UinE += double(end1 - start1) / double(CLOCKS_PER_SEC);

#ifdef GPUSolver_
start1 = std::clock();
// // t.join();
UEqn_GPU.updatePsi(&U[0][0]);
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
#endif

#ifdef CPUSolver_
start1 = std::clock();
fvScalarMatrix EEqn
(
fvm::ddt(rho, he) + fvm::div(phi, he)
Expand All @@ -33,4 +27,91 @@
EEqn.relax();

EEqn.solve();

end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef GPUSolver_
// prepare data on CPU
start1 = std::clock();
start2 = std::clock();
const tmp<volScalarField> alphaEff_tmp(turbulence->alphaEff());
const volScalarField& alphaEff = alphaEff_tmp();
int eeqn_offset = 0;
forAll(he.boundaryField(), patchi)
{
const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi];
const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi];
int patchSize = pw.size();

const scalarField& patchK = K.boundaryField()[patchi];
const vectorField& patchhDiffCorrFlux = hDiffCorrFlux.boundaryField()[patchi];
const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi];
memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
memcpy(boundary_hDiffCorrFlux + eeqn_offset * 3, &patchhDiffCorrFlux[0][0], 3 * patchSize*sizeof(double));
memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));

Field<scalar> valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw);
Field<scalar> valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw);
Field<scalar> gradientInternalCoeffs = he.boundaryField()[patchi].gradientInternalCoeffs();
Field<scalar> gradientBoundaryCoeffs = he.boundaryField()[patchi].gradientBoundaryCoeffs();
memcpy(eeqn_valueInternalCoeffs + eeqn_offset, &valueInternalCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_valueBoundaryCoeffs + eeqn_offset, &valueBoundaryCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_gradientInternalCoeffs + eeqn_offset, &gradientInternalCoeffs[0], patchSize*sizeof(double));
memcpy(eeqn_gradientBoundaryCoeffs + eeqn_offset, &gradientBoundaryCoeffs[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
eeqn_offset = 0;
end1 = std::clock();
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly_CPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

// prepare data on GPU
start1 = std::clock();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
&dpdt[0], &diffAlphaD[0], &hDiffCorrFlux[0][0],
boundary_K, boundary_hDiffCorrFlux, boundary_alphaEff,
eeqn_valueInternalCoeffs, eeqn_valueBoundaryCoeffs,
eeqn_gradientInternalCoeffs, eeqn_gradientBoundaryCoeffs);
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_GPU_Prepare += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.resetAb();
EEqn_GPU.fvm_ddt();
EEqn_GPU.fvm_div();
EEqn_GPU.fvm_laplacian();
EEqn_GPU.fvc_ddt();
EEqn_GPU.fvc_div_phi_scalar();
EEqn_GPU.fvc_div_vector();
EEqn_GPU.add_to_source();
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn_mtxAssembly_GPU_Run += double(end1 - start1) / double(CLOCKS_PER_SEC);

EEqn_GPU.sync();
end2 = std::clock();
time_monitor_EEqn += double(end2 - start2) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end2 - start2) / double(CLOCKS_PER_SEC);

// check value of mtxAssembly, no time monitor
EEqn_GPU.checkValue(false);

start1 = std::clock();
EEqn_GPU.solve();
if (doSync) EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);

start1 = std::clock();
EEqn_GPU.updatePsi(&he[0]);
end1 = std::clock();
time_monitor_CPU += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif
}
15 changes: 14 additions & 1 deletion applications/solvers/dfLowMachFoam/createdfSolver.H
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UE
dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0],
&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex);

#ifdef GPUSolver_
dfRhoEqn rhoEqn_GPU(dfDataBase);
dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath);
dfYEqn YEqn_GPU(dfDataBase, "dDDI", settingPath, inertIndex);
dfEEqn EEqn_GPU(dfDataBase, "dDDI", settingPath);

double *ueqn_internalCoeffs_init, *ueqn_boundaryCoeffs_init, *boundary_pressure_init, *boundary_velocity_init,
*boundary_nuEff_init, *boundary_rho_init, *ueqn_laplac_internalCoeffs_init, *ueqn_laplac_boundaryCoeffs_init, *boundary_phi_init;
Expand All @@ -45,4 +47,15 @@ cudaMallocHost(&boundary_velocity_init, 3*num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_pressure_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double));

double *boundary_alphaEff, *boundary_K, *boundary_hDiffCorrFlux;
double *eeqn_valueInternalCoeffs, *eeqn_valueBoundaryCoeffs, *eeqn_gradientInternalCoeffs, *eeqn_gradientBoundaryCoeffs;
cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_hDiffCorrFlux, 3 * num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_valueInternalCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_valueBoundaryCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_gradientInternalCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_gradientBoundaryCoeffs, num_boundary_faces*sizeof(double));
#endif
23 changes: 23 additions & 0 deletions applications/solvers/dfLowMachFoam/dfLowMachFoam.C
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ Description
#include "dfUEqn.H"
#include "dfYEqn.H"
#include "dfRhoEqn.H"
#include "dfEEqn.H"
#include <cuda_runtime.h>
#include <thread>
#include "upwind.H"

#define GPUSolver_
//#define CPUSolver_

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

Expand Down Expand Up @@ -94,11 +96,19 @@ int main(int argc, char *argv[])
#include "createFields.H"
#include "createRhoUfIfPresent.H"

bool doSync = true;
double time_monitor_flow=0;
double time_monitor_UEqn=0;
double time_monitor_UEqn_mtxAssembly=0;
double time_monitor_UEqn_Solve=0;
double time_monitor_UEqn_sum=0;
double time_monitor_EEqn=0;
double time_monitor_EEqn_mtxAssembly=0;
double time_monitor_EEqn_mtxAssembly_CPU_Prepare;
double time_monitor_EEqn_mtxAssembly_GPU_Prepare;
double time_monitor_EEqn_mtxAssembly_GPU_Run;
double time_monitor_EEqn_Solve=0;
double time_monitor_EEqn_sum=0;
double time_monitor_chem=0;
double time_monitor_Y=0;
double time_monitor_E=0;
Expand Down Expand Up @@ -223,6 +233,7 @@ int main(int argc, char *argv[])

runTime.write();
time_monitor_UEqn_sum += time_monitor_UEqn;
time_monitor_EEqn_sum += time_monitor_EEqn;

Info << "output time index " << runTime.timeIndex() << endl;

Expand All @@ -238,6 +249,12 @@ int main(int argc, char *argv[])
Info<< "UEqn Time solve = " << time_monitor_UEqn_Solve << " s" << endl;
// Info<< "UEqn sum Time = " << time_monitor_UEqn_sum << " s" << endl;
// Info<< "UEqn sum Time - overhead = " << time_monitor_UEqn_sum - time_UEqn_initial << " s" << endl;
Info<< "EEqn Time = " << time_monitor_EEqn << " s" << endl;
Info<< "EEqn Time assamble Mtx = " << time_monitor_EEqn_mtxAssembly << " s" << endl;
Info<< "EEqn assamble(CPU prepare) = " << time_monitor_EEqn_mtxAssembly_CPU_Prepare << " s" << endl;
Info<< "EEqn assamble(GPU prepare) = " << time_monitor_EEqn_mtxAssembly_GPU_Prepare << " s" << endl;
Info<< "EEqn assamble(GPU run) = " << time_monitor_EEqn_mtxAssembly_GPU_Run << " s" << endl;
Info<< "EEqn Time solve = " << time_monitor_EEqn_Solve << " s" << endl;
Info<< "sum Time = " << (time_monitor_chem + time_monitor_Y + time_monitor_flow + time_monitor_E + time_monitor_corrThermo + time_monitor_corrDiff) << " s" << endl;
Info<< "CPU Time (get turb souce) = " << time_monitor_CPU << " s" << endl;
Info<< "UEqn time in EEqn = " << time_monitor_UinE << " s" << endl;
Expand All @@ -248,6 +265,12 @@ int main(int argc, char *argv[])
time_monitor_UEqn = 0;
time_monitor_UEqn_mtxAssembly = 0;
time_monitor_UEqn_Solve = 0;
time_monitor_EEqn = 0;
time_monitor_EEqn_mtxAssembly = 0;
time_monitor_EEqn_mtxAssembly_CPU_Prepare = 0;
time_monitor_EEqn_mtxAssembly_GPU_Prepare = 0;
time_monitor_EEqn_mtxAssembly_GPU_Run = 0;
time_monitor_EEqn_Solve = 0;
time_monitor_chem = 0;
time_monitor_Y = 0;
time_monitor_E = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ divSchemes
default none;

div(phi,U) Gauss linear;
div(phi,Yi) Gauss limitedLinear01 1;
div(phi,h) Gauss limitedLinear 1;
div(phi,ha) Gauss limitedLinear 1;
div(phi,K) Gauss limitedLinear 1;
div(phid,p) Gauss limitedLinear 1;
div(phi,epsilon) Gauss limitedLinear 1;
div(phi,Yi_h) Gauss limitedLinear01 1;
div(phi,k) Gauss limitedLinear 1;
div(hDiffCorrFlux) Gauss cubic;
div(phi,Yi) Gauss linear;
div(phi,h) Gauss linear;
div(phi,ha) Gauss linear;
div(phi,K) Gauss linear;
div(phid,p) Gauss linear;
div(phi,epsilon) Gauss linear;
div(phi,Yi_h) Gauss upwind;
div(phi,k) Gauss linear;
div(hDiffCorrFlux) Gauss linear;
div(((rho*nuEff)*dev2(T(grad(U))))) Gauss linear;
}

Expand Down
1 change: 1 addition & 0 deletions src_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_library(${PROJECT_NAME}
dfUEqn.cu
dfRhoEqn.cu
dfYEqn.cu
dfEEqn.cu
AmgXSolver.cu)

target_link_libraries(${PROJECT_NAME}
Expand Down
75 changes: 75 additions & 0 deletions src_gpu/dfEEqn.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once

#include "AmgXSolver.H"
#include <amgx_c.h>
#include "dfMatrixDataBase.H"

class dfEEqn
{
private:
dfMatrixDataBase &dataBase_;
cudaStream_t stream;
AmgXSolver *ESolver = nullptr;
int num_iteration;

// common variables
int num_cells, cell_bytes, num_faces, num_surfaces, cell_vec_bytes, csr_value_vec_bytes, num_boundary_cells;
int num_boundary_faces, boundary_face_bytes;
int *d_A_csr_row_index, *d_A_csr_diag_index, *d_A_csr_col_index;

// Matrix variables
double *d_A_csr, *d_b = nullptr;
double *h_A_csr, *h_b = nullptr;
double *d_he_old = nullptr;
double *h_he_new = nullptr;

// fields used by EEqn
double *d_alphaEff = nullptr;
double *d_K = nullptr;
double *d_K_old = nullptr;
double *d_dpdt = nullptr;
double *d_diffAlphaD = nullptr;
double *d_hDiffCorrFlux = nullptr;
double *d_boundary_K_init = nullptr;
double *d_boundary_K = nullptr;
double *d_boundary_hDiffCorrFlux_init = nullptr;
double *d_boundary_hDiffCorrFlux = nullptr;
double *d_boundary_alphaEff_init = nullptr;
double *d_boundary_alphaEff = nullptr;
double *d_value_internal_coeffs_init = nullptr;
double *d_value_boundary_coeffs_init = nullptr;
double *d_gradient_internal_coeffs_init = nullptr;
double *d_gradient_boundary_coeffs_init = nullptr;
double *d_value_internal_coeffs = nullptr;
double *d_value_boundary_coeffs = nullptr;
double *d_gradient_internal_coeffs = nullptr;
double *d_gradient_boundary_coeffs = nullptr;

public:
dfEEqn();
dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile);
~dfEEqn();

void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
const double *hDiffCorrFlux, const double *dpdt, const double *diffAlphaD,
const double *boundary_K, const double *boundary_hDiffCorrFlux, const double *boundary_alphaEff,
const double *valueInternalCoeffs, const double *valueBoundaryCoeffs,
const double *gradientInternalCoeffs, const double *gradientBoundaryCoeffs);

void resetAb();

void fvm_ddt();
void fvm_div();
void fvm_laplacian();

void fvc_ddt();
void fvc_div_phi_scalar();
void fvc_div_vector();
void add_to_source();

void solve();
void checkValue(bool print);
void updatePsi(double *Psi);

void sync();
};
Loading