Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion applications/solvers/dfLowMachFoam/EEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#ifdef GPUSolver_
start1 = std::clock();
UEqn_GPU.updatePsi(&U[0][0]);
UEqn_GPU.correctBoundaryConditions();
U.correctBoundaryConditions();
K = 0.5*magSqr(U);
end1 = std::clock();
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
Expand All @@ -51,8 +53,10 @@

const scalarField& patchK = K.boundaryField()[patchi];
const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi];
const scalarField& patchGrad = he.boundaryField()[patchi].gradientBoundaryCoeffs(); // gradient_
memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double));
memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double));
memcpy(boundary_gradient + eeqn_offset, &patchGrad[0], patchSize*sizeof(double));

eeqn_offset += patchSize;
}
Expand All @@ -67,7 +71,7 @@
// prepare data on GPU
start1 = std::clock();
EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0],
&dpdt[0], boundary_K, boundary_alphaEff);
&dpdt[0], boundary_K, boundary_alphaEff, boundary_gradient);
EEqn_GPU.sync();
end1 = std::clock();
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
Expand Down
13 changes: 11 additions & 2 deletions applications/solvers/dfLowMachFoam/YEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
{
volScalarField& Yi = Y[i];
Y_old[i] = &Yi.oldTime()[0];
cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double));
if (updateBoundaryFields)
{
cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double));
Info << "here" << endl;
}
const volScalarField& haii = chemistry->hai(i);
const volScalarField& rhoDi = chemistry->rhoD(i);
hai[i] = &haii[0];
Expand All @@ -54,12 +58,16 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi];
int patchSize = patchYi.size();

memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double));
if (updateBoundaryFields)
{
memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double));
}
memcpy(boundary_hai[i] + offset, &patchHaii[0], patchSize*sizeof(double));
memcpy(boundary_rhoD[i] + offset, &patchRhoDi[0], patchSize*sizeof(double));
offset += patchSize;
}
}
updateBoundaryFields = false;
volScalarField mut_sct = turbulence->mut().ref()/Sct;
double *boundary_mutsct = nullptr;
cudaMallocHost(&boundary_mutsct, num_boundary_faces*sizeof(double));
Expand Down Expand Up @@ -124,6 +132,7 @@ if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
YEqn_GPU.updatePsi(&Yi[0], i);
Yi.correctBoundaryConditions();
}
YEqn_GPU.correctBoundaryConditions();
end1 = std::clock();
time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
time_monitor_YEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
Expand Down
20 changes: 13 additions & 7 deletions applications/solvers/dfLowMachFoam/createdfSolver.H
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ std::vector<int> boundaryCellIndex;
std::vector<double> boundary_face_vector_init;
std::vector<double> boundary_face_init;
std::vector<double> boundary_deltaCoeffs_init;
std::vector<std::vector<int>> patchTypes;
std::vector<int> patchTypeU, patchTypeY;
int num_boundary_faces = 0;
int patchSize;
forAll(mesh.boundary(), patchi)
Expand All @@ -22,14 +24,20 @@ forAll(mesh.boundary(), patchi)
boundary_face_init.insert(boundary_face_init.end(), &pMagSf[0], &pMagSf[0]+patchSize);
boundary_deltaCoeffs_init.insert(boundary_deltaCoeffs_init.end(), &pDeltaCoeffs[0], &pDeltaCoeffs[0]+patchSize);
num_boundary_faces += patchSize;

constructBoundarySelector(patchTypeU, U.boundaryField()[patchi].type(), patchSize);
constructBoundarySelector(patchTypeY, Y[0].boundaryField()[patchi].type(), patchSize);
}
patchTypes.emplace_back(patchTypeU);
patchTypes.emplace_back(patchTypeY);

int num_boundary_cells;

string settingPath;
settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UEqnSettingPath", string(""));

dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0],
&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex);
&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex, patchTypes);

#ifdef GPUSolver_
dfRhoEqn rhoEqn_GPU(dfDataBase);
Expand All @@ -49,12 +57,10 @@ cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double));

double *boundary_alphaEff, *boundary_K;
double *eeqn_valueInternalCoeffs, *eeqn_valueBoundaryCoeffs, *eeqn_gradientInternalCoeffs, *eeqn_gradientBoundaryCoeffs;
double *boundary_alphaEff, *boundary_K, *boundary_gradient;
cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_valueInternalCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_valueBoundaryCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_gradientInternalCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&eeqn_gradientBoundaryCoeffs, num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_gradient, num_boundary_faces * sizeof(double));

bool updateBoundaryFields = true; // make sure that the boundary fields do H2D copy at 1st timestep
#endif
3 changes: 2 additions & 1 deletion src_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ add_library(${PROJECT_NAME}
dfRhoEqn.cu
dfYEqn.cu
dfEEqn.cu
AmgXSolver.cu)
AmgXSolver.cu
dfMatrixDataBase.cpp)

target_link_libraries(${PROJECT_NAME}
${MPI_LIBRARIES}
Expand Down
4 changes: 3 additions & 1 deletion src_gpu/dfEEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ private:
double *d_value_boundary_coeffs = nullptr;
double *d_gradient_internal_coeffs = nullptr;
double *d_gradient_boundary_coeffs = nullptr;
double *d_boundary_gradient_init = nullptr;
double *d_boundary_gradient = nullptr;

public:
dfEEqn();
dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile);
~dfEEqn();

void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
const double *dpdt, const double *boundary_K, const double *boundary_alphaEff);
const double *dpdt, const double *boundary_K, const double *boundary_alphaEff, const double *boundary_gradient);

void initializeTimeStep();

Expand Down
59 changes: 35 additions & 24 deletions src_gpu/dfEEqn.cu
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,10 @@ __global__ void eeqn_add_to_source_kernel(int num_cells,

__global__ void eeqn_boundaryPermutation(const int num_boundary_faces, const int *bouPermedIndex,
const double *boundary_K_init,
const double *boundary_alphaEff_init,
const double *boundary_alphaEff_init, const double *boundary_gradient_init,
double *boundary_K,
double *boundary_alphaEff)
double *boundary_alphaEff,
double *boundary_gradient)
{
int index = blockDim.x * blockIdx.x + threadIdx.x;
if (index >= num_boundary_faces)
Expand All @@ -411,21 +412,25 @@ __global__ void eeqn_boundaryPermutation(const int num_boundary_faces, const int

boundary_K[index] = boundary_K_init[p];
boundary_alphaEff[index] = boundary_alphaEff_init[p];
boundary_gradient[index] = boundary_gradient_init[p];
}

__global__ void eeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, double *internal_coeffs,
__global__ void eeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi,
double *gradient, const double *boundary_deltaCoeffs,
double *internal_coeffs,
double *boundary_coeffs, double *laplac_internal_coeffs,
double *laplac_boundary_coeffs)
{
int index = blockDim.x * blockIdx.x + threadIdx.x;
if (index >= num_boundary_faces)
return;

// zeroGradient
double grad = gradient[index];
// energyGradient
double valueInternalCoeffs = 1.;
double valueBoundaryCoeffs = 0.;
double valueBoundaryCoeffs = grad / boundary_deltaCoeffs[index];
double gradientInternalCoeffs = 0.;
double gradientBoundaryCoeffs = 0.;
double gradientBoundaryCoeffs = grad;

internal_coeffs[index] = boundary_phi[index] * valueInternalCoeffs;
boundary_coeffs[index] = -boundary_phi[index] * valueBoundaryCoeffs;
Expand All @@ -440,7 +445,7 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std
ESolver = new AmgXSolver(modeStr, cfgFile);

stream = dataBase_.stream;
//checkCudaErrors(cudaEventCreate(&event));
// checkCudaErrors(cudaEventCreate(&event));

num_cells = dataBase_.num_cells;
cell_bytes = dataBase_.cell_bytes;
Expand Down Expand Up @@ -473,6 +478,8 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std
checkCudaErrors(cudaMalloc((void **)&d_boundary_K, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void **)&d_boundary_alphaEff_init, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void **)&d_boundary_alphaEff, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void **)&d_boundary_gradient_init, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void **)&d_boundary_gradient, boundary_face_bytes));

checkCudaErrors(cudaMalloc((void **)&d_value_internal_coeffs_init, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void **)&d_value_boundary_coeffs_init, boundary_face_bytes));
Expand All @@ -485,7 +492,7 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std
}

void dfEEqn::prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff,
const double *dpdt, const double *boundary_K, const double *boundary_alphaEff)
const double *dpdt, const double *boundary_K, const double *boundary_alphaEff, const double *boundary_gradient)
{
// TODO not real async copy now, because some host array are not in pinned memory.

Expand All @@ -499,12 +506,13 @@ void dfEEqn::prepare_data(const double *he_old, const double *K, const double *K
// copy and permutate boundary variable
checkCudaErrors(cudaMemcpyAsync(d_boundary_K_init, boundary_K, boundary_face_bytes, cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_boundary_alphaEff_init, boundary_alphaEff, boundary_face_bytes, cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_boundary_gradient_init, boundary_gradient, boundary_face_bytes, cudaMemcpyHostToDevice, stream));

size_t threads_per_block = 1024;
size_t blocks_per_grid = (num_boundary_faces + threads_per_block - 1) / threads_per_block;
eeqn_boundaryPermutation<<<blocks_per_grid, threads_per_block, 0, stream>>>(num_boundary_faces, dataBase_.d_bouPermedIndex,
d_boundary_K_init, d_boundary_alphaEff_init,
d_boundary_K, d_boundary_alphaEff);
d_boundary_K_init, d_boundary_alphaEff_init, d_boundary_gradient_init,
d_boundary_K, d_boundary_alphaEff, d_boundary_gradient);
}

void dfEEqn::initializeTimeStep()
Expand All @@ -516,6 +524,7 @@ void dfEEqn::initializeTimeStep()
size_t threads_per_block = 1024;
size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block;
eeqn_update_BoundaryCoeffs_kernel<<<blocks_per_grid, threads_per_block, 0, stream>>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi,
d_boundary_gradient, dataBase_.d_boundary_deltaCoeffs,
d_value_internal_coeffs, d_value_boundary_coeffs,
d_gradient_internal_coeffs, d_gradient_boundary_coeffs);
}
Expand Down Expand Up @@ -577,14 +586,14 @@ void dfEEqn::fvc_div_vector()
size_t threads_per_block = 512;
size_t blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block;
eeqn_fvc_div_vector_internal<<<blocks_per_grid, threads_per_block, 0, stream>>>(num_cells,
d_A_csr_row_index, d_A_csr_col_index, d_A_csr_diag_index,
dataBase_.d_face_vector, dataBase_.d_hDiffCorrFlux, dataBase_.d_weight,
1., d_b, d_b);
d_A_csr_row_index, d_A_csr_col_index, d_A_csr_diag_index,
dataBase_.d_face_vector, dataBase_.d_hDiffCorrFlux, dataBase_.d_weight,
1., d_b, d_b);
blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block;
eeqn_fvc_div_vector_boundary<<<blocks_per_grid, threads_per_block, 0, stream>>>(num_boundary_cells,
dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id,
dataBase_.d_boundary_face_vector, dataBase_.d_boundary_hDiffCorrFlux,
1., d_b, d_b);
dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id,
dataBase_.d_boundary_face_vector, dataBase_.d_boundary_hDiffCorrFlux,
1., d_b, d_b);
}

void dfEEqn::fvc_div_phi_scalar()
Expand All @@ -611,8 +620,8 @@ void dfEEqn::add_to_source()
size_t blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block;
// " + fvc::ddt(rho,K)" is on the left side of "==", thus should minus from source.
eeqn_add_to_source_kernel<<<blocks_per_grid, threads_per_block, 0, stream>>>(num_cells,
//1., d_dpdt, -1., d_diffAlphaD, dataBase_.d_volume, d_b, d_b);
1., d_dpdt, -1., dataBase_.d_diffAlphaD, dataBase_.d_volume, d_b, d_b);
// 1., d_dpdt, -1., d_diffAlphaD, dataBase_.d_volume, d_b, d_b);
1., d_dpdt, -1., dataBase_.d_diffAlphaD, dataBase_.d_volume, d_b, d_b);
}

void dfEEqn::checkValue(bool print)
Expand Down Expand Up @@ -689,10 +698,10 @@ void dfEEqn::solve()
num_iteration++;

checkCudaErrors(cudaMemcpyAsync(h_he_new, d_he_old, cell_bytes, cudaMemcpyDeviceToHost, stream));
//checkCudaErrors(cudaEventRecord(event, stream));
// checkCudaErrors(cudaStreamSynchronize(stream));
// for (size_t i = 0; i < num_cells; i++)
// fprintf(stderr, "h_he_after[%d]: %.16lf\n", i, h_he_new[i]);
// checkCudaErrors(cudaEventRecord(event, stream));
// checkCudaErrors(cudaStreamSynchronize(stream));
// for (size_t i = 0; i < num_cells; i++)
// fprintf(stderr, "h_he_after[%d]: %.16lf\n", i, h_he_new[i]);
}

void dfEEqn::sync()
Expand All @@ -703,7 +712,7 @@ void dfEEqn::sync()
void dfEEqn::updatePsi(double *Psi)
{
checkCudaErrors(cudaStreamSynchronize(stream));
//checkCudaErrors(cudaEventSynchronize(event));
// checkCudaErrors(cudaEventSynchronize(event));
memcpy(Psi, h_he_new, cell_bytes);
}

Expand All @@ -727,6 +736,8 @@ dfEEqn::~dfEEqn()
checkCudaErrors(cudaFree(d_boundary_K));
checkCudaErrors(cudaFree(d_boundary_alphaEff_init));
checkCudaErrors(cudaFree(d_boundary_alphaEff));
checkCudaErrors(cudaFree(d_boundary_gradient_init));
checkCudaErrors(cudaFree(d_boundary_gradient));

checkCudaErrors(cudaFree(d_value_internal_coeffs_init));
checkCudaErrors(cudaFree(d_value_boundary_coeffs_init));
Expand All @@ -737,5 +748,5 @@ dfEEqn::~dfEEqn()
checkCudaErrors(cudaFree(d_gradient_internal_coeffs));
checkCudaErrors(cudaFree(d_gradient_boundary_coeffs));

//checkCudaErrors(cudaEventDestroy(event));
// checkCudaErrors(cudaEventDestroy(event));
}
Loading