Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 68 additions & 14 deletions applications/solvers/dfLowMachFoam/YEqn.H
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ tmp<fv::convectionScheme<scalar>> mvConvection
)
);


forAll(Y, i)
{
sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]);
Expand All @@ -30,6 +31,50 @@ const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
time_monitor_UEqn_Solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
#endif

#ifdef GPUSolver_
std::vector<double*> Y_new(Y.size()), Y_old(Y.size()), boundary_Y_init(Y.size()), boundary_rhoD_init(Y.size());
std::vector<const double*> rhoD_GPU(Y.size());
for (size_t i = 0; i < Y.size(); ++i)
{
volScalarField& Yi = Y[i];
const volScalarField& rhoDi = chemistry->rhoD(i);
Y_new[i] = &Yi[0];
Y_old[i] = &Yi.oldTime()[0];
rhoD_GPU[i] = &chemistry->rhoD(i)[0];
cudaMallocHost(&boundary_Y_init[i], num_boundary_faces*sizeof(double));
cudaMallocHost(&boundary_rhoD_init[i], num_boundary_faces*sizeof(double));
int offset = 0;
forAll(Yi.boundaryField(), patchi)
{
const scalarField& patchYi = Yi.boundaryField()[patchi];
const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi];
int patchSize = patchYi.size();

memcpy(boundary_Y_init[i]+offset, &patchYi[0], patchSize*sizeof(double));
memcpy(boundary_rhoD_init[i]+offset, &patchRhoDi[0], patchSize*sizeof(double));
offset += patchSize;
}
}

volScalarField mut_sct = turbulence->mut().ref()/Sct;
std::vector<double> boundary_mutsct;
forAll(p.boundaryField(), patchi)
{
const scalarField& patchMut_sct = mut_sct.boundaryField()[patchi];
int patchSize = patchMut_sct.size();
boundary_mutsct.insert(boundary_mutsct.end(), &patchMut_sct[0], &patchMut_sct[0] + patchSize);
}

YEqn_GPU.upwindWeight();
YEqn_GPU.correctVelocity(Y_new, boundary_Y_init, rhoD_GPU);
YEqn_GPU.fvm_ddt(Y_old);
YEqn_GPU.fvm_div_phi();
YEqn_GPU.fvm_div_phiUc();
YEqn_GPU.fvm_laplacian(&mut_sct[0], boundary_mutsct.data(), boundary_rhoD_init);

YEqn_GPU.solve();
#endif

//MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
label flag_mpi_init;
MPI_Initialized(&flag_mpi_init);
Expand All @@ -53,6 +98,7 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);
volScalarField Yt(0.0*Y[0]);

start = std::clock();
int speciesIndex = 0;
forAll(Y, i)
{
volScalarField& Yi = Y[i];
Expand All @@ -61,26 +107,34 @@ time_monitor_corrDiff += double(end - start) / double(CLOCKS_PER_SEC);

if (i != inertIndex)
{
tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
fvScalarMatrix YiEqn
(
fvm::ddt(rho, Yi)
+ mvConvection->fvmDiv(phi, Yi)
+ mvConvection->fvmDiv(phiUc, Yi)
==
#ifdef CPUSolver_
tmp<volScalarField> DEff = chemistry->rhoD(i) + turbulence->mut()/Sct;
fvScalarMatrix YiEqn
(
splitting
? fvm::laplacian(DEff(), Yi)
: (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
)
);
fvm::ddt(rho, Yi)
+ mvConvection->fvmDiv(phi, Yi)
+ mvConvection->fvmDiv(phiUc, Yi)
==
(
splitting
? fvm::laplacian(DEff(), Yi)
: (fvm::laplacian(DEff(), Yi) + combustion->R(Yi))
)
);

YiEqn.relax();

YiEqn.relax();
YiEqn.solve("Yi");
#endif

YiEqn.solve("Yi");
#ifdef GPUSolver_
YEqn_GPU.updatePsi(&Yi[0], speciesIndex);
Yi.correctBoundaryConditions();
#endif

Yi.max(0.0);
Yt += Yi;
++speciesIndex;
}
}

Expand Down
5 changes: 3 additions & 2 deletions applications/solvers/dfLowMachFoam/createdfSolver.H
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ int num_boundary_cells;
string settingPath;
settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UEqnSettingPath", string(""));

dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0],
dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0],
&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex);

dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath);
dfRhoEqn rhoEqn_GPU(dfDataBase);
dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath);
dfYEqn YEqn_GPU(dfDataBase, "dDDI", settingPath, inertIndex);

double *ueqn_internalCoeffs_init, *ueqn_boundaryCoeffs_init, *boundary_pressure_init, *boundary_velocity_init,
*boundary_nuEff_init, *boundary_rho_init, *ueqn_laplac_internalCoeffs_init, *ueqn_laplac_boundaryCoeffs_init, *boundary_phi_init;
Expand Down
2 changes: 2 additions & 0 deletions applications/solvers/dfLowMachFoam/dfLowMachFoam.C
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,11 @@ Description
#include "CombustionModel.H"

#include "dfUEqn.H"
#include "dfYEqn.H"
#include "dfRhoEqn.H"
#include <cuda_runtime.h>
#include <thread>
#include "upwind.H"

#define GPUSolver_

Expand Down
9 changes: 7 additions & 2 deletions src_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,19 @@ include_directories(
$ENV{AMGX_DIR}/include
)

add_library(${PROJECT_NAME} SHARED dfUEqn.cu dfRhoEqn.cu AmgXSolver.cu)
add_library(${PROJECT_NAME}
SHARED
dfUEqn.cu
dfRhoEqn.cu
dfYEqn.cu
AmgXSolver.cu)

target_link_libraries(${PROJECT_NAME}
${MPI_LIBRARIES}
${CUDA_LIBRARIES}
${LIBAMGXSH}
)

target_compile_options(dfMatrix PUBLIC -g)
option(DFMATRIX_ENABLE_DETAILED_DEBUG "Enable detailed debug build" OFF)
if (DFMATRIX_ENABLE_DETAILED_DEBUG)
target_compile_definitions(${PROJECT_NAME} PRIVATE DEBUG)
Expand Down
52 changes: 48 additions & 4 deletions src_gpu/dfMatrixDataBase.H
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ inline void checkVectorEqual(int count, const double* basevec, double* vec, doub
{
double abs_diff = fabs(basevec[i] - vec[i]);
double rel_diff = fabs(basevec[i] - vec[i]) / fabs(basevec[i]);
if (abs_diff > 1e-12 && rel_diff > max_relative_error && !std::isinf(rel_diff))
// if (abs_diff > 1e-12 && rel_diff > max_relative_error && !std::isinf(rel_diff))
if (abs_diff > 1e-15 && rel_diff > max_relative_error)
fprintf(stderr, "mismatch index %d, cpu data: %.16lf, gpu data: %.16lf, relative error: %.16lf\n", i, basevec[i], vec[i], rel_diff);
}
}
Expand All @@ -53,6 +54,8 @@ struct dfMatrixDataBase
// - number of boundary faces
int num_boundary_faces;

int num_species;

// - mesh variables
// - csr_row_index
int *h_A_csr_row_index=nullptr, *d_A_csr_row_index=nullptr;
Expand Down Expand Up @@ -84,14 +87,19 @@ struct dfMatrixDataBase
// - the device pointer to rho_new, rho_old, velocity_old, pressure and volume list
double *d_rho_new = nullptr, *d_rho_old = nullptr, *d_velocity_old = nullptr,
*d_pressure = nullptr, *d_volume = nullptr;
// - the device pointer to Y_new and Y_old
std::vector<double*> d_Y_new_vector;
std::vector<double*> d_Y_old_vector;
// - the device pointer to the pre-permutated and post-permutated interpolation weight list
double *d_weight_init = nullptr, *d_weight = nullptr;
double *d_weight_upwind = nullptr;
// - the device pointer to the pre-permutated and post-permutated flux (phi) list
double *d_phi_init = nullptr, *d_phi = nullptr;
// - the device pointer to the pre-permutated and post-permutated cell face vector list
double *d_face_vector_init = nullptr, *d_face_vector = nullptr;
double *d_face_init = nullptr, *d_face = nullptr;
double *d_deltaCoeffs_init = nullptr, *d_deltaCoeffs = nullptr;
std::vector<double*> d_rhoD_vector;

double rdelta_t = 1/1e-6;

Expand All @@ -114,6 +122,14 @@ struct dfMatrixDataBase
*d_boundary_velocity = nullptr, *d_boundary_velocity_init = nullptr,
*d_boundary_nuEff = nullptr, *d_boundary_nuEff_init = nullptr,
*d_boundary_rho = nullptr, *d_boundary_rho_init = nullptr;
std::vector<double*> d_boundary_Y_vector;
std::vector<double*> d_boundary_Y_init_vector;
std::vector<double*> d_internal_coeffs_Y_vector;
std::vector<double*> d_boundary_coeffs_Y_vector;
std::vector<double*> d_laplac_internal_coeffs_Y_vector;
std::vector<double*> d_laplac_boundary_coeffs_Y_vector;
std::vector<double*> d_boundary_rhoD_vector;
double *d_boundary_mut_sct = nullptr;

std::vector<int> boundPermutationList;
std::vector<double> ueqn_internalCoeffs, ueqn_boundaryCoeffs;
Expand Down Expand Up @@ -180,11 +196,11 @@ struct dfMatrixDataBase

// constructor
dfMatrixDataBase();
dfMatrixDataBase(int num_surfaces, int num_cells, int num_boundary_faces, int & num_boundary_cells_output,
dfMatrixDataBase(int num_surfaces, int num_cells, int num_boundary_faces, int num_species, int & num_boundary_cells_output,
const int *neighbour, const int *owner, const double* volume, const double* weight, const double* face_vector, const double* face,
const double* deltaCoeffs, std::vector<double> boundary_face_vector_init, std::vector<double> boundary_face_init,
std::vector<double> boundary_deltaCoeffs_init, std::vector<int> boundary_cell_id_init)
: num_cells(num_cells), num_faces(num_surfaces*2), num_surfaces(num_surfaces), num_iteration(0),
: num_cells(num_cells), num_faces(num_surfaces*2), num_surfaces(num_surfaces), num_species(num_species), num_iteration(0),
num_boundary_faces(num_boundary_faces), h_volume(volume)
{
// allocate field pointer in pin memory
Expand Down Expand Up @@ -457,19 +473,36 @@ struct dfMatrixDataBase
checkCudaErrors(cudaMalloc((void**)&d_A_csr_diag_index, cell_index_bytes));
total_bytes += (csr_row_index_bytes + csr_col_index_bytes + cell_index_bytes);

d_Y_new_vector.resize(num_species);
d_Y_old_vector.resize(num_species);
d_rhoD_vector.resize(num_species);
d_boundary_Y_vector.resize(num_species);
d_boundary_Y_init_vector.resize(num_species);
d_internal_coeffs_Y_vector.resize(num_species);
d_boundary_coeffs_Y_vector.resize(num_species);
d_laplac_internal_coeffs_Y_vector.resize(num_species);
d_laplac_boundary_coeffs_Y_vector.resize(num_species);
d_boundary_rhoD_vector.resize(num_species);

for (size_t i = 0; i < num_species; ++i){
checkCudaErrors(cudaMalloc((void**)&d_Y_new_vector[i], cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_Y_old_vector[i], cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_rhoD_vector[i], cell_bytes));
}
checkCudaErrors(cudaMalloc((void**)&d_rho_old, cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_rho_new, cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_volume, cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_pressure, cell_bytes));
checkCudaErrors(cudaMalloc((void**)&d_velocity_old, cell_vec_bytes));
checkCudaErrors(cudaMalloc((void**)&d_weight, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_weight_upwind, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_face, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_deltaCoeffs, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_phi, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_phi_init, face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_face_vector, face_vec_bytes));
checkCudaErrors(cudaMalloc((void**)&d_nuEff, cell_bytes));
total_bytes += (cell_bytes * 5 + face_bytes * 5 + cell_vec_bytes + face_vec_bytes);
total_bytes += (cell_bytes * (5 + 2*num_species) + face_bytes * 6 + cell_vec_bytes + face_vec_bytes);

checkCudaErrors(cudaMalloc((void**)&d_boundary_cell_offset, (num_boundary_cells+1) * sizeof(int)));
checkCudaErrors(cudaMalloc((void**)&d_boundary_cell_id, boundary_face_index_bytes));
Expand All @@ -496,6 +529,17 @@ struct dfMatrixDataBase
checkCudaErrors(cudaMalloc((void**)&d_boundary_nuEff_init, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_rho, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_rho_init, boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_mut_sct, boundary_face_bytes));
for (size_t i = 0; i < num_species; ++i){
checkCudaErrors(cudaMalloc((void**)&d_boundary_Y_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_Y_init_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_internal_coeffs_Y_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_coeffs_Y_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_laplac_internal_coeffs_Y_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_laplac_boundary_coeffs_Y_vector[i], boundary_face_bytes));
checkCudaErrors(cudaMalloc((void**)&d_boundary_rhoD_vector[i], boundary_face_bytes));
}

total_bytes += (boundary_face_bytes*10 + boundary_face_vec_bytes * 11);

// checkCudaErrors(cudaMalloc((void**)&d_A_csr, csr_value_vec_bytes));
Expand Down
53 changes: 53 additions & 0 deletions src_gpu/dfYEqn.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#pragma once

#include "AmgXSolver.H"
#include <amgx_c.h>
#include "dfMatrixDataBase.H"

class dfYEqn
{
private:
dfMatrixDataBase &dataBase_;
cudaStream_t stream;

std::vector<AmgXSolver *> YSolverSet;
int num_iteration = 0;
double time_monitor_CPU;
double time_monitor_GPU_kernel, time_monitor_GPU_memcpy, time_monitor_GPU_memcpy_test;

// common variables
int num_cells, cell_bytes, num_faces, num_surfaces, num_boundary_cells, num_boundary_faces, num_species, inertIndex;
int *d_A_csr_row_index, *d_A_csr_diag_index, *d_A_csr_col_index;

// Matrix variables
double *d_A_csr, *d_b, *d_psi = nullptr;
double *h_A_csr, *h_b, *h_psi = nullptr;
double *d_sumYDiffError = nullptr, *d_sumYDiffError_tmp = nullptr;
double *d_sumYDiffError_boundary = nullptr;
double *d_phiUc_boundary = nullptr;
double *d_phiUc = nullptr;
double *d_mut_Sct = nullptr;

public:
dfYEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile, const int inertIndex);

~dfYEqn();

void checkValue(bool print, char *filename);

void correctVelocity(std::vector<double *> Y_new, std::vector<double *> boundary_Y_init, std::vector<const double *> rhoD_GPU);

void upwindWeight();

void fvm_ddt(std::vector<double *> Y_old);

void fvm_div_phi();

void fvm_div_phiUc();

void fvm_laplacian(double *mut_Sct, double *boundary_mut_Sct, std::vector<double*>boundary_rhoD);

void solve();

void updatePsi(double *Psi, int speciesIndex);
};
Loading