From c831fb27b1d208a33a177ba6022d680158cc8850 Mon Sep 17 00:00:00 2001 From: maorz1998 Date: Sat, 23 Dec 2023 12:18:21 +0800 Subject: [PATCH] fix bugs in 1223 --- applications/solvers/dfLowMachFoam/EEqn.H | 2 +- .../solvers/dfLowMachFoam/Make/options | 7 +- .../solvers/dfLowMachFoam/dfLowMachFoam.C | 7 +- bashrc.in | 3 +- install.sh | 2 +- src/dfChemistryModel/dfChemistryModel.H | 2 + src_gpu/AmgXSolver.cu | 2 +- src_gpu/dfChemistrySolver.cu | 14 +++- src_gpu/dfEEqn.cu | 6 ++ src_gpu/dfMatrixOpBase.cu | 4 +- src_gpu/dfThermo.H | 2 +- src_gpu/dfThermo.cu | 76 +++++++++++-------- src_gpu/dfYEqn.cu | 21 +++-- 13 files changed, 89 insertions(+), 59 deletions(-) diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H index 51277cd73..46f59b798 100644 --- a/applications/solvers/dfLowMachFoam/EEqn.H +++ b/applications/solvers/dfLowMachFoam/EEqn.H @@ -93,7 +93,7 @@ // EEqn_GPU.compareResult(&EEqn.lower()[0], &EEqn.upper()[0], &EEqn.diag()[0], &EEqn.source()[0], // h_internal_coeffs.data(), h_boundary_coeffs.data(), printFlag); // DEBUG_TRACE; - //EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag); + // EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag); } delete h_boundary_he_tmp; diff --git a/applications/solvers/dfLowMachFoam/Make/options b/applications/solvers/dfLowMachFoam/Make/options index 0cf97f2ae..935023c25 100644 --- a/applications/solvers/dfLowMachFoam/Make/options +++ b/applications/solvers/dfLowMachFoam/Make/options @@ -27,7 +27,7 @@ EXE_INC = -std=c++14 \ $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include/torch/csrc/api/include,) \ $(PYTHON_INC_DIR) \ $(if $(AMGX_DIR), -I$(DF_ROOT)/src_gpu,) \ - $(if $(AMGX_DIR), -I/usr/local/cuda-11.6/include,) \ + $(if $(AMGX_DIR), -I/usr/local/cuda/include,) \ $(if $(AMGX_DIR), -I$(AMGX_DIR)/include,) \ -I$(DF_ROOT)/GPUTestRef/lnInclude \ @@ -43,7 +43,6 @@ EXE_LIBS = \ -ldfCanteraMixture \ -ldfChemistryModel \ -ldfCombustionModels \ - -ldfGenMatrix \ $(CANTERA_ROOT)/lib/libcantera.so \ $(if $(LIBTORCH_ROOT),$(LIBTORCH_ROOT)/lib/libtorch.so,) \ $(if $(LIBTORCH_ROOT),$(LIBTORCH_ROOT)/lib/libc10.so,) \ @@ -52,8 +51,8 @@ EXE_LIBS = \ $(if $(LIBTORCH_ROOT),$(DF_SRC)/dfChemistryModel/DNNInferencer/build/libDNNInferencer.so,) \ $(if $(PYTHON_LIB_DIR),-L$(PYTHON_LIB_DIR),) \ $(if $(PYTHON_LIB_DIR),-lpython3.8,) \ - $(if $(AMGX_DIR), /usr/local/cuda-11.6/lib64/libcudart.so,) \ - $(if $(AMGX_DIR), /usr/local/cuda-11.6/lib64/libnccl.so,) \ + $(if $(AMGX_DIR), /usr/local/cuda/lib64/libcudart.so,) \ + $(if $(AMGX_DIR), /usr/local/cuda/lib64/libnccl.so,) \ $(if $(AMGX_DIR), $(DF_ROOT)/src_gpu/build/libdfMatrix.so,) \ $(if $(AMGX_DIR), $(AMGX_DIR)/build/libamgxsh.so,) diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C index 5109faa11..46d185a89 100644 --- a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C +++ b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C @@ -60,10 +60,10 @@ Description #include "basicThermo.H" #include "CombustionModel.H" -#define GPUSolverNew_ -#define TIME +// #define GPUSolverNew_ +// #define TIME // #define DEBUG_ -#define SHOW_MEMINFO +// #define SHOW_MEMINFO #include "dfMatrixDataBase.H" @@ -87,7 +87,6 @@ Description #include "createGPUSolver.H" #include "upwind.H" - #include "GenFvMatrix.H" #include "CanteraMixture.H" #include "multivariateGaussConvectionScheme.H" #include "limitedSurfaceInterpolationScheme.H" diff --git a/bashrc.in b/bashrc.in index 8f517d87f..ac4e7d398 100644 --- a/bashrc.in +++ b/bashrc.in @@ -15,4 +15,5 @@ export DF_LIBBIN=pwd/platforms/$WM_OPTIONS/lib export PATH=$DF_APPBIN:$PATH export LD_LIBRARY_PATH=$DF_LIBBIN:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=$AMGX_DIR/build:$LD_LIBRARY_PATH \ No newline at end of file +export LD_LIBRARY_PATH=$AMGX_DIR/build:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$DF_ROOT/src/dfChemistryModel/DNNInferencer/build:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/install.sh b/install.sh index 7a699bb25..38ec26478 100755 --- a/install.sh +++ b/install.sh @@ -29,7 +29,7 @@ if [ $USE_GPUSOLVER = true ]; then mkdir build cd build cmake .. - make + make -j export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH fi cd $DF_ROOT diff --git a/src/dfChemistryModel/dfChemistryModel.H b/src/dfChemistryModel/dfChemistryModel.H index b1554042a..41be7ac2a 100644 --- a/src/dfChemistryModel/dfChemistryModel.H +++ b/src/dfChemistryModel/dfChemistryModel.H @@ -343,6 +343,8 @@ public: } } + bool ifChemstry() const {return chemistry_;} + // profiling #if defined USE_LIBTORCH || defined USE_PYTORCH double time_allsolve() {return time_allsolve_;} diff --git a/src_gpu/AmgXSolver.cu b/src_gpu/AmgXSolver.cu index c891a5980..909a0abd9 100644 --- a/src_gpu/AmgXSolver.cu +++ b/src_gpu/AmgXSolver.cu @@ -335,7 +335,7 @@ void AmgXSolver::solve( getIters(nIters); getResidual(nIters, rnorm); if (!isMPIEnabled || myRank == 0) - fprintf(stderr, "Initial residual = %.10lf, Final residual = %.5e, No Iterations %d\n", irnorm, rnorm, nIters); + printf("Initial residual = %.10lf, Final residual = %.5e, No Iterations %d\n", irnorm, rnorm, nIters); } diff --git a/src_gpu/dfChemistrySolver.cu b/src_gpu/dfChemistrySolver.cu index 9c4291064..f4b3b051a 100644 --- a/src_gpu/dfChemistrySolver.cu +++ b/src_gpu/dfChemistrySolver.cu @@ -121,13 +121,15 @@ void dfChemistrySolver::setConstantValue(int num_cells, int num_species, int bat std::cerr << "error loading the model\n"; exit(-1); } - modules_[i].to(device_); + // modules_[i].to(device_); + modules_[i].to(device_, torch::kHalf); } } void dfChemistrySolver::Inference(const double *h_T, const double *d_T,const double *p, const double *y, const double *rho, double *RR) { // construct input + clock_t start = clock(); inputsize_ = 0; std::vector reactCellIndex; for (int i = 0; i < num_cells_; i++) { @@ -136,6 +138,10 @@ void dfChemistrySolver::Inference(const double *h_T, const double *d_T,const dou } } inputsize_ = reactCellIndex.size(); + clock_t end = clock(); + double elapsed_secs = double(end - start) / CLOCKS_PER_SEC; + std::cout << "construct input time: " << elapsed_secs << std::endl; + #ifdef STREAM_ALLOCATOR checkCudaErrors(cudaMallocAsync((void**)&init_input_, sizeof(double) * inputsize_ * dim_input_, stream)); checkCudaErrors(cudaMallocAsync((void**)&y_input_BCT, sizeof(double) * inputsize_ * num_species_, stream)); @@ -157,13 +163,16 @@ void dfChemistrySolver::Inference(const double *h_T, const double *d_T,const dou Xmu_, Xstd_, init_input_); // inference by torch + TICK_INIT_EVENT; + TICK_START_EVENT; double *d_output; for (int sample_start = 0; sample_start < inputsize_; sample_start += batch_size_) { int sample_end = std::min(sample_start + batch_size_, inputsize_); int sample_len = sample_end - sample_start; at::Tensor torch_input = torch::from_blob(init_input_ + sample_start * dim_input_, {sample_len, dim_input_}, torch::TensorOptions().device(device_).dtype(torch::kDouble)); - torch_input = torch_input.to(at::kFloat); + // torch_input = torch_input.to(at::kFloat); + torch_input = torch_input.to(at::kHalf); std::vector INPUTS; INPUTS.push_back(torch_input); std::vector output(num_modules_); @@ -175,6 +184,7 @@ void dfChemistrySolver::Inference(const double *h_T, const double *d_T,const dou cudaMemcpy(NN_output_ + (i * inputsize_ + sample_start), d_output, sizeof(double) * sample_len, cudaMemcpyDeviceToDevice); } } + TICK_END_EVENT(Inference); calculate_y_new<<>>(inputsize_, num_modules_, NN_output_, y_input_BCT, Ymu_, Ystd_, NN_output_); diff --git a/src_gpu/dfEEqn.cu b/src_gpu/dfEEqn.cu index ad55af27a..0c8892541 100644 --- a/src_gpu/dfEEqn.cu +++ b/src_gpu/dfEEqn.cu @@ -150,6 +150,12 @@ void dfEEqn::process() { dataBase_.num_patches, dataBase_.patch_size.data(), patch_type_he.data(), dataBase_.d_boundary_delta_coeffs, dataBase_.d_boundary_p, dataBase_.d_boundary_y, d_boundary_heGradient); + correct_boundary_conditions_scalar(dataBase_.stream, dataBase_.nccl_comm, dataBase_.neighbProcNo.data(), + dataBase_.num_boundary_surfaces, dataBase_.num_patches, dataBase_.patch_size.data(), + patch_type_he.data(), dataBase_.d_boundary_delta_coeffs, dataBase_.d_boundary_face_cell, + dataBase_.d_he, dataBase_.d_boundary_he, dataBase_.cyclicNeighbor.data(), + dataBase_.patchSizeOffset.data(), dataBase_.d_boundary_weight, + dataBase_.d_boundary_T, dataBase_.d_boundary_y, d_boundary_heGradient, &thermo_); update_boundary_coeffs_scalar(dataBase_.stream, dataBase_.num_patches, dataBase_.patch_size.data(), patch_type_he.data(), dataBase_.d_boundary_delta_coeffs, dataBase_.d_boundary_he, dataBase_.d_boundary_weight, diff --git a/src_gpu/dfMatrixOpBase.cu b/src_gpu/dfMatrixOpBase.cu index afe3b9cf4..679418df7 100644 --- a/src_gpu/dfMatrixOpBase.cu +++ b/src_gpu/dfMatrixOpBase.cu @@ -849,7 +849,7 @@ __global__ void fvm_laplacian_scalar_boundary(int num, int offset, int start_index = offset + index; double boundary_value = boundary_gamma[start_index] * boundary_mag_sf[start_index]; internal_coeffs[start_index] += boundary_value * gradient_internal_coeffs[start_index] * sign; - boundary_coeffs[start_index] -= boundary_value * gradient_boundary_coeffs[start_index] * sign; + boundary_coeffs[start_index] -= boundary_value * gradient_boundary_coeffs[start_index] * sign; } __global__ void fvm_laplacian_surface_scalar_boundary(int num, int offset, @@ -2438,7 +2438,7 @@ void correct_boundary_conditions_scalar(cudaStream_t stream, ncclComm_t comm, gradient_offset, vf, boundary_cell_face, thermo_gradient, boundary_delta_coeffs, boundary_vf); gradient_offset += patch_size[i]; } else if (patch_type[i] == boundaryConditions::fixedEnergy) { - GPUThermo->calculateEnthalpyGPU(threads_per_block, patch_size[i], boundary_T, boundary_vf, boundary_y, offset); + GPUThermo->calculateEnthalpyGPU(threads_per_block, patch_size[i], num_boundary_surfaces, boundary_T, boundary_vf, boundary_y, offset); } else if (patch_type[i] == boundaryConditions::cyclic) { correct_boundary_conditions_cyclic_scalar<<>>(patch_size[i], offset, patchSizeOffset[cyclicNeighbor[i]], vf, boundary_cell_face, boundary_weight, boundary_vf); diff --git a/src_gpu/dfThermo.H b/src_gpu/dfThermo.H index d71ff046f..37160cab6 100644 --- a/src_gpu/dfThermo.H +++ b/src_gpu/dfThermo.H @@ -83,7 +83,7 @@ public: void calculateRhoDGPU(int threads_per_block, int num_thread, int num_total, const double *T, const double *T_poly, const double *p, const double *mole_fraction, const double *mean_mole_weight, const double *rho, double *rhoD, int offset = 0); - void calculateEnthalpyGPU(int thread_per_block, int num_thread, const double *T, double *enthalpy, const double *d_mass_fraction, int offset = 0); + void calculateEnthalpyGPU(int thread_per_block, int num_thread, int num_total, const double *T, double *enthalpy, const double *d_mass_fraction, int offset = 0); void calculateTemperatureGPU(int thread_per_block, int num_thread, int num_total, const double *T_init, const double *target_h, double *T, const double *d_mass_fraction, int offset = 0, double atol = 1e-7, double rtol = 1e-7, int max_iter = 20); diff --git a/src_gpu/dfThermo.cu b/src_gpu/dfThermo.cu index 20ae65210..6d8dae14d 100644 --- a/src_gpu/dfThermo.cu +++ b/src_gpu/dfThermo.cu @@ -8,7 +8,7 @@ #define GAS_CANSTANT 8314.46261815324 #define SQRT8 2.8284271247461903 -#define NUM_SPECIES 9 +#define NUM_SPECIES 7 // constant memory __constant__ __device__ double d_nasa_coeffs[NUM_SPECIES*15]; @@ -213,36 +213,44 @@ __global__ void calculate_diffusion_kernel(int num_thread, int num_total, int nu if (index >= num_thread) return; - int startIndex = offset + index; + extern __shared__ double shared_data[]; + double *mole_fraction_shared = shared_data; - double D[NUM_SPECIES * NUM_SPECIES]; - double sum1, sum2; - double tmp; + int startIndex = offset + index; for (int i = 0; i < num_species; i++) { - for (int j = 0; j < num_species; j++) { - tmp = 0.; - for (int k = 0; k < 5; k++) - tmp += (d_binary_diffusion_coeffs[i * num_species * 5 + j * 5 + k] * T_poly[num_total * k + startIndex]); - D[i * num_species + j] = tmp * pow(T[startIndex], 1.5); - } + mole_fraction_shared[i * blockDim.x + threadIdx.x] = mole_fraction[i * num_total + startIndex]; + } + + double poly[5]; + for (int j = 0; j < 5; j++) { + poly[j] = T_poly[num_total * j + startIndex]; } + + double powT = T[startIndex] * sqrt(T[startIndex]); + + double local_mean_mole_weight = mean_mole_weight[startIndex]; + double local_rho_div_p = rho[startIndex] / p[startIndex]; for (int i = 0; i < num_species; i++) { - if (mole_fraction[num_total * i + startIndex] + 1e-10 > 1.) { + if (mole_fraction_shared[i * blockDim.x + threadIdx.x] + 1e-10 > 1.) { d[num_total * i + startIndex] = 0.; continue; } - sum1 = 0.; - sum2 = 0.; + double sum1 = 0.; + double sum2 = 0.; for (int j = 0; j < num_species; j++) { if (i == j) continue; - sum1 += mole_fraction[num_total * j + startIndex] / D[i * num_species + j]; - sum2 += mole_fraction[num_total * j + startIndex] * d_molecular_weights[j] / D[i * num_species + j]; + // calculate D + double tmp = 0.; + for (int k = 0; k < 5; k++) + tmp += (d_binary_diffusion_coeffs[i * num_species * 5 + j * 5 + k] * poly[k]); + double local_D = tmp * powT; + sum1 += mole_fraction_shared[j * blockDim.x + threadIdx.x] / local_D; + sum2 += mole_fraction_shared[j * blockDim.x + threadIdx.x] * d_molecular_weights[j] / local_D; } - sum1 *= p[startIndex]; - sum2 *= p[startIndex] * mole_fraction[num_total * i + startIndex] / - (mean_mole_weight[startIndex] - mole_fraction[num_total * i + startIndex] * d_molecular_weights[i]); - d[num_total * i + startIndex] = 1 / (sum1 + sum2) * rho[startIndex]; + sum2 *= mole_fraction_shared[i * blockDim.x + threadIdx.x] / + (local_mean_mole_weight - mole_fraction_shared[i * blockDim.x + threadIdx.x] * d_molecular_weights[i]); + d[num_total * i + startIndex] = 1 / (sum1 + sum2) * local_rho_div_p; } } @@ -314,7 +322,7 @@ __global__ void calculate_temperature_kernel(int num_thread, int num_total, int extern void __global__ correct_internal_boundary_field_scalar(int num, int offset, const double *vf_internal, const int *face2Cells, double *vf_boundary); -__global__ void calculate_enthalpy_kernel(int num_thread, int offset, int num_cells, int num_species, +__global__ void calculate_enthalpy_kernel(int num_thread, int offset, int num_total, int num_species, const double *T, const double *mass_fraction, double *enthalpy) { int index = blockDim.x * blockIdx.x + threadIdx.x; @@ -323,7 +331,7 @@ __global__ void calculate_enthalpy_kernel(int num_thread, int offset, int num_ce int startIndex = index + offset; - enthalpy[startIndex] = calculate_enthalpy_device_kernel(num_cells, num_species, startIndex, T[startIndex], mass_fraction); + enthalpy[startIndex] = calculate_enthalpy_device_kernel(num_total, num_species, startIndex, T[startIndex], mass_fraction); } __global__ void calculate_psip0_kernel(int num_thread, int offset, const double *p, const double *psi, double *psip0) @@ -489,7 +497,7 @@ void dfThermo::calculatePsiGPU(int threads_per_block, int num_thread, const doub void dfThermo::calculateRhoGPU(int threads_per_block, int num_thread, const double *p, const double *psi, double *rho, int offset) { size_t blocks_per_grid = (num_thread + threads_per_block - 1) / threads_per_block; - calculate_rho_kernel<<>>(num_cells, offset, p, psi, rho); + calculate_rho_kernel<<>>(num_thread, offset, p, psi, rho); } void dfThermo::calculateViscosityGPU(int num_thread, int num_total, const double *T, const double *mole_fraction, @@ -514,13 +522,19 @@ void dfThermo::calculateThermoConductivityGPU(int threads_per_block, int num_thr offset, d_nasa_coeffs, d_y, T_poly, T, mole_fraction, species_thermal_conductivities, thermal_conductivity); } -void dfThermo::calculateRhoDGPU(int threads_per_block, int num_thread, int num_total, const double *T, - const double *T_poly, const double *p, const double *mole_fraction, +void dfThermo::calculateRhoDGPU(int threads_per_block, int num_thread, int num_total, const double *T, + const double *T_poly, const double *p, const double *mole_fraction, const double *mean_mole_weight, const double *rho, double *rhoD, int offset) { + threads_per_block = 32; size_t blocks_per_grid = (num_thread + threads_per_block - 1) / threads_per_block; - calculate_diffusion_kernel<<>>(num_thread, num_total, num_species, offset, + size_t sharedMemSize = sizeof(double) * threads_per_block * num_species; + + TICK_INIT_EVENT; + TICK_START_EVENT; + calculate_diffusion_kernel<<>>(num_thread, num_total, num_species, offset, T_poly, mole_fraction, p, mean_mole_weight, rho, T, rhoD); + TICK_END_EVENT("calculate_diffusion_kernel"); } void dfThermo::calculateTemperatureGPU(int threads_per_block, int num_thread, int num_total, const double *T_init, const double *target_h, double *T, @@ -532,17 +546,17 @@ void dfThermo::calculateTemperatureGPU(int threads_per_block, int num_thread, in T_init, target_h, d_mass_fraction, T, atol, rtol, max_iter); } -void dfThermo::calculateEnthalpyGPU(int threads_per_block, int num_thread, const double *T, double *enthalpy, const double *d_mass_fraction, int offset) +void dfThermo::calculateEnthalpyGPU(int threads_per_block, int num_thread, int num_total, const double *T, double *enthalpy, const double *d_mass_fraction, int offset) { size_t blocks_per_grid = (num_thread + threads_per_block - 1) / threads_per_block; - calculate_enthalpy_kernel<<>>(num_thread, offset, num_cells, num_species, + calculate_enthalpy_kernel<<>>(num_thread, offset, num_total, num_species, T, d_mass_fraction, enthalpy); } void dfThermo::updateEnergy() { - calculateEnthalpyGPU(1024, num_cells, dataBase_.d_T, dataBase_.d_he, dataBase_.d_y); + calculateEnthalpyGPU(1024, num_cells, num_cells, dataBase_.d_T, dataBase_.d_he, dataBase_.d_y); // int offset = 0; // for (int i = 0; i < dataBase_.num_patches; i++) { @@ -574,6 +588,7 @@ void dfThermo::correctThermo() setMassFraction(dataBase_.d_y, dataBase_.d_boundary_y); // internal field int cell_thread = 512, boundary_thread = 32; + fprintf(stderr, "\n\n"); calculateTemperatureGPU(cell_thread, dataBase_.num_cells, dataBase_.num_cells, dataBase_.d_T, dataBase_.d_he, dataBase_.d_T, dataBase_.d_y); // calculate temperature calculateTPolyGPU(cell_thread, dataBase_.num_cells, dataBase_.num_cells, dataBase_.d_T, d_T_poly); // calculate T_poly calculatePsiGPU(cell_thread, dataBase_.num_cells, dataBase_.d_T, d_mean_mole_weight, dataBase_.d_thermo_psi); // calculate psi @@ -584,13 +599,14 @@ void dfThermo::correctThermo() d_species_thermal_conductivities, dataBase_.d_thermo_alpha); // calculate thermal conductivity calculateRhoDGPU(cell_thread, dataBase_.num_cells, dataBase_.num_cells, dataBase_.d_T, d_T_poly, dataBase_.d_p, d_mole_fraction, d_mean_mole_weight, dataBase_.d_rho, dataBase_.d_thermo_rhoD); + fprintf(stderr, "\n\n"); // boundary field int offset = 0; for (int i = 0; i < dataBase_.num_patches; i++) { if (dataBase_.patch_size[i] == 0) continue; if (dataBase_.patch_type_T[i] == boundaryConditions::fixedValue) { calculateTPolyGPU(boundary_thread, dataBase_.patch_size[i], dataBase_.num_boundary_surfaces, dataBase_.d_boundary_T, d_boundary_T_poly, offset); - calculateEnthalpyGPU(boundary_thread, dataBase_.patch_size[i], dataBase_.d_boundary_T, dataBase_.d_boundary_he, + calculateEnthalpyGPU(boundary_thread, dataBase_.patch_size[i], dataBase_.num_boundary_surfaces, dataBase_.d_boundary_T, dataBase_.d_boundary_he, dataBase_.d_boundary_y, offset); calculatePsiGPU(boundary_thread, dataBase_.patch_size[i], dataBase_.d_boundary_T, d_boundary_mean_mole_weight, dataBase_.d_boundary_thermo_psi, offset); calculateRhoGPU(boundary_thread, dataBase_.patch_size[i], dataBase_.d_boundary_p, dataBase_.d_boundary_thermo_psi, dataBase_.d_boundary_rho, offset); diff --git a/src_gpu/dfYEqn.cu b/src_gpu/dfYEqn.cu index e6bbc5d4c..b86cb5a69 100644 --- a/src_gpu/dfYEqn.cu +++ b/src_gpu/dfYEqn.cu @@ -346,9 +346,8 @@ void dfYEqn::createNonConstantFieldsInternal() { checkCudaErrors(cudaMalloc((void**)&d_phiUc, dataBase_.surface_value_bytes)); checkCudaErrors(cudaMalloc((void**)&d_DEff, dataBase_.cell_value_bytes * dataBase_.num_species)); checkCudaErrors(cudaMalloc((void**)&d_permute, dataBase_.cell_value_vec_bytes)); - - checkCudaErrors(cudaMalloc((void**)&d_RR, dataBase_.cell_value_bytes * dataBase_.num_species)); #endif + checkCudaErrors(cudaMalloc((void**)&d_RR, dataBase_.cell_value_bytes * dataBase_.num_species)); // computed on CPU, used on GPU, need memcpyh2d checkCudaErrors(cudaMallocHost((void**)&h_rhoD, dataBase_.cell_value_bytes * dataBase_.num_species)); // UnityLewis @@ -442,7 +441,14 @@ void dfYEqn::preProcess(const double *h_rhoD, const double *h_boundary_rhoD, } void dfYEqn::process() { - TICK_INIT_EVENT; + TICK_INIT_EVENT; + + // calculate reaction rates + TICK_START_EVENT; + checkCudaErrors(cudaMemset(d_RR, 0, dataBase_.cell_value_bytes * dataBase_.num_species)); + yeqn_compute_RR(chemistrySolver_, dataBase_.stream, dataBase_.h_T, dataBase_.d_T, dataBase_.d_p, dataBase_.d_y, dataBase_.d_rho_old, d_RR); + TICK_END_EVENT(YEqn compute RR); + TICK_START_EVENT; #ifdef USE_GRAPH if(!graph_created) { @@ -460,8 +466,6 @@ void dfYEqn::process() { checkCudaErrors(cudaMallocAsync((void**)&d_phiUc, dataBase_.surface_value_bytes, dataBase_.stream)); checkCudaErrors(cudaMallocAsync((void**)&d_DEff, dataBase_.cell_value_bytes * dataBase_.num_species, dataBase_.stream)); checkCudaErrors(cudaMallocAsync((void**)&d_permute, dataBase_.cell_value_vec_bytes, dataBase_.stream)); - // combustion fields - checkCudaErrors(cudaMallocAsync((void**)&d_RR, dataBase_.cell_value_bytes * dataBase_.num_species, dataBase_.stream)); // thermophysical fields checkCudaErrors(cudaMallocAsync((void**)&d_boundary_hai, dataBase_.boundary_surface_value_bytes * dataBase_.num_species, dataBase_.stream)); checkCudaErrors(cudaMallocAsync((void**)&d_boundary_mut_sct, dataBase_.boundary_surface_value_bytes, dataBase_.stream)); @@ -493,11 +497,6 @@ void dfYEqn::process() { checkCudaErrors(cudaMemsetAsync(dataBase_.d_boundary_diff_alphaD, 0, dataBase_.boundary_surface_value_bytes, dataBase_.stream)); checkCudaErrors(cudaMemsetAsync(d_grad_y, 0, dataBase_.cell_value_vec_bytes * dataBase_.num_species, dataBase_.stream)); checkCudaErrors(cudaMemsetAsync(d_boundary_grad_y, 0, dataBase_.boundary_surface_value_vec_bytes * dataBase_.num_species, dataBase_.stream)); - // combustion fields - checkCudaErrors(cudaMemsetAsync(d_RR, 0, dataBase_.cell_value_bytes * dataBase_.num_species, dataBase_.stream)); - // calculate reaction rates - checkCudaErrors(cudaStreamSynchronize(dataBase_.stream)); - yeqn_compute_RR(chemistrySolver_, dataBase_.stream, dataBase_.h_T, dataBase_.d_T, dataBase_.d_p, dataBase_.d_y, dataBase_.d_rho_old, d_RR); // compute diffAlphaD yeqn_fvc_laplacian_scalar(dataBase_.stream, dataBase_.nccl_comm, dataBase_.neighbProcNo.data(), dataBase_.num_species, dataBase_.num_cells, dataBase_.num_surfaces, dataBase_.num_boundary_surfaces, @@ -669,8 +668,6 @@ void dfYEqn::process() { checkCudaErrors(cudaFreeAsync(d_DEff, dataBase_.stream)); checkCudaErrors(cudaFreeAsync(d_permute, dataBase_.stream)); - checkCudaErrors(cudaFreeAsync(d_RR, dataBase_.stream)); - // thermophysical fields //checkCudaErrors(cudaFreeAsync(d_boundary_rhoD, dataBase_.stream)); checkCudaErrors(cudaFreeAsync(d_boundary_hai, dataBase_.stream));