diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H index 963e5f0e8..e676f28cc 100644 --- a/applications/solvers/dfLowMachFoam/EEqn.H +++ b/applications/solvers/dfLowMachFoam/EEqn.H @@ -3,6 +3,23 @@ #ifdef CPUSolver_ start1 = std::clock(); + //debug + // { + // const fvPatchScalarField& hew = he.boundaryField()[5]; + // const basicThermo& bThermo = basicThermo::lookupThermo(hew); + // const scalarField& pw = bThermo.p().boundaryField()[5]; + // fvPatchScalarField& Tw = + // const_cast(bThermo.T().boundaryField()[5]); + // scalarField& Tw_v = Tw; + + // Tw.evaluate(); + + // Info << "internal field" <(bThermo.T().boundaryField()[5]); + // scalarField& Tw_v = Tw; + + // Tw.evaluate(); + + // Info << "internal field" <(bThermo.T().boundaryField()[patchi]); + scalarField& Tw_v = Tw; + + Tw.evaluate(); + const scalarField& patchDeltaCoeff = mesh.boundary()[patchi].deltaCoeffs(); + const scalarField heInternal = bThermo.he(ppw, Tw, patchi)(); + const scalarField heBoundary = bThermo.he(ppw, Tw, mesh.boundary()[patchi].faceCells())(); + const scalarField patchGradMau = patchDeltaCoeff * (heInternal - heBoundary); + const scalarField& patchK = K.boundaryField()[patchi]; // const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi]; const scalarField& patchGrad = he.boundaryField()[patchi].gradientBoundaryCoeffs(); // gradient_ @@ -60,6 +112,42 @@ memcpy(boundary_gradient + eeqn_offset, &patchGrad[0], patchSize*sizeof(double)); eeqn_offset += patchSize; + + // debug + // const fvsPatchScalarField& patchFlux = phi.boundaryField()[patchi]; + // // Field valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw); + // // Field valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw); + // Field valueInternalCoeffs = patchFlux*he.boundaryField()[patchi].valueInternalCoeffs(pw); + // Field valueBoundaryCoeffs = -patchFlux*he.boundaryField()[patchi].valueBoundaryCoeffs(pw); + // Field gradientInternalCoeffs = he.boundaryField()[patchi].gradientInternalCoeffs(); + // Field gradientBoundaryCoeffs = he.boundaryField()[patchi].gradientBoundaryCoeffs(); + // labelUList sub_boundary = mesh.boundary()[patchi].faceCells(); + + // calculate grad manually + // const fvPatchScalarField& hew = he.boundaryField()[patchi]; + // const basicThermo& bThermo = basicThermo::lookupThermo(hew); + // const scalarField& ppw = bThermo.p().boundaryField()[patchi]; + // fvPatchScalarField& Tw = + // const_cast(bThermo.T().boundaryField()[patchi]); + // scalarField& Tw_v = Tw; + + // Tw.evaluate(); + // const scalarField& patchDeltaCoeff = mesh.boundary()[patchi].deltaCoeffs(); + // const scalarField heInternal = bThermo.he(ppw, Tw, patchi)(); + // const scalarField heBoundary = bThermo.he(ppw, Tw, mesh.boundary()[patchi].faceCells())(); + // const scalarField patchGradMau = patchDeltaCoeff * (heInternal - heBoundary); + + // forAll(sub_boundary, i){ + // if (sub_boundary[i] == 1) + // { + // printf("\npatchFlux = %.10lf\n", patchFlux[i]); + // printf("valueBoundaryCoeffs = %.10lf\n", he.boundaryField()[patchi].valueBoundaryCoeffs(pw)()[i]); + // printf("valueBoundaryCoeffs_CPU = %.10lf\n", valueBoundaryCoeffs[i]); + // printf("patchGrad = %.10lf\n", patchGrad[i]); + // printf("patchGradMau = %.10lf\n", patchGradMau[i]); + // printf("patch = %d, cellID = %d\n", patchi, i); + // } + // } } end1 = std::clock(); time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); @@ -95,7 +183,7 @@ time_monitor_EEqn_mtxAssembly_GPU_run += double(end1 - start1) / double(CLOCKS_PER_SEC); // check value of mtxAssembly, no time monitor - // EEqn_GPU.checkValue(false); + // EEqn_GPU.checkValue(true); start1 = std::clock(); EEqn_GPU.solve(); diff --git a/applications/solvers/dfLowMachFoam/YEqn.H b/applications/solvers/dfLowMachFoam/YEqn.H index dae8c9ca8..689a151d4 100644 --- a/applications/solvers/dfLowMachFoam/YEqn.H +++ b/applications/solvers/dfLowMachFoam/YEqn.H @@ -19,6 +19,7 @@ forAll(Y, i) { sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]); } +// Info << "sumYDiffError\n" << sumYDiffError << endl; const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf(); start1 = std::clock(); time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); @@ -43,18 +44,18 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); { cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double)); } - // const volScalarField& haii = chemistry->hai(i); - // const volScalarField& rhoDi = chemistry->rhoD(i); + const volScalarField& haii = chemistry->hai(i); + const volScalarField& rhoDi = chemistry->rhoD(i); // hai[i] = &haii[0]; - // rhoD[i] = &rhoDi[0]; + rhoD[i] = &rhoDi[0]; // cudaMallocHost(&boundary_hai[i], num_boundary_faces*sizeof(double)); - // cudaMallocHost(&boundary_rhoD[i], num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_rhoD[i], num_boundary_faces*sizeof(double)); int offset = 0; forAll(Yi.boundaryField(), patchi) { const scalarField& patchYi = Yi.boundaryField()[patchi]; // const scalarField& patchHaii = haii.boundaryField()[patchi]; - // const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi]; + const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi]; int patchSize = patchYi.size(); if (updateBoundaryFields) @@ -62,10 +63,16 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double)); } // memcpy(boundary_hai[i] + offset, &patchHaii[0], patchSize*sizeof(double)); - // memcpy(boundary_rhoD[i] + offset, &patchRhoDi[0], patchSize*sizeof(double)); + memcpy(boundary_rhoD[i] + offset, &patchRhoDi[0], patchSize*sizeof(double)); offset += patchSize; } + // if (i == 5) + // { + // Info << "rhoD_CPU" << rhoDi << endl; + // } + } + // Info << "rhoD from nuEff\n" << nuEff * rho / 0.7 << endl; updateBoundaryFields = false; volScalarField mut_sct = turbulence->mut().ref()/Sct; double *boundary_mutsct = nullptr; @@ -77,6 +84,17 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); int patchSize = patchMut_sct.size(); memcpy(boundary_mutsct + offset, &patchMut_sct[0], patchSize*sizeof(double)); offset += patchSize; + + // debug + // const fvsPatchScalarField& pw = mesh.surfaceInterpolation::weights().boundaryField()[patchi]; + // Field valueInternalCoeffs = Y[5].boundaryField()[patchi].valueInternalCoeffs(pw); + // Field valueBoundaryCoeffs = Y[5].boundaryField()[patchi].valueBoundaryCoeffs(pw); + // Field gradientInternalCoeffs = Y[5].boundaryField()[patchi].gradientInternalCoeffs(); + // Field gradientBoundaryCoeffs = Y[5].boundaryField()[patchi].gradientBoundaryCoeffs(); + // Info << "valueInternalCoeffs\n" << valueInternalCoeffs << endl; + // Info << "valueBoundaryCoeffs\n" << valueBoundaryCoeffs << endl; + // Info << "gradientInternalCoeffs\n" << gradientInternalCoeffs << endl; + // Info << "gradientBoundaryCoeffs\n" << gradientBoundaryCoeffs << endl; } end1 = std::clock(); time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); @@ -93,6 +111,7 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); YEqn_GPU.fvm_div_phi(); YEqn_GPU.fvm_div_phiUc(); YEqn_GPU.sync(); + // YEqn_GPU.checkValue(true, "of_output_H2.txt"); end1 = std::clock(); time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); @@ -161,6 +180,21 @@ if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM); : (fvm::laplacian(DEff(), Yi) + combustion->R(Yi)) ) ); + // debug + if (i == 0) + { + printf("b = %lf\n", YiEqn.source()[1]); + forAll(mesh.boundary(), patchi){ + labelUList sub_boundary = mesh.boundary()[patchi].faceCells(); + forAll(sub_boundary, i){ + if (sub_boundary[i] == 0){ + printf("patchID = %d, faceID = %d\n", patchi, i); + printf("YiEqn = %.15lf\n", YiEqn.boundaryCoeffs()[patchi][i]); + } + } + } + } + end1 = std::clock(); time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); // YiEqn.relax(); diff --git a/applications/solvers/dfLowMachFoam/createdfSolver.H b/applications/solvers/dfLowMachFoam/createdfSolver.H index 0707dc629..3c5593833 100644 --- a/applications/solvers/dfLowMachFoam/createdfSolver.H +++ b/applications/solvers/dfLowMachFoam/createdfSolver.H @@ -36,31 +36,30 @@ int num_boundary_cells; string settingPath; settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UEqnSettingPath", string("")); -dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0], -&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex, patchTypes); - #ifdef GPUSolver_ -dfRhoEqn rhoEqn_GPU(dfDataBase); -dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath); -dfYEqn YEqn_GPU(dfDataBase, "dDDI", settingPath, inertIndex); -dfEEqn EEqn_GPU(dfDataBase, "dDDI", settingPath); + dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0], + &mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex, patchTypes); + dfRhoEqn rhoEqn_GPU(dfDataBase); + dfUEqn UEqn_GPU(dfDataBase, "dDDI", settingPath); + dfYEqn YEqn_GPU(dfDataBase, "dDDI", settingPath, inertIndex); + dfEEqn EEqn_GPU(dfDataBase, "dDDI", settingPath); -double *ueqn_internalCoeffs_init, *ueqn_boundaryCoeffs_init, *boundary_pressure_init, *boundary_velocity_init, - *boundary_nuEff_init, *boundary_rho_init, *ueqn_laplac_internalCoeffs_init, *ueqn_laplac_boundaryCoeffs_init, *boundary_phi_init; -cudaMallocHost(&ueqn_internalCoeffs_init, 3*num_boundary_faces*sizeof(double)); -cudaMallocHost(&ueqn_boundaryCoeffs_init, 3*num_boundary_faces*sizeof(double)); -cudaMallocHost(&ueqn_laplac_internalCoeffs_init, 3*num_boundary_faces*sizeof(double)); -cudaMallocHost(&ueqn_laplac_boundaryCoeffs_init, 3*num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_velocity_init, 3*num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_pressure_init, num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double)); + double *ueqn_internalCoeffs_init, *ueqn_boundaryCoeffs_init, *boundary_pressure_init, *boundary_velocity_init, + *boundary_nuEff_init, *boundary_rho_init, *ueqn_laplac_internalCoeffs_init, *ueqn_laplac_boundaryCoeffs_init, *boundary_phi_init; + cudaMallocHost(&ueqn_internalCoeffs_init, 3*num_boundary_faces*sizeof(double)); + cudaMallocHost(&ueqn_boundaryCoeffs_init, 3*num_boundary_faces*sizeof(double)); + cudaMallocHost(&ueqn_laplac_internalCoeffs_init, 3*num_boundary_faces*sizeof(double)); + cudaMallocHost(&ueqn_laplac_boundaryCoeffs_init, 3*num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_velocity_init, 3*num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_pressure_init, num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double)); -double *boundary_alphaEff, *boundary_K, *boundary_gradient; -cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double)); -cudaMallocHost(&boundary_gradient, num_boundary_faces * sizeof(double)); + double *boundary_alphaEff, *boundary_K, *boundary_gradient; + cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double)); + cudaMallocHost(&boundary_gradient, num_boundary_faces * sizeof(double)); -bool updateBoundaryFields = true; // make sure that the boundary fields do H2D copy at 1st timestep + bool updateBoundaryFields = true; // make sure that the boundary fields do H2D copy at 1st timestep #endif diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C index cfffa8058..9982f3989 100644 --- a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C +++ b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C @@ -67,8 +67,8 @@ Description #include #include "upwind.H" -#define GPUSolver_ -// #define CPUSolver_ +// #define GPUSolver_ +#define CPUSolver_ // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // @@ -285,13 +285,15 @@ int main(int argc, char *argv[]) Info<< "other Time = " << time_monitor_other << " s" << endl; Info<< "rho Equations = " << time_monitor_rho << " s" << endl; Info<< "U Equations = " << time_monitor_U << " s" << endl; - Info<< "Y Equations = " << time_monitor_Y << " s" << endl; + Info<< "Y Equations = " << time_monitor_Y - time_monitor_chem << " s" << endl; Info<< "E Equations = " << time_monitor_E << " s" << endl; - Info<< "percentage of rho/U/Y/E = " << (time_monitor_E + time_monitor_Y + time_monitor_U + time_monitor_rho) / loop_time * 100 << " %" << endl; Info<< "p Equations = " << time_monitor_p << " s" << endl; Info<< "chemistry correctThermo = " << time_monitor_chemistry_correctThermo << " s" << endl; Info<< "turbulence correct = " << time_monitor_turbulence_correct << " s" << endl; Info<< "combustion correct(in Y) = " << time_monitor_chem << " s" << endl; + Info<< "percentage of chemistry = " << time_monitor_chem / loop_time * 100 << " %" << endl; + Info<< "percentage of rho/U/Y/E = " << (time_monitor_E + time_monitor_Y + time_monitor_U + time_monitor_rho - time_monitor_chem) / loop_time * 100 << " %" << endl; + Info<< "========Time details of each equation======="<< endl; diff --git a/src_gpu/dfEEqn.cu b/src_gpu/dfEEqn.cu index fd56e25be..d96bf0109 100644 --- a/src_gpu/dfEEqn.cu +++ b/src_gpu/dfEEqn.cu @@ -189,7 +189,7 @@ __global__ void eeqn_fvm_laplacian_uncorrected_boundary(int num_boundary_cells, double gamma = boundary_alphaEff[i]; double gamma_magsf = gamma * boundary_magsf[i]; internal_coeffs += gamma_magsf * gradient_internal_coeffs[i]; - boundary_coeffs += gamma_magsf * gradient_boundary_coeffs[i]; + boundary_coeffs -= gamma_magsf * gradient_boundary_coeffs[i]; } A_csr_output[csr_index] = A_csr_input[csr_index] + internal_coeffs * sign; @@ -631,8 +631,8 @@ void dfEEqn::add_to_source() void dfEEqn::checkValue(bool print) { - checkCudaErrors(cudaMemcpyAsync(h_A_csr, d_A_csr, csr_value_vec_bytes, cudaMemcpyDeviceToHost, stream)); - checkCudaErrors(cudaMemcpyAsync(h_b, d_b, cell_vec_bytes, cudaMemcpyDeviceToHost, stream)); + checkCudaErrors(cudaMemcpyAsync(h_A_csr, d_A_csr, (num_faces + num_cells) * sizeof(double), cudaMemcpyDeviceToHost, stream)); + checkCudaErrors(cudaMemcpyAsync(h_b, d_b, num_cells * sizeof(double), cudaMemcpyDeviceToHost, stream)); // Synchronize stream checkCudaErrors(cudaStreamSynchronize(stream)); @@ -644,7 +644,7 @@ void dfEEqn::checkValue(bool print) fprintf(stderr, "h_b[%d]: %.16lf\n", i, h_b[i]); } - char *input_file = "of_output.txt"; + char *input_file = "of_output_E.txt"; FILE *fp = fopen(input_file, "rb+"); if (fp == NULL) { @@ -672,9 +672,9 @@ void dfEEqn::checkValue(bool print) if (print) { for (int i = 0; i < (num_faces + num_cells); i++) - fprintf(stderr, "h_A_of_vec_1mtx[%d]: %.16lf\n", i, h_A_of_vec_1mtx[i]); + printf("h_A_of_vec_1mtx[%d]: %.16lf\n", i, h_A_of_vec_1mtx[i]); for (int i = 0; i < num_cells; i++) - fprintf(stderr, "h_b_of_vec[%d]: %.16lf\n", i, h_b_of_vec[i]); + printf("h_b_of_vec[%d]: %.16lf\n", i, h_b_of_vec[i]); } // check diff --git a/src_gpu/dfMatrixDataBase.H b/src_gpu/dfMatrixDataBase.H index 262391051..8efb4bf62 100644 --- a/src_gpu/dfMatrixDataBase.H +++ b/src_gpu/dfMatrixDataBase.H @@ -44,7 +44,8 @@ inline void checkVectorEqual(int count, const double* basevec, double* vec, doub enum boundaryConditions{ zeroGradient, fixedValue, - coupled + coupled, + empty }; void constructBoundarySelector(std::vector& patchTypeSelector, const std::string& patchTypeStr, const int patchSize); diff --git a/src_gpu/dfMatrixDataBase.cu b/src_gpu/dfMatrixDataBase.cu index 9c9f9a02a..d4f5a7ab0 100644 --- a/src_gpu/dfMatrixDataBase.cu +++ b/src_gpu/dfMatrixDataBase.cu @@ -9,6 +9,7 @@ void constructBoundarySelector(std::vector& patchTypeSelector, const std::s static std::map BCMap = { {"zeroGradient", zeroGradient}, {"fixedValue", fixedValue}, + {"empty", empty}, {"coupled", coupled} }; auto iter = BCMap.find(patchTypeStr); @@ -31,11 +32,17 @@ void constructBoundarySelector(std::vector& patchTypeSelector, const std::s patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); break; } - case coupled: + case empty: { tmpSelector.resize(patchSize, 2); patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); break; } + case coupled: + { + tmpSelector.resize(patchSize, 3); + patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); + break; + } } } diff --git a/src_gpu/dfUEqn.cu b/src_gpu/dfUEqn.cu index 0a4d80756..8e39d423f 100644 --- a/src_gpu/dfUEqn.cu +++ b/src_gpu/dfUEqn.cu @@ -821,9 +821,9 @@ __global__ void fvm_laplacian_uncorrected_vector_boundary(int num_cells, int num internal_coeffs_x += gamma_magsf * gradient_internal_coeffs[i * 3 + 0]; internal_coeffs_y += gamma_magsf * gradient_internal_coeffs[i * 3 + 1]; internal_coeffs_z += gamma_magsf * gradient_internal_coeffs[i * 3 + 2]; - boundary_coeffs_x += gamma_magsf * gradient_boundary_coeffs[i * 3 + 0]; - boundary_coeffs_y += gamma_magsf * gradient_boundary_coeffs[i * 3 + 1]; - boundary_coeffs_z += gamma_magsf * gradient_boundary_coeffs[i * 3 + 2]; + boundary_coeffs_x -= gamma_magsf * gradient_boundary_coeffs[i * 3 + 0]; + boundary_coeffs_y -= gamma_magsf * gradient_boundary_coeffs[i * 3 + 1]; + boundary_coeffs_z -= gamma_magsf * gradient_boundary_coeffs[i * 3 + 2]; } ueqn_internal_coeffs[cell_index * 3 + 0] += internal_coeffs_x * sign; @@ -997,39 +997,59 @@ __global__ void addDiagDivVolume(int num_cells, const int *csr_row_index, __global__ void ueqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, double *internal_coeffs, double *boundary_coeffs, double *laplac_internal_coeffs, - double *laplac_boundary_coeffs, const int *U_patch_type) + double *laplac_boundary_coeffs, const int *U_patch_type, + const double *boundary_velocity, const double *boundary_deltaCoeffs) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_faces) return; int patchIndex = U_patch_type[index]; - double valueInternalCoeffs, valueBoundaryCoeffs, gradientInternalCoeffs, gradientBoundaryCoeffs; - switch (patchIndex) - { - case 0: // zeroGradient - { - valueInternalCoeffs = 1.; - valueBoundaryCoeffs = 0.; - gradientInternalCoeffs = 0.; - gradientBoundaryCoeffs = 0.; - break; + if (patchIndex == 0) { // zeroGradient + double bouPhi = boundary_phi[index]; + internal_coeffs[index * 3 + 0] = bouPhi * 1.; // valueInternalCoeffs = 1. + internal_coeffs[index * 3 + 1] = bouPhi * 1.; + internal_coeffs[index * 3 + 2] = bouPhi * 1.; + boundary_coeffs[index * 3 + 0] = -bouPhi * 0.; // valueBoundaryCoeffs = 0. + boundary_coeffs[index * 3 + 1] = -bouPhi * 0.; + boundary_coeffs[index * 3 + 2] = -bouPhi * 0.; + laplac_internal_coeffs[index * 3 + 0] = 0.; // gradientInternalCoeffs = 0. + laplac_internal_coeffs[index * 3 + 1] = 0.; + laplac_internal_coeffs[index * 3 + 2] = 0.; + laplac_boundary_coeffs[index * 3 + 0] = 0.; // gradientBoundaryCoeffs = 0. + laplac_boundary_coeffs[index * 3 + 1] = 0.; + laplac_boundary_coeffs[index * 3 + 2] = 0.; + } else if (patchIndex == 1) { // fixedValue + double bouDeltaCoeffs = boundary_deltaCoeffs[index]; + double bouPhi = boundary_phi[index]; + internal_coeffs[index * 3 + 0] = bouPhi * 0.; // valueInternalCoeffs = 0. + internal_coeffs[index * 3 + 1] = bouPhi * 0.; + internal_coeffs[index * 3 + 2] = bouPhi * 0.; + boundary_coeffs[index * 3 + 0] = -bouPhi * boundary_velocity[index * 3 + 0]; // valueBoundaryCoeffs = boundaryValue + boundary_coeffs[index * 3 + 1] = -bouPhi * boundary_velocity[index * 3 + 0]; + boundary_coeffs[index * 3 + 2] = -bouPhi * boundary_velocity[index * 3 + 0]; + laplac_internal_coeffs[index * 3 + 0] = -1 * bouDeltaCoeffs; // gradientInternalCoeffs = -1 * boundaryDeltaCoeffs + laplac_internal_coeffs[index * 3 + 1] = -1 * bouDeltaCoeffs; + laplac_internal_coeffs[index * 3 + 2] = -1 * bouDeltaCoeffs; + laplac_boundary_coeffs[index * 3 + 0] = bouDeltaCoeffs * boundary_velocity[index * 3 + 0]; // gradientBoundaryCoeffs = boundaryDeltaCoeffs * boundaryValue + laplac_boundary_coeffs[index * 3 + 1] = bouDeltaCoeffs * boundary_velocity[index * 3 + 1]; + laplac_boundary_coeffs[index * 3 + 2] = bouDeltaCoeffs * boundary_velocity[index * 3 + 2]; + } else if (patchIndex == 2) { // empty + double bouPhi = boundary_phi[index]; + internal_coeffs[index * 3 + 0] = bouPhi * 0.; // valueInternalCoeffs = 0. + internal_coeffs[index * 3 + 1] = bouPhi * 0.; + internal_coeffs[index * 3 + 2] = bouPhi * 0.; + boundary_coeffs[index * 3 + 0] = -bouPhi * 0.; // valueBoundaryCoeffs = 0. + boundary_coeffs[index * 3 + 1] = -bouPhi * 0.; + boundary_coeffs[index * 3 + 2] = -bouPhi * 0.; + laplac_internal_coeffs[index * 3 + 0] = 0.; // gradientInternalCoeffs = 0. + laplac_internal_coeffs[index * 3 + 1] = 0.; + laplac_internal_coeffs[index * 3 + 2] = 0.; + laplac_boundary_coeffs[index * 3 + 0] = 0.; // gradientBoundaryCoeffs = 0. + laplac_boundary_coeffs[index * 3 + 1] = 0.; + laplac_boundary_coeffs[index * 3 + 2] = 0.; } - // TODO implement coupled and fixedValue conditions - } - - internal_coeffs[index * 3 + 0] = boundary_phi[index] * valueInternalCoeffs; - internal_coeffs[index * 3 + 1] = boundary_phi[index] * valueInternalCoeffs; - internal_coeffs[index * 3 + 2] = boundary_phi[index] * valueInternalCoeffs; - boundary_coeffs[index * 3 + 0] = -boundary_phi[index] * valueBoundaryCoeffs; - boundary_coeffs[index * 3 + 1] = -boundary_phi[index] * valueBoundaryCoeffs; - boundary_coeffs[index * 3 + 2] = -boundary_phi[index] * valueBoundaryCoeffs; - laplac_internal_coeffs[index * 3 + 0] = gradientInternalCoeffs; - laplac_internal_coeffs[index * 3 + 1] = gradientInternalCoeffs; - laplac_internal_coeffs[index * 3 + 2] = gradientInternalCoeffs; - laplac_boundary_coeffs[index * 3 + 0] = gradientBoundaryCoeffs; - laplac_boundary_coeffs[index * 3 + 1] = gradientBoundaryCoeffs; - laplac_boundary_coeffs[index * 3 + 2] = gradientBoundaryCoeffs; + // TODO implement coupled conditions } __global__ void ueqn_correct_BoundaryConditions_kernel(int num_cells, int num_boundary_cells, @@ -1058,6 +1078,8 @@ __global__ void ueqn_correct_BoundaryConditions_kernel(int num_cells, int num_bo } case 1: break; + case 2: + break; // TODO implement coupled conditions } } @@ -1128,6 +1150,14 @@ void dfUEqn::fvm_div(double *boundary_pressure_init, double *boundary_velocity_i dataBase_.d_boundary_velocity_init, dataBase_.d_boundary_pressure, dataBase_.d_boundary_velocity, dataBase_.d_boundary_nuEff_init, dataBase_.d_boundary_nuEff, dataBase_.d_boundary_rho_init, dataBase_.d_boundary_rho); + // initialize boundary coeffs (must after the update of d_boundary_velocity) + threads_per_block = 1024; + blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; + ueqn_update_BoundaryCoeffs_kernel<<>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi, + dataBase_.d_internal_coeffs, dataBase_.d_boundary_coeffs, + dataBase_.d_laplac_internal_coeffs, dataBase_.d_laplac_boundary_coeffs, + dataBase_.d_boundary_UpatchType, dataBase_.d_boundary_velocity, dataBase_.d_boundary_deltaCoeffs); + blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; fvm_div_internal<<>>(num_cells, num_faces, d_A_csr_row_index, d_A_csr_diag_index, @@ -1263,13 +1293,6 @@ void dfUEqn::initializeTimeStep() // initialize matrix value checkCudaErrors(cudaMemsetAsync(d_A_csr, 0, csr_value_vec_bytes, stream)); checkCudaErrors(cudaMemsetAsync(d_b, 0, cell_vec_bytes, stream)); - // initialize boundary coeffs - size_t threads_per_block = 1024; - size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; - ueqn_update_BoundaryCoeffs_kernel<<>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi, - dataBase_.d_internal_coeffs, dataBase_.d_boundary_coeffs, - dataBase_.d_laplac_internal_coeffs, dataBase_.d_laplac_boundary_coeffs, - dataBase_.d_boundary_UpatchType); } void dfUEqn::checkValue(bool print) diff --git a/src_gpu/dfYEqn.cu b/src_gpu/dfYEqn.cu index 197352a4e..7da385a5d 100644 --- a/src_gpu/dfYEqn.cu +++ b/src_gpu/dfYEqn.cu @@ -30,43 +30,43 @@ __global__ void fvc_grad_internal(int num_cells, int num_species, double vol = volume[index]; for (int s = 0; s < num_species; s++) { - double own_cell_Y = species[num_cells * s + index]; - double grad_bx = 0; - double grad_by = 0; - double grad_bz = 0; - for (int i = row_index; i < next_row_index; i++) - { - int inner_index = i - row_index; - // lower - if (inner_index < diag_index) + double own_cell_Y = species[num_cells * s + index]; + double grad_bx = 0; + double grad_by = 0; + double grad_bz = 0; + for (int i = row_index; i < next_row_index; i++) { - int neighbor_index = neighbor_offset + inner_index; - double w = weight[neighbor_index]; - double sfx = face_vector[neighbor_index * 3 + 0]; - double sfy = face_vector[neighbor_index * 3 + 1]; - double sfz = face_vector[neighbor_index * 3 + 2]; - int neighbor_cell_id = csr_col_index[row_index + inner_index]; - double neighbor_cell_Y = species[num_cells * s + neighbor_cell_id]; - double face_Y = w * (neighbor_cell_Y - own_cell_Y) + own_cell_Y; - grad_bx -= face_Y * sfx; - grad_by -= face_Y * sfy; - grad_bz -= face_Y * sfz; - } - // upper - if (inner_index > diag_index) - { - int neighbor_index = neighbor_offset + inner_index - 1; - double w = weight[neighbor_index]; - double sfx = face_vector[neighbor_index * 3 + 0]; - double sfy = face_vector[neighbor_index * 3 + 1]; - double sfz = face_vector[neighbor_index * 3 + 2]; - int neighbor_cell_id = csr_col_index[row_index + inner_index]; - double neighbor_cell_Y = species[num_cells * s + neighbor_cell_id]; - double face_Y = w * (own_cell_Y - neighbor_cell_Y) + neighbor_cell_Y; - grad_bx += face_Y * sfx; - grad_by += face_Y * sfy; - grad_bz += face_Y * sfz; - } + int inner_index = i - row_index; + // lower + if (inner_index < diag_index) + { + int neighbor_index = neighbor_offset + inner_index; + double w = weight[neighbor_index]; + double sfx = face_vector[neighbor_index * 3 + 0]; + double sfy = face_vector[neighbor_index * 3 + 1]; + double sfz = face_vector[neighbor_index * 3 + 2]; + int neighbor_cell_id = csr_col_index[row_index + inner_index]; + double neighbor_cell_Y = species[num_cells * s + neighbor_cell_id]; + double face_Y = w * (neighbor_cell_Y - own_cell_Y) + own_cell_Y; + grad_bx -= face_Y * sfx; + grad_by -= face_Y * sfy; + grad_bz -= face_Y * sfz; + } + // upper + if (inner_index > diag_index) + { + int neighbor_index = neighbor_offset + inner_index - 1; + double w = weight[neighbor_index]; + double sfx = face_vector[neighbor_index * 3 + 0]; + double sfy = face_vector[neighbor_index * 3 + 1]; + double sfz = face_vector[neighbor_index * 3 + 2]; + int neighbor_cell_id = csr_col_index[row_index + inner_index]; + double neighbor_cell_Y = species[num_cells * s + neighbor_cell_id]; + double face_Y = w * (own_cell_Y - neighbor_cell_Y) + neighbor_cell_Y; + grad_bx += face_Y * sfx; + grad_by += face_Y * sfy; + grad_bz += face_Y * sfz; + } } grady[num_cells * s * 3 + index * 3 + 0] = grad_bx / vol; grady[num_cells * s * 3 + index * 3 + 1] = grad_by / vol; @@ -124,7 +124,8 @@ __global__ void fvc_grad_boundary(int num_cells, int num_boundary_cells, int num __global__ void correct_boundary_conditions(int num_cells, int num_boundary_cells, int num_boundary_faces, int num_species, const int *boundary_cell_offset, const int *boundary_cell_id, const double *boundary_sf, const double *mag_sf, - const double *grady, double* boundary_grady) + const double *grady, double* boundary_grady, const double *boundary_deltaCoeffs, + const double *Y, const double *boundary_Y, const int *Y_patch_type) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_cells) @@ -135,22 +136,30 @@ __global__ void correct_boundary_conditions(int num_cells, int num_boundary_cell int cell_index = boundary_cell_id[cell_offset]; for (int s = 0; s < num_species; s++) { - // initialize boundary_sumYDiffError - double grady_x = grady[num_cells * s * 3 + cell_index * 3 + 0]; - double grady_y = grady[num_cells * s * 3 + cell_index * 3 + 1]; - double grady_z = grady[num_cells * s * 3 + cell_index * 3 + 2]; + // initialize boundary_sumYDiffError + double grady_x = grady[num_cells * s * 3 + cell_index * 3 + 0]; + double grady_y = grady[num_cells * s * 3 + cell_index * 3 + 1]; + double grady_z = grady[num_cells * s * 3 + cell_index * 3 + 2]; + double internal_Y = Y[num_cells * s + cell_index]; - for (int i = cell_offset; i < next_cell_offset; i++) - { - double n_x = boundary_sf[i * 3 + 0] / mag_sf[i]; - double n_y = boundary_sf[i * 3 + 1] / mag_sf[i]; - double n_z = boundary_sf[i * 3 + 2] / mag_sf[i]; - double sn_grad = 0; - double grad_correction = sn_grad - (n_x * grady_x + n_y * grady_y + n_z * grady_z); - boundary_grady[num_boundary_faces * s * 3 + i * 3 + 0] = grady_x + grad_correction * n_x; - boundary_grady[num_boundary_faces * s * 3 + i * 3 + 1] = grady_y + grad_correction * n_y; - boundary_grady[num_boundary_faces * s * 3 + i * 3 + 2] = grady_z + grad_correction * n_z; - } + for (int i = cell_offset; i < next_cell_offset; i++) + { + double n_x = boundary_sf[i * 3 + 0] / mag_sf[i]; + double n_y = boundary_sf[i * 3 + 1] / mag_sf[i]; + double n_z = boundary_sf[i * 3 + 2] / mag_sf[i]; + int patchIndex = Y_patch_type[i]; + double sn_grad; + if (patchIndex == 0) { // zeroGradient + sn_grad = 0; + } else if (patchIndex == 1) { // fixedValue + sn_grad = boundary_deltaCoeffs[i] * (boundary_Y[num_boundary_faces * s + i] - internal_Y); + } + // TODO: implement other BCs + double grad_correction = sn_grad - (n_x * grady_x + n_y * grady_y + n_z * grady_z); + boundary_grady[num_boundary_faces * s * 3 + i * 3 + 0] = grady_x + grad_correction * n_x; + boundary_grady[num_boundary_faces * s * 3 + i * 3 + 1] = grady_y + grad_correction * n_y; + boundary_grady[num_boundary_faces * s * 3 + i * 3 + 2] = grady_z + grad_correction * n_z; + } } } @@ -201,15 +210,17 @@ __global__ void sumError_boundary(int num_boundary_faces, int num_species, const if (index >= num_boundary_faces) return; - int permute_index; + int permute_index, permute_index_Y; if (!uploadData) { - permute_index = index; + permute_index_Y = index; } else { - permute_index = bouPermedIndex[index]; + permute_index_Y = bouPermedIndex[index]; } + permute_index = bouPermedIndex[index]; + double sum_boundary_hai_rhoD_grady_x = 0; double sum_boundary_hai_rhoD_grady_y = 0; double sum_boundary_hai_rhoD_grady_z = 0; @@ -220,7 +231,7 @@ __global__ void sumError_boundary(int num_boundary_faces, int num_species, const for (int s = 0; s < num_species; s++) { double boundary_hai_value = boundary_hai[num_boundary_faces * s + permute_index]; double boundary_rhoD_value = boundary_rhoD[num_boundary_faces * s + permute_index]; - double boundary_y_value = boundary_y[num_boundary_faces * s + permute_index]; + double boundary_y_value = boundary_y[num_boundary_faces * s + permute_index_Y]; double boundary_grady_x = boundary_grady[num_boundary_faces * s * 3 + index * 3 + 0]; double boundary_grady_y = boundary_grady[num_boundary_faces * s * 3 + index * 3 + 1]; double boundary_grady_z = boundary_grady[num_boundary_faces * s * 3 + index * 3 + 2]; @@ -463,7 +474,7 @@ __global__ void fvm_div_boundary_scalar(int num_cells, int num_faces, int num_bo for (int i = 0; i < loop_size; i++) { internal_coeffs_own += boundary_phi[cell_offset + i] * internal_coeffs[num_boundary_faces * s + cell_offset + i]; - boundary_coeffs_own += -boundary_phi[cell_offset + i] * boundary_coeffs[num_boundary_faces + s + cell_offset + i]; + boundary_coeffs_own += -boundary_phi[cell_offset + i] * boundary_coeffs[num_boundary_faces * s + cell_offset + i]; } A_csr_output[mtxIndex * (num_cells + num_faces) + csr_index] = A_csr_input[mtxIndex * (num_cells + num_faces) + csr_index] + internal_coeffs_own; @@ -553,23 +564,23 @@ __global__ void fvm_laplacian_uncorrected_scalar_boundary(int num_cells, int num int mtxIndex = 0; for (int s = 0; s < num_species; s++) { - if (s == inertIndex) continue; - double internal_coeffs = 0; - double boundary_coeffs = 0; - for (int i = cell_offset; i < next_cell_offset; i++) - { - int permute_index = bouPermedIndex[i]; - double gamma = boundary_mut_sct[permute_index] + boundary_rhoD[num_boundary_faces * s + permute_index]; - double gamma_magsf = gamma * boundary_magsf[i]; - internal_coeffs += gamma_magsf * gradient_internal_coeffs[num_boundary_faces * s + i]; - boundary_coeffs += gamma_magsf * gradient_boundary_coeffs[num_boundary_faces * s + i]; - } + if (s == inertIndex) continue; + double internal_coeffs = 0; + double boundary_coeffs = 0; + for (int i = cell_offset; i < next_cell_offset; i++) + { + int permute_index = bouPermedIndex[i]; + double gamma = boundary_mut_sct[permute_index] + boundary_rhoD[num_boundary_faces * s + permute_index]; + double gamma_magsf = gamma * boundary_magsf[i]; + internal_coeffs += gamma_magsf * gradient_internal_coeffs[num_boundary_faces * s + i]; + boundary_coeffs -= gamma_magsf * gradient_boundary_coeffs[num_boundary_faces * s + i]; + } - A_csr_output[mtxIndex * (num_cells + num_faces) + csr_index] = - A_csr_input[mtxIndex * (num_cells + num_faces) + csr_index] + internal_coeffs * sign; - b_output[mtxIndex * num_cells + cell_index] = - b_input[mtxIndex * num_cells + cell_index] + boundary_coeffs * sign; - ++mtxIndex; + A_csr_output[mtxIndex * (num_cells + num_faces) + csr_index] = + A_csr_input[mtxIndex * (num_cells + num_faces) + csr_index] + internal_coeffs * sign; + b_output[mtxIndex * num_cells + cell_index] = + b_input[mtxIndex * num_cells + cell_index] + boundary_coeffs * sign; + ++mtxIndex; } } @@ -627,34 +638,48 @@ __global__ void fvc_laplacian_internal(int num_cells, int num_species, } __global__ void yeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, int num_species, - const double *boundary_phi, double *internal_coeffs, + const double *boundary_phi, double *internal_coeffs, double *boundary_coeffs, double *laplac_internal_coeffs, - double *laplac_boundary_coeffs, const int *U_patch_type) + double *laplac_boundary_coeffs, const int *Y_patch_type, + const double *boundary_Y, const double *boundary_deltaCoeffs, + const int* bouPermedIndex, bool uploadData) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_faces) return; - int patchIndex = U_patch_type[index]; - for (int s = 0; s < num_species; s++) { + int patchIndex = Y_patch_type[index]; double valueInternalCoeffs, valueBoundaryCoeffs, gradientInternalCoeffs, gradientBoundaryCoeffs; - switch (patchIndex) - { - case 0: // zeroGradient - { + for (int s = 0; s < num_species; s++) { + if (patchIndex == 0) { // zeroGradient valueInternalCoeffs = 1.; valueBoundaryCoeffs = 0.; gradientInternalCoeffs = 0.; gradientBoundaryCoeffs = 0.; - break; + } else if (patchIndex == 1) { // fixedValue + if (!uploadData) { + valueInternalCoeffs = 0.; + valueBoundaryCoeffs = boundary_Y[index + s * num_boundary_faces]; + gradientInternalCoeffs = -1 * boundary_deltaCoeffs[index]; + gradientBoundaryCoeffs = boundary_Y[index + s * num_boundary_faces] * boundary_deltaCoeffs[index]; + } else { + int permute_index = bouPermedIndex[index]; + valueInternalCoeffs = 0.; + valueBoundaryCoeffs = boundary_Y[permute_index + s * num_boundary_faces]; + gradientInternalCoeffs = -1 * boundary_deltaCoeffs[index]; + gradientBoundaryCoeffs = boundary_Y[permute_index + s * num_boundary_faces] * boundary_deltaCoeffs[index]; + } + } else if (patchIndex == 2) { // empty + valueInternalCoeffs = 0.; + valueBoundaryCoeffs = 0.; + gradientInternalCoeffs = 0.; + gradientBoundaryCoeffs = 0.; } - // TODO implement coupled and fixedValue conditions - } - - internal_coeffs[num_boundary_faces * s + index] = valueInternalCoeffs; - boundary_coeffs[num_boundary_faces * s + index] = valueBoundaryCoeffs; - laplac_internal_coeffs[num_boundary_faces * s + index] = gradientInternalCoeffs; - laplac_boundary_coeffs[num_boundary_faces * s + index] = gradientBoundaryCoeffs; + internal_coeffs[num_boundary_faces * s + index] = valueInternalCoeffs; + boundary_coeffs[num_boundary_faces * s + index] = valueBoundaryCoeffs; + laplac_internal_coeffs[num_boundary_faces * s + index] = gradientInternalCoeffs; + laplac_boundary_coeffs[num_boundary_faces * s + index] = gradientBoundaryCoeffs; + } } @@ -673,6 +698,7 @@ __global__ void yeqn_correct_BoundaryConditions_kernel(int num_cells, int num_bo for (int i = cell_offset; i < next_cell_offset; i++) { int patchIndex = Y_patch_type[i]; + switch (patchIndex) { case 0: // zeroGradient @@ -683,8 +709,8 @@ __global__ void yeqn_correct_BoundaryConditions_kernel(int num_cells, int num_bo } break; } - case 1: - break; + // case 1: + // break; // TODO implement coupled conditions } } @@ -697,11 +723,11 @@ __global__ void yeqn_calculate_rhoD_alpha_via_nuEff_internal(int num_cells, int if (index >= num_cells) return; - // rhoD = alpha (UnityLewis) - // alpha = nu * rho / 0.7 - for (int i = 0; i < num_species; i++) { - rhoD[i * num_cells + index] = nuEff[index] * rho[index] / 0.7; - } + // // rhoD = alpha (UnityLewis) + // // alpha = nu * rho / 0.7 + // for (int i = 0; i < num_species; i++) { + // rhoD[i * num_cells + index] = nuEff[index] * rho[index] / 0.7; + // } alpha[index] = rhoD[index]; } @@ -713,9 +739,9 @@ __global__ void yeqn_calculate_rhoD_alpha_via_nuEff_boundary(int num_boundary_fa if (index >= num_boundary_face) return; - for (int i = 0; i < num_species; i++) { - boundary_rhoD[i * num_boundary_face + index] = boundary_nuEff[index] * boundary_rho[index] / 0.7; - } + // for (int i = 0; i < num_species; i++) { + // boundary_rhoD[i * num_boundary_face + index] = boundary_nuEff[index] * boundary_rho[index] / 0.7; + // } boundary_alpha[index] = boundary_rhoD[index]; } @@ -798,17 +824,19 @@ void dfYEqn::initializeTimeStep() // initialize variables in each time step checkCudaErrors(cudaMemsetAsync(d_psi, 0, cell_bytes * (num_species - 1), stream)); - // initialize boundary coeffs - size_t threads_per_block = 1024; - size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; - yeqn_update_BoundaryCoeffs_kernel<<>>( - num_boundary_faces, num_species, - dataBase_.d_boundary_phi, - dataBase_.d_internal_coeffs_Y, - dataBase_.d_boundary_coeffs_Y, - dataBase_.d_laplac_internal_coeffs_Y, - dataBase_.d_laplac_boundary_coeffs_Y, - dataBase_.d_boundary_YpatchType); + // // initialize boundary coeffs + // size_t threads_per_block = 1024; + // size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; + // yeqn_update_BoundaryCoeffs_kernel<<>>( + // num_boundary_faces, num_species, + // dataBase_.d_boundary_phi, + // dataBase_.d_internal_coeffs_Y, + // dataBase_.d_boundary_coeffs_Y, + // dataBase_.d_laplac_internal_coeffs_Y, + // dataBase_.d_laplac_boundary_coeffs_Y, + // dataBase_.d_boundary_YpatchType, + // d_boundary_Y, + // dataBase_.d_boundary_deltaCoeffs); } void dfYEqn::upwindWeight() @@ -851,15 +879,41 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto // checkCudaErrors(cudaMemcpyAsync(d_hai + i * num_cells, hai[i], cell_bytes, cudaMemcpyHostToDevice, stream)); // checkCudaErrors(cudaMemcpyAsync(d_boundary_hai + i * num_boundary_faces, boundary_hai[i], boundary_face_bytes, // cudaMemcpyHostToDevice, stream)); - // checkCudaErrors(cudaMemcpyAsync(d_rhoD + i * num_cells, rhoD[i], cell_bytes, cudaMemcpyHostToDevice, stream)); - // checkCudaErrors(cudaMemcpyAsync(d_boundary_rhoD + i * num_boundary_faces, boundary_rhoD[i], boundary_face_bytes, - // cudaMemcpyHostToDevice, stream)); + // TODO: check why rhoD has to upload even in the UnityLewis case + checkCudaErrors(cudaMemcpyAsync(d_rhoD + i * num_cells, rhoD[i], cell_bytes, cudaMemcpyHostToDevice, stream)); + checkCudaErrors(cudaMemcpyAsync(d_boundary_rhoD + i * num_boundary_faces, boundary_rhoD[i], boundary_face_bytes, + cudaMemcpyHostToDevice, stream)); } + // initialize boundary coeffs (must after the update of d_boundary_Y) + threads_per_block = 1024; + blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; + yeqn_update_BoundaryCoeffs_kernel<<>>( + num_boundary_faces, num_species, + dataBase_.d_boundary_phi, + dataBase_.d_internal_coeffs_Y, + dataBase_.d_boundary_coeffs_Y, + dataBase_.d_laplac_internal_coeffs_Y, + dataBase_.d_laplac_boundary_coeffs_Y, + dataBase_.d_boundary_YpatchType, + d_boundary_Y, + dataBase_.d_boundary_deltaCoeffs, + dataBase_.d_bouPermedIndex, + uploadData); + threads_per_block = 1024; blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; yeqn_calculate_rhoD_alpha_via_nuEff_internal<<>>( num_cells, num_species, d_rhoD, dataBase_.d_nuEff, dataBase_.d_rho_new, d_alpha); + // // check rhoD + // checkCudaErrors(cudaStreamSynchronize(stream)); + // double *h_rhoD = new double[num_cells]; + // cudaMemcpy(h_rhoD, d_rhoD + num_cells * 5, num_cells * sizeof(double), cudaMemcpyDeviceToHost); + // for (int i = 0; i < num_boundary_faces; i++) + // { + // printf("Y_H_rhoD[%d] = %e\n", i, h_rhoD[i]); + // } + blocks_per_grid = (num_boundary_faces + threads_per_block - 1) / threads_per_block; yeqn_calculate_rhoD_alpha_via_nuEff_boundary<<>>( num_boundary_faces, num_species, @@ -881,12 +935,21 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, dataBase_.d_bouPermedIndex, dataBase_.d_boundary_face_vector, d_boundary_Y, dataBase_.d_volume, d_grady, d_grady, uploadData); + // check + // checkCudaErrors(cudaStreamSynchronize(stream)); + // double *h_grady = new double[num_cells * 3]; + // cudaMemcpy(h_grady, d_grady + num_cells * 3 * 5, (num_cells * 3) * sizeof(double), cudaMemcpyDeviceToHost); + // for (int i = 0; i < num_cells; i++) + // { + // printf("d_grady[%d] = (%lf, %lf, %lf)\n", i, h_grady[i * 3], h_grady[i * 3 + 1], h_grady[i * 3 + 2]); + // } + blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; correct_boundary_conditions<<>>( num_cells, num_boundary_cells, num_boundary_faces, num_species, dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, dataBase_.d_boundary_face_vector, dataBase_.d_boundary_face, - d_grady, d_boundary_grady); + d_grady, d_boundary_grady, dataBase_.d_boundary_deltaCoeffs, dataBase_.d_Y, d_boundary_Y, dataBase_.d_boundary_YpatchType); // sum(chemistry->hai(i)*chemistry->rhoD(i)*fvc::grad(Yi)) // sum(chemistry->rhoD(i)*fvc::grad(Yi)), also be called sumYDiffError @@ -1011,8 +1074,8 @@ void dfYEqn::fvm_div_phiUc() void dfYEqn::checkValue(bool print, char *filename) { - checkCudaErrors(cudaMemcpyAsync(h_A_csr, d_A_csr, (num_cells + num_faces) * sizeof(double), cudaMemcpyDeviceToHost, stream)); - checkCudaErrors(cudaMemcpyAsync(h_b, d_b, num_cells * sizeof(double), cudaMemcpyDeviceToHost, stream)); + checkCudaErrors(cudaMemcpyAsync(h_A_csr, d_A_csr + (num_cells + num_faces) * 5, (num_cells + num_faces) * sizeof(double), cudaMemcpyDeviceToHost, stream)); // H + checkCudaErrors(cudaMemcpyAsync(h_b, d_b + num_cells * 5, num_cells * sizeof(double), cudaMemcpyDeviceToHost, stream)); // H // Synchronize stream checkCudaErrors(cudaStreamSynchronize(stream));