diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H index 93e60e7bf..c9cbd1e68 100644 --- a/applications/solvers/dfLowMachFoam/EEqn.H +++ b/applications/solvers/dfLowMachFoam/EEqn.H @@ -28,6 +28,8 @@ #ifdef GPUSolver_ start1 = std::clock(); UEqn_GPU.updatePsi(&U[0][0]); + UEqn_GPU.correctBoundaryConditions(); + U.correctBoundaryConditions(); K = 0.5*magSqr(U); end1 = std::clock(); time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); @@ -51,8 +53,10 @@ const scalarField& patchK = K.boundaryField()[patchi]; const scalarField& patchAlphaEff = alphaEff.boundaryField()[patchi]; + const scalarField patchGrad = he.boundaryField()[patchi].gradientBoundaryCoeffs(); // gradient_ (copy: gradientBoundaryCoeffs() returns a tmp; a reference would dangle) memcpy(boundary_K + eeqn_offset, &patchK[0], patchSize*sizeof(double)); memcpy(boundary_alphaEff + eeqn_offset, &patchAlphaEff[0], patchSize*sizeof(double)); + memcpy(boundary_gradient + eeqn_offset, &patchGrad[0], patchSize*sizeof(double)); eeqn_offset += patchSize; } @@ -67,7 +71,7 @@ // prepare data on GPU start1 = std::clock(); EEqn_GPU.prepare_data(&he.oldTime()[0], &K[0], &K.oldTime()[0], &alphaEff[0], - &dpdt[0], boundary_K, boundary_alphaEff); + &dpdt[0], boundary_K, boundary_alphaEff, boundary_gradient); EEqn_GPU.sync(); end1 = std::clock(); time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); diff --git a/applications/solvers/dfLowMachFoam/YEqn.H b/applications/solvers/dfLowMachFoam/YEqn.H index 94fb6cf8d..6e6c9876a 100644 --- a/applications/solvers/dfLowMachFoam/YEqn.H +++ b/applications/solvers/dfLowMachFoam/YEqn.H @@ -39,7 +39,10 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); { volScalarField& Yi = Y[i]; Y_old[i] = &Yi.oldTime()[0]; - cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double)); + if (updateBoundaryFields) + { + cudaMallocHost(&boundary_Y[i], num_boundary_faces*sizeof(double)); + } const volScalarField& haii = chemistry->hai(i);
const volScalarField& rhoDi = chemistry->rhoD(i); hai[i] = &haii[0]; @@ -54,12 +58,16 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); const scalarField& patchRhoDi = rhoDi.boundaryField()[patchi]; int patchSize = patchYi.size(); - memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double)); + if (updateBoundaryFields) + { + memcpy(boundary_Y[i] + offset, &patchYi[0], patchSize*sizeof(double)); + } memcpy(boundary_hai[i] + offset, &patchHaii[0], patchSize*sizeof(double)); memcpy(boundary_rhoD[i] + offset, &patchRhoDi[0], patchSize*sizeof(double)); offset += patchSize; } } + updateBoundaryFields = false; volScalarField mut_sct = turbulence->mut().ref()/Sct; double *boundary_mutsct = nullptr; cudaMallocHost(&boundary_mutsct, num_boundary_faces*sizeof(double)); @@ -124,6 +132,7 @@ if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM); YEqn_GPU.updatePsi(&Yi[0], i); Yi.correctBoundaryConditions(); } + YEqn_GPU.correctBoundaryConditions(); end1 = std::clock(); time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); time_monitor_YEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC); diff --git a/applications/solvers/dfLowMachFoam/createdfSolver.H b/applications/solvers/dfLowMachFoam/createdfSolver.H index e56376b28..0707dc629 100644 --- a/applications/solvers/dfLowMachFoam/createdfSolver.H +++ b/applications/solvers/dfLowMachFoam/createdfSolver.H @@ -7,6 +7,8 @@ std::vector boundaryCellIndex; std::vector boundary_face_vector_init; std::vector boundary_face_init; std::vector boundary_deltaCoeffs_init; +std::vector> patchTypes; +std::vector patchTypeU, patchTypeY; int num_boundary_faces = 0; int patchSize; forAll(mesh.boundary(), patchi) @@ -22,14 +24,20 @@ forAll(mesh.boundary(), patchi) boundary_face_init.insert(boundary_face_init.end(), &pMagSf[0], &pMagSf[0]+patchSize); boundary_deltaCoeffs_init.insert(boundary_deltaCoeffs_init.end(), &pDeltaCoeffs[0], &pDeltaCoeffs[0]+patchSize); num_boundary_faces 
+= patchSize; + + constructBoundarySelector(patchTypeU, U.boundaryField()[patchi].type(), patchSize); + constructBoundarySelector(patchTypeY, Y[0].boundaryField()[patchi].type(), patchSize); } +patchTypes.emplace_back(patchTypeU); +patchTypes.emplace_back(patchTypeY); + int num_boundary_cells; string settingPath; settingPath = CanteraTorchProperties.subDict("AmgxSettings").lookupOrDefault("UEqnSettingPath", string("")); dfMatrixDataBase dfDataBase(num_surfaces, num_cells, num_boundary_faces, Y.size(), num_boundary_cells, &neighbour[0], &owner[0], &mesh.V()[0], &mesh.surfaceInterpolation::weights()[0], -&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex); +&mesh.Sf()[0][0], &mesh.magSf()[0], &mesh.nonOrthDeltaCoeffs()[0], boundary_face_vector_init, boundary_face_init, boundary_deltaCoeffs_init, boundaryCellIndex, patchTypes); #ifdef GPUSolver_ dfRhoEqn rhoEqn_GPU(dfDataBase); @@ -49,12 +57,10 @@ cudaMallocHost(&boundary_nuEff_init, num_boundary_faces*sizeof(double)); cudaMallocHost(&boundary_rho_init, num_boundary_faces*sizeof(double)); cudaMallocHost(&boundary_phi_init, num_boundary_faces*sizeof(double)); -double *boundary_alphaEff, *boundary_K; -double *eeqn_valueInternalCoeffs, *eeqn_valueBoundaryCoeffs, *eeqn_gradientInternalCoeffs, *eeqn_gradientBoundaryCoeffs; +double *boundary_alphaEff, *boundary_K, *boundary_gradient; cudaMallocHost(&boundary_K, num_boundary_faces*sizeof(double)); cudaMallocHost(&boundary_alphaEff, num_boundary_faces*sizeof(double)); -cudaMallocHost(&eeqn_valueInternalCoeffs, num_boundary_faces*sizeof(double)); -cudaMallocHost(&eeqn_valueBoundaryCoeffs, num_boundary_faces*sizeof(double)); -cudaMallocHost(&eeqn_gradientInternalCoeffs, num_boundary_faces*sizeof(double)); -cudaMallocHost(&eeqn_gradientBoundaryCoeffs, num_boundary_faces*sizeof(double)); +cudaMallocHost(&boundary_gradient, num_boundary_faces * sizeof(double)); + +bool 
updateBoundaryFields = true; // make sure that the boundary fields do H2D copy at 1st timestep #endif diff --git a/src_gpu/CMakeLists.txt b/src_gpu/CMakeLists.txt index 1040cee14..686607bae 100644 --- a/src_gpu/CMakeLists.txt +++ b/src_gpu/CMakeLists.txt @@ -24,7 +24,8 @@ add_library(${PROJECT_NAME} dfRhoEqn.cu dfYEqn.cu dfEEqn.cu - AmgXSolver.cu) + AmgXSolver.cu + dfMatrixDataBase.cpp) target_link_libraries(${PROJECT_NAME} ${MPI_LIBRARIES} diff --git a/src_gpu/dfEEqn.H b/src_gpu/dfEEqn.H index f15a44a49..61b2612cf 100644 --- a/src_gpu/dfEEqn.H +++ b/src_gpu/dfEEqn.H @@ -42,6 +42,8 @@ private: double *d_value_boundary_coeffs = nullptr; double *d_gradient_internal_coeffs = nullptr; double *d_gradient_boundary_coeffs = nullptr; + double *d_boundary_gradient_init = nullptr; + double *d_boundary_gradient = nullptr; public: dfEEqn(); @@ -49,7 +51,7 @@ public: ~dfEEqn(); void prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff, - const double *dpdt, const double *boundary_K, const double *boundary_alphaEff); + const double *dpdt, const double *boundary_K, const double *boundary_alphaEff, const double *boundary_gradient); void initializeTimeStep(); diff --git a/src_gpu/dfEEqn.cu b/src_gpu/dfEEqn.cu index f1aa3d14e..9cf1efe6c 100644 --- a/src_gpu/dfEEqn.cu +++ b/src_gpu/dfEEqn.cu @@ -399,9 +399,10 @@ __global__ void eeqn_add_to_source_kernel(int num_cells, __global__ void eeqn_boundaryPermutation(const int num_boundary_faces, const int *bouPermedIndex, const double *boundary_K_init, - const double *boundary_alphaEff_init, + const double *boundary_alphaEff_init, const double *boundary_gradient_init, double *boundary_K, - double *boundary_alphaEff) + double *boundary_alphaEff, + double *boundary_gradient) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_faces) @@ -411,9 +412,12 @@ __global__ void eeqn_boundaryPermutation(const int num_boundary_faces, const int boundary_K[index] = boundary_K_init[p]; 
boundary_alphaEff[index] = boundary_alphaEff_init[p]; + boundary_gradient[index] = boundary_gradient_init[p]; } -__global__ void eeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, double *internal_coeffs, +__global__ void eeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, + double *gradient, const double *boundary_deltaCoeffs, + double *internal_coeffs, double *boundary_coeffs, double *laplac_internal_coeffs, double *laplac_boundary_coeffs) { @@ -421,11 +425,12 @@ __global__ void eeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const if (index >= num_boundary_faces) return; - // zeroGradient + double grad = gradient[index]; + // energyGradient double valueInternalCoeffs = 1.; - double valueBoundaryCoeffs = 0.; + double valueBoundaryCoeffs = grad / boundary_deltaCoeffs[index]; double gradientInternalCoeffs = 0.; - double gradientBoundaryCoeffs = 0.; + double gradientBoundaryCoeffs = grad; internal_coeffs[index] = boundary_phi[index] * valueInternalCoeffs; boundary_coeffs[index] = -boundary_phi[index] * valueBoundaryCoeffs; @@ -440,7 +445,7 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std ESolver = new AmgXSolver(modeStr, cfgFile); stream = dataBase_.stream; - //checkCudaErrors(cudaEventCreate(&event)); + // checkCudaErrors(cudaEventCreate(&event)); num_cells = dataBase_.num_cells; cell_bytes = dataBase_.cell_bytes; @@ -473,6 +478,8 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std checkCudaErrors(cudaMalloc((void **)&d_boundary_K, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void **)&d_boundary_alphaEff_init, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void **)&d_boundary_alphaEff, boundary_face_bytes)); + checkCudaErrors(cudaMalloc((void **)&d_boundary_gradient_init, boundary_face_bytes)); + checkCudaErrors(cudaMalloc((void **)&d_boundary_gradient, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void 
**)&d_value_internal_coeffs_init, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void **)&d_value_boundary_coeffs_init, boundary_face_bytes)); @@ -485,7 +492,7 @@ dfEEqn::dfEEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std } void dfEEqn::prepare_data(const double *he_old, const double *K, const double *K_old, const double *alphaEff, - const double *dpdt, const double *boundary_K, const double *boundary_alphaEff) + const double *dpdt, const double *boundary_K, const double *boundary_alphaEff, const double *boundary_gradient) { // TODO not real async copy now, because some host array are not in pinned memory. @@ -499,12 +506,13 @@ void dfEEqn::prepare_data(const double *he_old, const double *K, const double *K // copy and permutate boundary variable checkCudaErrors(cudaMemcpyAsync(d_boundary_K_init, boundary_K, boundary_face_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_boundary_alphaEff_init, boundary_alphaEff, boundary_face_bytes, cudaMemcpyHostToDevice, stream)); + checkCudaErrors(cudaMemcpyAsync(d_boundary_gradient_init, boundary_gradient, boundary_face_bytes, cudaMemcpyHostToDevice, stream)); size_t threads_per_block = 1024; size_t blocks_per_grid = (num_boundary_faces + threads_per_block - 1) / threads_per_block; eeqn_boundaryPermutation<<>>(num_boundary_faces, dataBase_.d_bouPermedIndex, - d_boundary_K_init, d_boundary_alphaEff_init, - d_boundary_K, d_boundary_alphaEff); + d_boundary_K_init, d_boundary_alphaEff_init, d_boundary_gradient_init, + d_boundary_K, d_boundary_alphaEff, d_boundary_gradient); } void dfEEqn::initializeTimeStep() @@ -516,6 +524,7 @@ void dfEEqn::initializeTimeStep() size_t threads_per_block = 1024; size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; eeqn_update_BoundaryCoeffs_kernel<<>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi, + d_boundary_gradient, dataBase_.d_boundary_deltaCoeffs, d_value_internal_coeffs, 
d_value_boundary_coeffs, d_gradient_internal_coeffs, d_gradient_boundary_coeffs); } @@ -577,14 +586,14 @@ void dfEEqn::fvc_div_vector() size_t threads_per_block = 512; size_t blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; eeqn_fvc_div_vector_internal<<>>(num_cells, - d_A_csr_row_index, d_A_csr_col_index, d_A_csr_diag_index, - dataBase_.d_face_vector, dataBase_.d_hDiffCorrFlux, dataBase_.d_weight, - 1., d_b, d_b); + d_A_csr_row_index, d_A_csr_col_index, d_A_csr_diag_index, + dataBase_.d_face_vector, dataBase_.d_hDiffCorrFlux, dataBase_.d_weight, + 1., d_b, d_b); blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; eeqn_fvc_div_vector_boundary<<>>(num_boundary_cells, - dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, - dataBase_.d_boundary_face_vector, dataBase_.d_boundary_hDiffCorrFlux, - 1., d_b, d_b); + dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, + dataBase_.d_boundary_face_vector, dataBase_.d_boundary_hDiffCorrFlux, + 1., d_b, d_b); } void dfEEqn::fvc_div_phi_scalar() @@ -611,8 +620,8 @@ void dfEEqn::add_to_source() size_t blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; // " + fvc::ddt(rho,K)" is on the left side of "==", thus should minus from source. 
eeqn_add_to_source_kernel<<>>(num_cells, - //1., d_dpdt, -1., d_diffAlphaD, dataBase_.d_volume, d_b, d_b); - 1., d_dpdt, -1., dataBase_.d_diffAlphaD, dataBase_.d_volume, d_b, d_b); + // 1., d_dpdt, -1., d_diffAlphaD, dataBase_.d_volume, d_b, d_b); + 1., d_dpdt, -1., dataBase_.d_diffAlphaD, dataBase_.d_volume, d_b, d_b); } void dfEEqn::checkValue(bool print) @@ -689,10 +698,10 @@ void dfEEqn::solve() num_iteration++; checkCudaErrors(cudaMemcpyAsync(h_he_new, d_he_old, cell_bytes, cudaMemcpyDeviceToHost, stream)); - //checkCudaErrors(cudaEventRecord(event, stream)); - // checkCudaErrors(cudaStreamSynchronize(stream)); - // for (size_t i = 0; i < num_cells; i++) - // fprintf(stderr, "h_he_after[%d]: %.16lf\n", i, h_he_new[i]); + // checkCudaErrors(cudaEventRecord(event, stream)); + // checkCudaErrors(cudaStreamSynchronize(stream)); + // for (size_t i = 0; i < num_cells; i++) + // fprintf(stderr, "h_he_after[%d]: %.16lf\n", i, h_he_new[i]); } void dfEEqn::sync() @@ -703,7 +712,7 @@ void dfEEqn::sync() void dfEEqn::updatePsi(double *Psi) { checkCudaErrors(cudaStreamSynchronize(stream)); - //checkCudaErrors(cudaEventSynchronize(event)); + // checkCudaErrors(cudaEventSynchronize(event)); memcpy(Psi, h_he_new, cell_bytes); } @@ -727,6 +736,8 @@ dfEEqn::~dfEEqn() checkCudaErrors(cudaFree(d_boundary_K)); checkCudaErrors(cudaFree(d_boundary_alphaEff_init)); checkCudaErrors(cudaFree(d_boundary_alphaEff)); + checkCudaErrors(cudaFree(d_boundary_gradient_init)); + checkCudaErrors(cudaFree(d_boundary_gradient)); checkCudaErrors(cudaFree(d_value_internal_coeffs_init)); checkCudaErrors(cudaFree(d_value_boundary_coeffs_init)); @@ -737,5 +748,5 @@ dfEEqn::~dfEEqn() checkCudaErrors(cudaFree(d_gradient_internal_coeffs)); checkCudaErrors(cudaFree(d_gradient_boundary_coeffs)); - //checkCudaErrors(cudaEventDestroy(event)); + // checkCudaErrors(cudaEventDestroy(event)); } diff --git a/src_gpu/dfMatrixDataBase.H b/src_gpu/dfMatrixDataBase.H index 8ed3dcd32..3fd5eb69e 100644 --- 
a/src_gpu/dfMatrixDataBase.H +++ b/src_gpu/dfMatrixDataBase.H @@ -11,6 +11,7 @@ #include #include #include +#include static const char *_cudaGetErrorEnum(cudaError_t error) { @@ -40,6 +41,13 @@ inline void checkVectorEqual(int count, const double* basevec, double* vec, doub } } +enum boundaryConditions{ + zeroGradient, + fixedValue, + coupled +}; + +void constructBoundarySelector(std::vector& patchTypeSelector, const std::string& patchTypeStr, const int patchSize); struct dfMatrixDataBase { @@ -138,6 +146,8 @@ struct dfMatrixDataBase double *d_boundary_mut_sct = nullptr; double *d_boundary_hDiffCorrFlux = nullptr; + int *d_boundary_UpatchType = nullptr; + int *d_boundary_YpatchType = nullptr; std::vector boundPermutationList; std::vector ueqn_internalCoeffs, ueqn_boundaryCoeffs; @@ -145,6 +155,8 @@ struct dfMatrixDataBase std::vector boundary_pressure; std::vector boundary_face; std::vector boundary_deltaCoeffs; + std::vector> patch_type_init; + std::vector> patch_type; // - the device pointer to the permutated index list std::vector permedIndex; @@ -202,9 +214,9 @@ struct dfMatrixDataBase dfMatrixDataBase(int num_surfaces, int num_cells, int num_boundary_faces, int num_species, int & num_boundary_cells_output, const int *neighbour, const int *owner, const double* volume, const double* weight, const double* face_vector, const double* face, const double* deltaCoeffs, std::vector boundary_face_vector_init, std::vector boundary_face_init, - std::vector boundary_deltaCoeffs_init, std::vector boundary_cell_id_init) + std::vector boundary_deltaCoeffs_init, std::vector boundary_cell_id_init, std::vector> patch_type_init) : num_cells(num_cells), num_faces(num_surfaces*2), num_surfaces(num_surfaces), num_species(num_species), num_iteration(0), - num_boundary_faces(num_boundary_faces), h_volume(volume) + num_boundary_faces(num_boundary_faces), h_volume(volume), patch_type_init(patch_type_init) { // create cuda stream checkCudaErrors(cudaStreamCreate(&stream)); @@ -403,6 
+415,10 @@ struct dfMatrixDataBase boundary_face.resize(num_boundary_faces); boundary_deltaCoeffs.resize(num_boundary_faces); + patch_type.resize(2); + patch_type[0].resize(num_boundary_faces); + patch_type[1].resize(num_boundary_faces); + /** * 4. permutation list for field variables */ @@ -466,6 +482,8 @@ struct dfMatrixDataBase boundary_face_vector[3*i+2] = boundary_face_vector_init[3*boundPermutationList[i]+2]; boundary_face[i] = boundary_face_init[boundPermutationList[i]]; boundary_deltaCoeffs[i] = boundary_deltaCoeffs_init[boundPermutationList[i]]; + patch_type[0][i] = patch_type_init[0][boundPermutationList[i]]; + patch_type[1][i] = patch_type_init[1][boundPermutationList[i]]; } h_boundary_face_vector = boundary_face_vector.data(); h_boundary_face = boundary_face.data(); @@ -535,6 +553,8 @@ struct dfMatrixDataBase checkCudaErrors(cudaMalloc((void**)&d_boundary_rho, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void**)&d_boundary_rho_init, boundary_face_bytes)); checkCudaErrors(cudaMalloc((void**)&d_boundary_mut_sct, boundary_face_bytes)); + checkCudaErrors(cudaMalloc((void**)&d_boundary_UpatchType, boundary_face_index_bytes)); + checkCudaErrors(cudaMalloc((void**)&d_boundary_YpatchType, boundary_face_index_bytes)); for (size_t i = 0; i < num_species; ++i){ checkCudaErrors(cudaMalloc((void**)&d_boundary_Y_vector[i], boundary_face_bytes)); checkCudaErrors(cudaMalloc((void**)&d_boundary_Y_init_vector[i], boundary_face_bytes)); @@ -587,6 +607,8 @@ struct dfMatrixDataBase checkCudaErrors(cudaMemcpyAsync(d_boundary_face_vector, h_boundary_face_vector, boundary_face_vec_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_boundary_face, h_boundary_face, boundary_face_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_boundary_deltaCoeffs, h_boundary_deltaCoeffs, boundary_face_bytes, cudaMemcpyHostToDevice, stream)); + checkCudaErrors(cudaMemcpyAsync(d_boundary_UpatchType, patch_type[0].data(), 
boundary_face_index_bytes, cudaMemcpyHostToDevice, stream)); + checkCudaErrors(cudaMemcpyAsync(d_boundary_YpatchType, patch_type[1].data(), boundary_face_index_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_permedIndex, permedIndex.data(), face_index_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_bouPermedIndex, boundPermutationList.data(), boundary_face_index_bytes, cudaMemcpyHostToDevice, stream)); diff --git a/src_gpu/dfMatrixDataBase.cpp b/src_gpu/dfMatrixDataBase.cpp new file mode 100644 index 000000000..9c9f9a02a --- /dev/null +++ b/src_gpu/dfMatrixDataBase.cpp @@ -0,0 +1,41 @@ +#include "dfMatrixDataBase.H" + + +void constructBoundarySelector(std::vector& patchTypeSelector, const std::string& patchTypeStr, + const int patchSize) +{ + boundaryConditions patchCondition; + std::vector tmpSelector; + static std::map BCMap = { + {"zeroGradient", zeroGradient}, + {"fixedValue", fixedValue}, + {"coupled", coupled} + }; + auto iter = BCMap.find(patchTypeStr); + if (iter != BCMap.end()) { + patchCondition = iter->second; + } else { + throw std::runtime_error("Unknown boundary condition: " + patchTypeStr); + } + // zeroGradient labeled as 0, fixedValue labeled as 1, coupled labeled as 2 + switch (patchCondition){ + case zeroGradient: + { + tmpSelector.resize(patchSize, 0); + patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); + break; + } + case fixedValue: + { + tmpSelector.resize(patchSize, 1); + patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); + break; + } + case coupled: + { + tmpSelector.resize(patchSize, 2); + patchTypeSelector.insert(patchTypeSelector.end(), tmpSelector.begin(), tmpSelector.end()); + break; + } + } +} diff --git a/src_gpu/dfUEqn.H b/src_gpu/dfUEqn.H index 14288209b..ec739db5e 100644 --- a/src_gpu/dfUEqn.H +++ b/src_gpu/dfUEqn.H @@ -54,6 +54,8 @@ public: void updatePsi(double *Psi); + void 
correctBoundaryConditions(); + void correctPsi(double *Psi); void initializeTimeStep(); diff --git a/src_gpu/dfUEqn.cu b/src_gpu/dfUEqn.cu index 6aa479796..0a4d80756 100644 --- a/src_gpu/dfUEqn.cu +++ b/src_gpu/dfUEqn.cu @@ -997,17 +997,26 @@ __global__ void addDiagDivVolume(int num_cells, const int *csr_row_index, __global__ void ueqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, double *internal_coeffs, double *boundary_coeffs, double *laplac_internal_coeffs, - double *laplac_boundary_coeffs) + double *laplac_boundary_coeffs, const int *U_patch_type) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_faces) return; - // zeroGradient - double valueInternalCoeffs = 1.; - double valueBoundaryCoeffs = 0.; - double gradientInternalCoeffs = 0.; - double gradientBoundaryCoeffs = 0.; + int patchIndex = U_patch_type[index]; + double valueInternalCoeffs = 1., valueBoundaryCoeffs = 0., gradientInternalCoeffs = 0., gradientBoundaryCoeffs = 0.; // default to zeroGradient so unhandled patch types are well-defined + switch (patchIndex) + { + case 0: // zeroGradient + { + valueInternalCoeffs = 1.; + valueBoundaryCoeffs = 0.; + gradientInternalCoeffs = 0.; + gradientBoundaryCoeffs = 0.; + break; + } + // TODO implement coupled and fixedValue conditions + } internal_coeffs[index * 3 + 0] = boundary_phi[index] * valueInternalCoeffs; internal_coeffs[index * 3 + 1] = boundary_phi[index] * valueInternalCoeffs; @@ -1023,6 +1032,37 @@ __global__ void ueqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const laplac_boundary_coeffs[index * 3 + 2] = gradientBoundaryCoeffs; } +__global__ void ueqn_correct_BoundaryConditions_kernel(int num_cells, int num_boundary_cells, + const int *boundary_cell_offset, const int *boundary_cell_id, + const double *velocity, double *boundary_velocity, const int *U_patch_type) +{ + int index = blockDim.x * blockIdx.x + threadIdx.x; + if (index >= num_boundary_cells) + return; + + int cell_offset = boundary_cell_offset[index]; + int next_cell_offset = boundary_cell_offset[index +
1]; + int cell_index = boundary_cell_id[cell_offset]; + + for (int i = cell_offset; i < next_cell_offset; i++) + { + int patchIndex = U_patch_type[i]; + switch (patchIndex) + { + case 0: // zeroGradient + { + boundary_velocity[i * 3 + 0] = velocity[cell_index]; + boundary_velocity[i * 3 + 1] = velocity[num_cells * 1 + cell_index]; + boundary_velocity[i * 3 + 2] = velocity[num_cells * 2 + cell_index]; + break; + } + case 1: + break; + // TODO implement coupled conditions + } + } +} + // constructor dfUEqn::dfUEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile) : dataBase_(dataBase) @@ -1228,7 +1268,8 @@ void dfUEqn::initializeTimeStep() size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; ueqn_update_BoundaryCoeffs_kernel<<>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi, dataBase_.d_internal_coeffs, dataBase_.d_boundary_coeffs, - dataBase_.d_laplac_internal_coeffs, dataBase_.d_laplac_boundary_coeffs); + dataBase_.d_laplac_internal_coeffs, dataBase_.d_laplac_boundary_coeffs, + dataBase_.d_boundary_UpatchType); } void dfUEqn::checkValue(bool print) @@ -1360,6 +1401,15 @@ void dfUEqn::updatePsi(double *Psi) memcpy(Psi, h_psi, cell_vec_bytes); } +void dfUEqn::correctBoundaryConditions() +{ + size_t threads_per_block = 1024; + size_t blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; + ueqn_correct_BoundaryConditions_kernel<<>>(num_cells, num_boundary_cells, + dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, + d_psi, dataBase_.d_boundary_velocity, dataBase_.d_boundary_UpatchType); +} + // correct volecity in pEqn void dfUEqn::correctPsi(double *Psi) { diff --git a/src_gpu/dfYEqn.H b/src_gpu/dfYEqn.H index 2bc645cdd..4d1c87457 100644 --- a/src_gpu/dfYEqn.H +++ b/src_gpu/dfYEqn.H @@ -32,6 +32,8 @@ private: double *d_boundary_Y = nullptr; double *d_grady, *d_boundary_grady = nullptr; + bool uploadBoundaryY = true; + public: 
dfYEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile, const int inertIndex); @@ -57,6 +59,8 @@ public: void solve(); + void correctBoundaryConditions(); + void sync(); void updatePsi(double *Psi, int speciesIndex); diff --git a/src_gpu/dfYEqn.cu b/src_gpu/dfYEqn.cu index 85db544da..22fe5e1aa 100644 --- a/src_gpu/dfYEqn.cu +++ b/src_gpu/dfYEqn.cu @@ -74,7 +74,7 @@ __global__ void fvc_grad_internal(int num_cells, __global__ void fvc_grad_boundary(int num_boundary_cells, const int *boundary_cell_offset, const int *boundary_cell_id, const int *bouPermedIndex, const double *boundary_face_vector, const double *boundary_species_init, - const double *volume, double *grady) + const double *volume, double *grady, bool uploadBoundaryY) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= num_boundary_cells) @@ -93,8 +93,16 @@ __global__ void fvc_grad_boundary(int num_boundary_cells, double sfx = boundary_face_vector[i * 3 + 0]; double sfy = boundary_face_vector[i * 3 + 1]; double sfz = boundary_face_vector[i * 3 + 2]; - int permute_index = bouPermedIndex[i]; - double face_Y = boundary_species_init[permute_index]; + double face_Y; + if (!uploadBoundaryY) + { + face_Y = boundary_species_init[i]; + } + else + { + int permute_index = bouPermedIndex[i]; + face_Y = boundary_species_init[permute_index]; + } grad_bx += face_Y * sfx; grad_by += face_Y * sfy; grad_bz += face_Y * sfz; @@ -158,13 +166,21 @@ __global__ void sumError_internal(int num_cells, __global__ void sumError_boundary(int num_boundary_faces, const int *bouPermedIndex, const double *boundary_hai, const double *boundary_rhoD, const double *boundary_y, const double *boundary_grady, - double *sum_boundary_hai_rhoD_grady, double *sum_boundary_rhoD_grady, double *sum_boundary_hai_y) + double *sum_boundary_hai_rhoD_grady, double *sum_boundary_rhoD_grady, double *sum_boundary_hai_y, bool uploadBoundaryY) { int index = blockDim.x * blockIdx.x + threadIdx.x; if (index >= 
num_boundary_faces) return; - int permute_index = bouPermedIndex[index]; + int permute_index; + if (!uploadBoundaryY) + { + permute_index = index; + } + else + { + permute_index = bouPermedIndex[index]; + } sum_boundary_hai_rhoD_grady[index * 3 + 0] += boundary_hai[permute_index] * boundary_rhoD[permute_index] * boundary_grady[index * 3 + 0]; sum_boundary_hai_rhoD_grady[index * 3 + 1] += boundary_hai[permute_index] * boundary_rhoD[permute_index] * boundary_grady[index * 3 + 1]; sum_boundary_hai_rhoD_grady[index * 3 + 2] += boundary_hai[permute_index] * boundary_rhoD[permute_index] * boundary_grady[index * 3 + 2]; @@ -377,8 +393,6 @@ __global__ void fvm_div_boundary_scalar(int num_cells, int num_faces, int num_bo double boundary_coeffs_own = 0; for (int i = 0; i < loop_size; i++) { - // zeroGradient - internal_coeffs[cell_offset + i] = 1.; internal_coeffs_own += boundary_phi[cell_offset + i] * internal_coeffs[cell_offset + i]; boundary_coeffs_own += -boundary_phi[cell_offset + i] * boundary_coeffs[cell_offset + i]; } @@ -462,10 +476,8 @@ __global__ void fvm_laplacian_uncorrected_scalar_boundary(int num_boundary_cells int permute_index = bouPermedIndex[i]; double gamma = boundary_scalar0[permute_index] + boundary_scalar1[permute_index]; double gamma_magsf = gamma * boundary_magsf[i]; - // internal_coeffs += gamma_magsf * gradient_internal_coeffs[i * 3 + 0]; - // boundary_coeffs += gamma_magsf * gradient_boundary_coeffs[i * 3 + 0]; - internal_coeffs += gamma_magsf * 0.; - boundary_coeffs += gamma_magsf * 0.; + internal_coeffs += gamma_magsf * gradient_internal_coeffs[i]; + boundary_coeffs += gamma_magsf * gradient_boundary_coeffs[i]; } A_csr_output[csr_index] = A_csr_input[csr_index] + internal_coeffs * sign; @@ -521,6 +533,67 @@ __global__ void fvc_laplacian_internal(int num_cells, output[index] += sum / vol; } +__global__ void yeqn_update_BoundaryCoeffs_kernel(int num_boundary_faces, const double *boundary_phi, double *internal_coeffs, + double *boundary_coeffs, 
double *laplac_internal_coeffs, + double *laplac_boundary_coeffs, const int *U_patch_type) +{ + int index = blockDim.x * blockIdx.x + threadIdx.x; + if (index >= num_boundary_faces) + return; + + int patchIndex = U_patch_type[index]; + double valueInternalCoeffs = 1., valueBoundaryCoeffs = 0., gradientInternalCoeffs = 0., gradientBoundaryCoeffs = 0.; // default to zeroGradient so unhandled patch types are well-defined + switch (patchIndex) + { + case 0: // zeroGradient + { + valueInternalCoeffs = 1.; + valueBoundaryCoeffs = 0.; + gradientInternalCoeffs = 0.; + gradientBoundaryCoeffs = 0.; + break; + } + // TODO implement coupled and fixedValue conditions + } + + internal_coeffs[index] = valueInternalCoeffs; + boundary_coeffs[index] = valueBoundaryCoeffs; + laplac_internal_coeffs[index] = gradientInternalCoeffs; + laplac_boundary_coeffs[index] = gradientBoundaryCoeffs; +} + +__global__ void yeqn_correct_BoundaryConditions_kernel(int num_cells, int num_boundary_cells, int num_boundary_faces, int num_species, + const int *boundary_cell_offset, const int *boundary_cell_id, + const double *species, double *boundary_species, const int *Y_patch_type) +{ + int index = blockDim.x * blockIdx.x + threadIdx.x; + if (index >= num_boundary_cells) + return; + + int cell_offset = boundary_cell_offset[index]; + int next_cell_offset = boundary_cell_offset[index + 1]; + int cell_index = boundary_cell_id[cell_offset]; + + for (int i = cell_offset; i < next_cell_offset; i++) + { + int patchIndex = Y_patch_type[i]; + switch (patchIndex) + { + case 0: // zeroGradient + { + for (int speciesID = 0; speciesID < num_species; speciesID++) + { + boundary_species[speciesID * num_boundary_faces + i] = species[speciesID * num_cells + cell_index]; + } + break; + } + case 1: + break; + // TODO implement coupled conditions + } + } +} + dfYEqn::dfYEqn(dfMatrixDataBase &dataBase, const std::string &modeStr, const std::string &cfgFile, const int inertIndex) : dataBase_(dataBase), inertIndex(inertIndex) { @@ -554,7 +627,7 @@ dfYEqn::dfYEqn(dfMatrixDataBase &dataBase, const std::string
&modeStr, const std checkCudaErrors(cudaMalloc((void **)&d_mut_Sct, cell_bytes)); checkCudaErrors(cudaMalloc((void **)&d_boundary_mut_sct, boundary_face_bytes)); - checkCudaErrors(cudaMalloc((void **)&d_boundary_Y, boundary_face_bytes)); + checkCudaErrors(cudaMalloc((void **)&d_boundary_Y, boundary_face_bytes * num_species)); checkCudaErrors(cudaMalloc((void **)&d_hai, cell_bytes)); checkCudaErrors(cudaMalloc((void **)&d_boundary_hai, boundary_face_bytes)); @@ -591,6 +664,17 @@ void dfYEqn::initializeTimeStep() checkCudaErrors(cudaMemsetAsync(d_b, 0, cell_bytes * (num_species - 1), stream)); // initialize variables in each time step checkCudaErrors(cudaMemsetAsync(d_psi, 0, cell_bytes * (num_species - 1), stream)); + + // initialize boundary coeffs + size_t threads_per_block = 1024; + size_t blocks_per_grid = (dataBase_.num_boundary_faces + threads_per_block - 1) / threads_per_block; + for (int i = 0; i < num_species; i++) + { + yeqn_update_BoundaryCoeffs_kernel<<>>(dataBase_.num_boundary_faces, dataBase_.d_boundary_phi, + dataBase_.d_internal_coeffs_Y_vector[i], dataBase_.d_boundary_coeffs_Y_vector[i], + dataBase_.d_laplac_internal_coeffs_Y_vector[i], dataBase_.d_laplac_boundary_coeffs_Y_vector[i], + dataBase_.d_boundary_YpatchType); + } } void dfYEqn::upwindWeight() @@ -622,10 +706,14 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto size_t threads_per_block, blocks_per_grid; int mtxIndex = 0; + for (size_t i = 0; i < num_species; ++i) { checkCudaErrors(cudaMemcpyAsync(dataBase_.d_Y + i * num_cells, Y_old[i], cell_bytes, cudaMemcpyHostToDevice, stream)); - checkCudaErrors(cudaMemcpyAsync(d_boundary_Y, boundary_Y[i], boundary_face_bytes, cudaMemcpyHostToDevice, stream)); + if (uploadBoundaryY) + { + checkCudaErrors(cudaMemcpyAsync(d_boundary_Y + i * num_boundary_faces, boundary_Y[i], boundary_face_bytes, cudaMemcpyHostToDevice, stream)); + } checkCudaErrors(cudaMemcpyAsync(d_hai, hai[i], cell_bytes, cudaMemcpyHostToDevice, 
stream)); checkCudaErrors(cudaMemcpyAsync(d_boundary_hai, boundary_hai[i], boundary_face_bytes, cudaMemcpyHostToDevice, stream)); checkCudaErrors(cudaMemcpyAsync(d_rhoD, rhoD[i], cell_bytes, cudaMemcpyHostToDevice, stream)); @@ -644,8 +732,8 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; fvc_grad_boundary<<>>(num_boundary_cells, dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, dataBase_.d_bouPermedIndex, - dataBase_.d_boundary_face_vector, d_boundary_Y, - dataBase_.d_volume, d_grady); + dataBase_.d_boundary_face_vector, d_boundary_Y + i * num_boundary_faces, + dataBase_.d_volume, d_grady, uploadBoundaryY); blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; correct_boundary_conditions<<>>(num_boundary_cells, dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, @@ -662,8 +750,8 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto blocks_per_grid = (num_boundary_faces + threads_per_block - 1) / threads_per_block; sumError_boundary<<>>(num_boundary_faces, dataBase_.d_bouPermedIndex, - d_boundary_hai, d_boundary_rhoD, d_boundary_Y, d_boundary_grady, - d_sum_boundary_hai_rhoD_grady, d_sum_boundary_rhoD_grady, d_sum_boundary_hai_y); + d_boundary_hai, d_boundary_rhoD, d_boundary_Y + i * num_boundary_faces, d_boundary_grady, + d_sum_boundary_hai_rhoD_grady, d_sum_boundary_rhoD_grady, d_sum_boundary_hai_y, uploadBoundaryY); // compute diffAlphaD blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; @@ -692,6 +780,7 @@ void dfYEqn::fvm_laplacian_and_sumYDiffError_diffAlphaD_hDiffCorrFlux(std::vecto } ++mtxIndex; } + uploadBoundaryY = false; blocks_per_grid = (num_cells + threads_per_block - 1) / threads_per_block; calculate_hDiffCorrFlux<<>>(num_cells, @@ -892,6 +981,21 @@ void dfYEqn::updatePsi(double *Psi, int speciesIndex) memcpy(Psi, h_psi +
speciesIndex * num_cells, cell_bytes); } +void dfYEqn::correctBoundaryConditions() +{ + size_t threads_per_block = 1024; + size_t blocks_per_grid = (num_boundary_cells + threads_per_block - 1) / threads_per_block; + yeqn_correct_BoundaryConditions_kernel<<>>(num_cells, num_boundary_cells, num_boundary_faces, num_species, + dataBase_.d_boundary_cell_offset, dataBase_.d_boundary_cell_id, + dataBase_.d_Y, d_boundary_Y, dataBase_.d_boundary_YpatchType); + // double *h_boundary_Y = new double[num_boundary_faces]; + // cudaMemcpy(h_boundary_Y, d_boundary_Y, num_boundary_faces * sizeof(double), cudaMemcpyDeviceToHost); + // for (int i = 0; i < num_boundary_faces; i++) + // { + // printf("h_boundary_GPU[%d] = %e\n", i, h_boundary_Y[i]); + // } +} + dfYEqn::~dfYEqn() { }