diff --git a/Allwclean b/Allwclean
index 2dc13b3ef..569262a61 100755
--- a/Allwclean
+++ b/Allwclean
@@ -25,3 +25,4 @@ wclean ./applications/solvers/dfHighSpeedFoam
 rm -rf src_orig/
 rm -rf bin/
 rm -rf lib/
+rm -rf src_gpu/build
diff --git a/applications/solvers/dfLowMachFoam/EEqn.H b/applications/solvers/dfLowMachFoam/EEqn.H
index a2e351074..6ebf7ca31 100644
--- a/applications/solvers/dfLowMachFoam/EEqn.H
+++ b/applications/solvers/dfLowMachFoam/EEqn.H
@@ -1,47 +1,5 @@
 {
     volScalarField& he = thermo.he();
-
-#ifdef CPUSolver_
-    start1 = std::clock();
-    //debug
-    // {
-    //     const fvPatchScalarField& hew = he.boundaryField()[5];
-    //     const basicThermo& bThermo = basicThermo::lookupThermo(hew);
-    //     const scalarField& pw = bThermo.p().boundaryField()[5];
-    //     fvPatchScalarField& Tw =
-    //         const_cast<fvPatchScalarField&>(bThermo.T().boundaryField()[5]);
-    //     scalarField& Tw_v = Tw;
-
-    //     Tw.evaluate();
-
-    //     Info << "internal field" <<bThermo.he(pw, Tw, mesh.boundary()[5].faceCells()) << endl;
-    //     Info << "boundary field" <<bThermo.he(pw, Tw, 5) << endl;
-    //     Info << "calculated grad" << mesh.boundary()[5].deltaCoeffs() * (bThermo.he(pw, Tw, 5) - bThermo.he(pw, Tw, mesh.boundary()[5].faceCells())) << endl;
-    // }
-
-
-    fvScalarMatrix EEqn
-    (
-        fvm::ddt(rho, he) + fvm::div(phi, he)
-      + fvc::ddt(rho, K) + fvc::div(phi, K)
-      - dpdt
-      - fvm::laplacian(turbulence->alphaEff(), he)
-      + diffAlphaD
-     ==
-        fvc::div(hDiffCorrFlux)
-    );
-    end1 = std::clock();
-    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
-    time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
-
-    // EEqn.relax();
-    start1 = std::clock();
-    EEqn.solve();
-    end1 = std::clock();
-    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
-    time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
-#endif
-
 #ifdef GPUSolver_
     start1 = std::clock();
     UEqn_GPU.updatePsi(&U[0][0]);
@@ -51,9 +9,7 @@
     end1 = std::clock();
     time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_UEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
-#endif
 
-#ifdef GPUSolver_
     // prepare data on CPU
     start1 = std::clock();
     start2 = std::clock();
@@ -148,5 +104,27 @@
     end1 = std::clock();
     time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_EEqn_correctBC += double(end1 - start1) / double(CLOCKS_PER_SEC);
+#else
+    start1 = std::clock();
+    fvScalarMatrix EEqn
+    (
+        fvm::ddt(rho, he) + fvm::div(phi, he)
+      + fvc::ddt(rho, K) + fvc::div(phi, K)
+      - dpdt
+      - fvm::laplacian(turbulence->alphaEff(), he)
+      + diffAlphaD
+     ==
+        fvc::div(hDiffCorrFlux)
+    );
+    end1 = std::clock();
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC);
+
+    // EEqn.relax();
+    start1 = std::clock();
+    EEqn.solve();
+    end1 = std::clock();
+    time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 #endif
 }
diff --git a/applications/solvers/dfLowMachFoam/Make/options b/applications/solvers/dfLowMachFoam/Make/options
index 506f0d90a..e2a57bd00 100644
--- a/applications/solvers/dfLowMachFoam/Make/options
+++ b/applications/solvers/dfLowMachFoam/Make/options
@@ -9,6 +9,7 @@ EXE_INC = -std=c++14 \
     $(PFLAGS) $(PINC) \
     $(if $(LIBTORCH_ROOT),-DUSE_LIBTORCH,) \
     $(if $(PYTHON_INC_DIR),-DUSE_PYTORCH,) \
+    $(if $(AMGX_DIR),-DGPUSolver_,) \
     -I$(LIB_SRC)/transportModels/compressible/lnInclude \
     -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \
     -I$(LIB_SRC)/TurbulenceModels/turbulenceModels/lnInclude \
@@ -26,9 +27,9 @@ EXE_INC = -std=c++14 \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include,) \
     $(if $(LIBTORCH_ROOT),-I$(LIBTORCH_ROOT)/include/torch/csrc/api/include,) \
     $(PYTHON_INC_DIR) \
-    -I$(DF_ROOT)/src_gpu \
-    -I/usr/local/cuda-11.6/include \
-    -I$(AMGX_DIR)/include
+    $(if $(AMGX_DIR), -I$(DF_ROOT)/src_gpu,) \
+    $(if $(AMGX_DIR), -I/usr/local/cuda-11.6/include,) \
+    $(if $(AMGX_DIR), -I$(AMGX_DIR)/include,)
 
 EXE_LIBS = \
     -lcompressibleTransportModels \
@@ -50,7 +51,7 @@ EXE_LIBS = \
     $(if $(LIBTORCH_ROOT),$(DF_SRC)/dfChemistryModel/DNNInferencer/build/libDNNInferencer.so,) \
     $(if $(PYTHON_LIB_DIR),-L$(PYTHON_LIB_DIR),) \
     $(if $(PYTHON_LIB_DIR),-lpython3.8,) \
-    $(DF_ROOT)/src_gpu/build/libdfMatrix.so \
-    /usr/local/cuda-11.6/lib64/libcudart.so \
-    $(AMGX_DIR)/build/libamgxsh.so
+    $(if $(AMGX_DIR), /usr/local/cuda-11.6/lib64/libcudart.so,) \
+    $(if $(AMGX_DIR), $(DF_ROOT)/src_gpu/build/libdfMatrix.so,) \
+    $(if $(AMGX_DIR), $(AMGX_DIR)/build/libamgxsh.so,)
 
diff --git a/applications/solvers/dfLowMachFoam/UEqn.H b/applications/solvers/dfLowMachFoam/UEqn.H
index 65bc7e788..cac4218c0 100644
--- a/applications/solvers/dfLowMachFoam/UEqn.H
+++ b/applications/solvers/dfLowMachFoam/UEqn.H
@@ -79,16 +79,6 @@
         //     }
         // }
     // }
-    // const tmp<volTensorField> tgradU(fvc::grad(U));
-    // const volTensorField& gradU = tgradU();
-    // Pout << "gradU_of[1]\n" << gradU[1] << nl;
-    // Pout << "gradU_of[0][64]\n" << gradU.boundaryField()[0][64] << nl;
-    // Pout << "gradU_of[1][64]\n" << gradU.boundaryField()[1][64] << nl;
-    // Pout << "gradU_of[5][1]\n" << gradU.boundaryField()[5][1] << nl;
-    // Pout << "U[1][1]\n" << U[1] << nl;
-    // Pout << "Ubou[0][64]\n" << U.boundaryField()[0][64] << nl;
-    // Pout << "Ubou[1][64]\n" << U.boundaryField()[1][64] << nl;
-    // Pout << "Ubou[5][1]\n" << U.boundaryField()[5][1] << nl;
     // if (pimple.momentumPredictor())
     // {
     //     solve(UEqn);
diff --git a/applications/solvers/dfLowMachFoam/YEqn.H b/applications/solvers/dfLowMachFoam/YEqn.H
index 78a20397a..aff3fdbd6 100644
--- a/applications/solvers/dfLowMachFoam/YEqn.H
+++ b/applications/solvers/dfLowMachFoam/YEqn.H
@@ -12,20 +12,6 @@ tmp<fv::convectionScheme<scalar>> mvConvection
         mesh.divScheme("div(phi,Yi_h)")
     )
 );
-
-#ifdef CPUSolver_
-start1 = std::clock();
-forAll(Y, i)
-{
-    sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]);
-}
-// Info << "sumYDiffError\n" << sumYDiffError << endl;
-const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
-start1 = std::clock();
-time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
-time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
-#endif
-
 #ifdef GPUSolver_
     start1 = std::clock();
     UEqn_GPU.solve();
@@ -124,6 +110,17 @@ time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
     end1 = std::clock();
     time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
     time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
+#else
+    start1 = std::clock();
+    forAll(Y, i)
+    {
+        sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]);
+    }
+    // Info << "sumYDiffError\n" << sumYDiffError << endl;
+    const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf();
+    end1 = std::clock(); // stop the timer; the elapsed time below is end1 - start1
+    time_monitor_YEqn += double(end1 - start1) / double(CLOCKS_PER_SEC);
+    time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC);
 #endif
 
 //MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM);
diff --git a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
index 284dc88f0..34fa85ed3 100644
--- a/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
+++ b/applications/solvers/dfLowMachFoam/dfLowMachFoam.C
@@ -59,6 +59,7 @@ Description
 #include "basicThermo.H"
 #include "CombustionModel.H"
 
+#ifdef GPUSolver_
 #include "dfUEqn.H"
 #include "dfYEqn.H"
 #include "dfRhoEqn.H"
@@ -66,9 +67,7 @@ Description
 #include <cuda_runtime.h>
 #include <thread>
 #include "upwind.H"
-
-#define GPUSolver_
-// #define CPUSolver_
+#endif
 
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
@@ -79,10 +78,13 @@ int main(int argc, char *argv[])
 #endif
     #include "postProcess.H"
 
+    #ifdef GPUSolver_
+    // GPU builds only: add cudaDeviceScheduleYield to the current device flags.
     unsigned int flags = 0;
     checkCudaErrors(cudaGetDeviceFlags(&flags));
     flags |= cudaDeviceScheduleYield;
     checkCudaErrors(cudaSetDeviceFlags(flags));
+    #endif
 
     // #include "setRootCaseLists.H"
     #include "listOptions.H"
@@ -158,7 +160,9 @@ int main(int argc, char *argv[])
     }
 
     start1 = std::clock();
+    #ifdef GPUSolver_
     #include "createdfSolver.H"
+    #endif
     end1 = std::clock();
     time_monitor_init += double(end1 - start1) / double(CLOCKS_PER_SEC);
 
diff --git a/bashrc.in b/bashrc.in
index 6d3b75da9..0aa02ce1a 100644
--- a/bashrc.in
+++ b/bashrc.in
@@ -8,12 +8,11 @@ export CANTERA_ROOT=LIBCANTERA_DIR
 export CANTERA_DATA=$CANTERA_ROOT/share/cantera/data
 export LD_LIBRARY_PATH=$LIBTORCH_ROOT/lib:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$CANTERA_ROOT/lib:$LD_LIBRARY_PATH
+export AMGX_DIR=@AMGX_DIR@
 
 export DF_APPBIN=pwd/bin
 export DF_LIBBIN=pwd/lib
 export PATH=$DF_APPBIN:$PATH
 export LD_LIBRARY_PATH=$DF_LIBBIN:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=path-to-amgx/build:$LD_LIBRARY_PATH
-export AMGX_DIR=path-to-amgx
-
+if [ -n "$AMGX_DIR" ]; then export LD_LIBRARY_PATH=$AMGX_DIR/build:$LD_LIBRARY_PATH; fi
diff --git a/configure.sh b/configure.sh
index 1cf9254a9..4947df660 100644
--- a/configure.sh
+++ b/configure.sh
@@ -7,15 +7,19 @@ unset LIBTORCH_DIR
 unset LIBCANTERA_DIR
 unset PYTORCH_INC
 unset PYTORCH_LIB
+unset USE_GPUSOLVER
+unset AMGX_DIR
 
 print_usage() {
-    echo "Usage: . install.sh --libtorch_no (default) | --libtorch_dir _path_to_libtorch | --libtorch_autodownload | --use_pytorch | --libcantera_dir _path_to_libcantera"
+    echo "Usage: . install.sh --libtorch_no (default) | --libtorch_dir _path_to_libtorch | --libtorch_autodownload | --use_pytorch | --libcantera_dir _path_to_libcantera
+        | --amgx_dir _path_to_amgx"
 }
 
 # default
 LIBTORCH_AUTO=false
 USE_LIBTORCH=false
 USE_PYTORCH=false
+USE_GPUSOLVER=false
 
 while test $# -gt 0; do
     case "$1" in
@@ -60,6 +64,17 @@ while test $# -gt 0; do
             fi
             shift
             ;;
+        --amgx_dir)
+            shift
+            if test $# -gt 0; then
+                AMGX_DIR=$1
+                USE_GPUSOLVER=true
+            else
+                print_usage
+                return
+            fi
+            shift
+            ;;
         -h|--help)
             shift
             print_usage
@@ -148,6 +163,9 @@ if [ $USE_PYTORCH = true ]; then
     echo PYTORCH_LIB=$PYTORCH_LIB
     echo LIBTORCH_DIR=""
 fi
+if [ $USE_GPUSOLVER = true ]; then
+    echo AMGX_DIR=$AMGX_DIR
+fi
 
 cp bashrc.in bashrc
 sed -i "s#pwd#$PWD#g" ./bashrc
@@ -155,6 +173,7 @@ sed -i "s#LIBTORCH_DIR#$LIBTORCH_DIR#g" ./bashrc
 sed -i "s#PYTORCH_INC#$PYTORCH_INC#g" ./bashrc
 sed -i "s#PYTORCH_LIB#$PYTORCH_LIB#g" ./bashrc
 sed -i "s#LIBCANTERA_DIR#$LIBCANTERA_DIR#g" ./bashrc
+sed -i "s#@AMGX_DIR@#$AMGX_DIR#g" ./bashrc
 
 
 
diff --git a/docs/source/qs/install.rst b/docs/source/qs/install.rst
index 28a3ad9e4..cd0f1ec96 100644
--- a/docs/source/qs/install.rst
+++ b/docs/source/qs/install.rst
@@ -142,27 +142,29 @@ If ``df-notorch`` not activated (or you have a self-complied libcantera), specif
     . install.sh
 
 
-**3. If you wish to employ the AMGX library for accelerating PDE solving using GPU:**
+**3. If you wish to employ dfMatrix and the AMGX library for accelerating PDE solving using GPU:**
+
+.. Note:: This is still under development.
 
 To begin, you will need to install AMGX. You can find the instructions for installing AMGX on its official website. Follow the instructions provided to install AMGX on your system. Once you have installed AMGX, navigate to the DeepFlame directory and follow the commands below.
 
 .. code-block:: bash
 
-    cd ${DF_ROOT}/src/dfMatrix/solver/amgx/
-    export AMGX_DIR=/your/path/to/AMGX/
-    cmake -B build
-    cd build
-    make
-
-After this, two libraries for enabling DeepFlame with AMGX are available in ``${DF_ROOT}/src/dfMatrix/solver/amgx/build``.
-Beforing using AMGX, run:
+    cd deepflame-dev
+    . configure.sh --amgx_dir /your/path/to/AMGX/
+    source ./bashrc
+    . install.sh
 
-.. code-block:: bash
+Also, you will need to add an AMGX configuration file for each equation under the ``system`` folder, named following the pattern ``amgxpOptions``, ``amgxUOptions``. Please refer to the AMGX official website for detailed instructions.
 
-    export LD_LIBRARY_PATH=${DF_ROOT}/src/dfMatrix/solver/amgx/build:$LD_LIBRARY_PATH
+**If you have compiled DeepFlame with the GPU solver successfully, you should see the following message in your terminal:**
 
+.. code-block::
 
-If you want to use AMGX, you will need to add configuration files for AMGX for each euqation under ``system`` folder and name them in the pattern of ``amgxpOptions``, ``amgxUOptions`` . Please refer to the AMGX official website to find out detailed instructions. 
+     = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+    |     deepflame (linked with libcantera) compiled successfully! Enjoy!!          |
+    |        select the GPU solver coupled with AMGx library to solve PDE            |
+     = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
 
 
 **4. If you wish to install DeepFlame with CMake**
diff --git a/install.sh b/install.sh
index 17b3d2097..7a699bb25 100755
--- a/install.sh
+++ b/install.sh
@@ -4,18 +4,17 @@ print_finish() {
     if [ ! -z "$LIBTORCH_ROOT" ]; then
         echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
         echo "| deepflame (linked with libcantera and libtorch) compiled successfully! Enjoy!! |"
+    elif [ ! -z "$PYTHON_LIB_DIR" ]; then
         echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
-        return
+        echo "| deepflame (linked with libcantera and pytorch) compiled successfully! Enjoy!!  |"
+    else
+        echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
+        echo "|     deepflame (linked with libcantera) compiled successfully! Enjoy!!          |"
     fi
-    if [ ! -z "$PYTHON_LIB_DIR" ]; then
-        echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
-        echo "| deepflame (linked with libcantera and pytorch) compiled successfully! Enjoy!! | "
-        echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
-	return
+    if [ ! -z "$AMGX_DIR" ]; then
+        echo "|        select the GPU solver coupled with AMGx library to solve PDE            |"
     fi
-    echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
-    echo "| deepflame (linked with libcantera) compiled successfully! Enjoy!! |"
-    echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
+    echo " = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="
 }
 if [ $USE_LIBTORCH = true ]; then
     cd "$DF_SRC/dfChemistryModel/DNNInferencer"
@@ -25,5 +24,13 @@ if [ $USE_LIBTORCH = true ]; then
     make 
     export LD_LIBRARY_PATH=$DF_SRC/dfChemistryModel/DNNInferencer/build:$LD_LIBRARY_PATH
 fi
+if [ "$USE_GPUSOLVER" = true ]; then
+    cd "$DF_ROOT/src_gpu"
+    mkdir -p build  # -p: do not fail when a previous install left the directory behind
+    cd build
+    cmake ..
+    make
+    export LD_LIBRARY_PATH=$DF_ROOT/src_gpu/build:$LD_LIBRARY_PATH
+fi
 cd $DF_ROOT
 ./Allwmake -j && print_finish