diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index 8029263061..d94c49d180 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -14,19 +14,20 @@ jobs:
         - float_prec: low
           variant: cpu
         - float_prec: high
-          variant: gpu
+          variant: cuda
         - float_prec: low
-          variant: gpu
+          variant: cuda
     steps:
     - uses: actions/checkout@master
       with:
         submodules: true
     - run: sudo apt update && sudo apt install g++-7
     - run: sudo apt install nvidia-cuda-toolkit
-      if: matrix.variant == 'gpu'
+      if: matrix.variant == 'cuda'
     - run: source/install/build_cc.sh
       env:
         FLOAT_PREC: ${{ matrix.float_prec }}
+        DP_VARIANT: ${{ matrix.variant }}
         CC: gcc-7
         CXX: g++-7
     - run: source/install/build_lammps.sh
diff --git a/doc/install.md b/doc/install.md
index dea55f2e56..e2d7859097 100644
--- a/doc/install.md
+++ b/doc/install.md
@@ -113,6 +113,14 @@ Execute
 cd $deepmd_source_dir
 pip install .
 ```
+
+One may set the following environment variables before executing `pip`:
+
+| Environment variables | Allowed value          | Default value | Usage                      |
+| --------------------- | ---------------------- | ------------- | -------------------------- |
+| DP_VARIANT            | `cpu`, `cuda`, `rocm`  | `cpu`         | Build CPU variant or GPU variant with CUDA or ROCM support. |
+| DP_FLOAT_PREC         | `high`, `low`          | `high`        | Build high (double) or low (float) precision. |
+
 To test the installation, one should firstly jump out of the source directory
 ```
 cd /some/other/workspace
@@ -166,15 +174,27 @@ I assume you want to install DeePMD-kit into path `$deepmd_root`, then execute c
 ```bash
 cmake -DTENSORFLOW_ROOT=$tensorflow_root -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
 ```
-where the variable `tensorflow_root` stores the location where the tensorflow's C++ interface is installed. The DeePMD-kit will automatically detect if a CUDA tool-kit is available on your machine and build the GPU support accordingly. If you want to force the cmake to find CUDA tool-kit, you can speicify the key `USE_CUDA_TOOLKIT`, 
-```bash
-cmake -DUSE_CUDA_TOOLKIT=true -DTENSORFLOW_ROOT=$tensorflow_root -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
-```
-and you may further asked to provide `CUDA_TOOLKIT_ROOT_DIR`. If the cmake has executed successfully, then 
+where the variable `tensorflow_root` stores the location where TensorFlow's C++ interface is installed.
+
+One may add the following arguments to `cmake`:
+
+| CMake Arguments          | Allowed value       | Default value | Usage                   |
+| ------------------------ | ------------------- | ------------- | ------------------------|
+| -DTENSORFLOW_ROOT=&lt;value&gt;  | Path              | -             | The path to TensorFlow's C++ interface. |
+| -DCMAKE_INSTALL_PREFIX=&lt;value&gt; | Path          | -             | The path where DeePMD-kit will be installed. |
+| -DFLOAT_PREC=&lt;value&gt;       | `high` or `low`   | `high`        | Build high (double) or low (float) precision. |
+| -DUSE_CUDA_TOOLKIT=&lt;value&gt; | `TRUE` or `FALSE` | `FALSE`       | If `TRUE`, build GPU support with the CUDA toolkit. |
+| -DCUDA_TOOLKIT_ROOT_DIR=&lt;value&gt; | Path         | Detected automatically | The path to the CUDA toolkit directory. |
+| -DUSE_ROCM_TOOLKIT=&lt;value&gt; | `TRUE` or `FALSE` | `FALSE`       | If `TRUE`, build GPU support with the ROCM toolkit. |
+| -DROCM_ROOT=&lt;value&gt; | Path         | Detected automatically | The path to the ROCM toolkit directory. |
+
+If `cmake` has executed successfully, then run
 ```bash
-make
+make -j4
 make install
 ```
+The option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware. 
+
 If everything works fine, you will have the following executable and libraries installed in `$deepmd_root/bin` and `$deepmd_root/lib`
 ```bash
 $ ls $deepmd_root/bin
@@ -206,7 +226,6 @@ make yes-kspace
 make yes-user-deepmd
 make mpi -j4
 ```
-The option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware. 
 
 If everything works fine, you will end up with an executable `lmp_mpi`.
 ```bash
diff --git a/setup.py b/setup.py
index cb2759e1b5..f574e46e06 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,28 @@
         "gpu": [f"tensorflow=={tf_version}"],
     }
 
+cmake_args = []
+# Select the build variant from the DP_VARIANT environment variable (cpu, cuda, rocm).
+dp_variant = os.environ.get("DP_VARIANT", "cpu").lower()
+if dp_variant in ("cpu", ""):
+    pass  # CPU build (also when the variable is set but empty): no toolkit flag added
+elif dp_variant == "cuda":
+    cmake_args.append("-DUSE_CUDA_TOOLKIT:BOOL=TRUE")
+elif dp_variant == "rocm":
+    cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE")
+else:
+    raise RuntimeError("Unsupported DP_VARIANT option: %s" % dp_variant)
+
+# Select the float precision from the DP_FLOAT_PREC environment variable (high, low).
+dp_float_prec = os.environ.get("DP_FLOAT_PREC", "").lower()
+if dp_float_prec in ["high", "low"]:
+    cmake_args.append("-DFLOAT_PREC:STRING=%s" % dp_float_prec)
+elif dp_float_prec == "":
+    # unset or empty: fall back to the default, high precision
+    cmake_args.append("-DFLOAT_PREC:STRING=high")
+else:
+    raise RuntimeError("Unsupported float precision option: %s" % dp_float_prec)
+
 # get tensorflow spec
 tf_spec = find_spec("tensorflow")
 if not tf_spec:
@@ -101,7 +123,7 @@
         f"-DTENSORFLOW_ROOT:STRING={tf_install_dir}",
         "-DBUILD_PY_IF:BOOL=TRUE",
         "-DBUILD_CPP_IF:BOOL=FALSE",
-        "-DFLOAT_PREC:STRING=high",
+        *cmake_args,
     ],
     cmake_source_dir="source",
     cmake_minimum_required_version="3.0",
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index 6e27a1e60f..2a606c61f8 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -4,6 +4,11 @@ if [ -z "$FLOAT_PREC" ]
 then
   FLOAT_PREC=high
 fi
+
+CUDA_ARGS=""  # start empty so a CUDA_ARGS inherited from the environment cannot leak into cmake
+if [ "$DP_VARIANT" = "cuda" ]; then  # "=" is the portable string comparison for [ ]
+  CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE"
+fi
 #------------------
 
 SCRIPT_PATH=$(dirname $(realpath -s $0))
@@ -20,7 +25,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DFLOAT_PREC=${FLOAT_PREC} -DINSTALL_TENSORFLOW=TRUE ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DFLOAT_PREC=${FLOAT_PREC} -DINSTALL_TENSORFLOW=TRUE ${CUDA_ARGS} ..
 make -j${NPROC}
 make install