From 11f74bf9e814ed349ea0916b6725b6cdc44ee046 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:14:05 +0800 Subject: [PATCH 1/4] add installation req --- colossalai/inference/README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 5eb89447abc0..eafeb7c57206 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -55,18 +55,23 @@ dependencies ```bash pytorch= 1.13.1 (gpu) +cuda>= 11.6 transformers= 4.30.2 triton==2.0.0.dev20221202 -vllm= -flash-attention= +# to install vllm, please use this branch: https://github.com/tiandiao123/vllm/tree/setup_branch +vllm +# to install flash-attention, please use this commit hash: 67ae6fd74b4bc99c36b2ce524cf139c35663793c +flash-attention ``` ### Docker -You can use our official docker container as well. +You can use Docker to set up the environment + +``` +docker pull colossal-inference:v2 +docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace colosssal-inference:v2 /bin/bash -```bash -docker.. ``` ### Dive into fast-inference! 
From b0d55b1e608e45c3b54de9e7f2f3a7b0cfd84fd4 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:23:13 +0800 Subject: [PATCH 2/4] fix --- colossalai/inference/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index eafeb7c57206..11fe89cfe514 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -69,8 +69,10 @@ flash-attention You can use Docker to set up the environment ``` -docker pull colossal-inference:v2 -docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace colosssal-inference:v2 /bin/bash +# env: python==3.8, cuda 11.6, triton==2.0.0, vllm kernels support, flash-attention-2 kernels support +docker pull hpcaitech/colossalai-inference:v2 +docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash + ``` From 2704a27cdd45e9ca0840cf08dc98d5c2f7c280b0 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:25:31 +0800 Subject: [PATCH 3/4] slight change --- colossalai/inference/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 11fe89cfe514..6e0abcd24388 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -69,7 +69,7 @@ flash-attention You can use Docker to set up the environment ``` -# env: python==3.8, cuda 11.6, triton==2.0.0, vllm kernels support, flash-attention-2 kernels support +# env: python==3.8, cuda 11.6, pytorch == 1.13.1, triton==2.0.0.dev20221202, vllm kernels support, flash-attention-2 kernels support docker pull hpcaitech/colossalai-inference:v2 docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash From 88c2b0faa52df6aae7b5b79a7bf97332212b28dd Mon Sep 17 00:00:00 2001 From: 
"cuiqing.li" Date: Thu, 31 Aug 2023 14:37:44 +0800 Subject: [PATCH 4/4] remove empty --- colossalai/inference/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 6e0abcd24388..7228c51aa484 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -73,7 +73,6 @@ You can use docker run to use docker container to set-up environment docker pull hpcaitech/colossalai-inference:v2 docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash - ``` ### Dive into fast-inference!