From 11f74bf9e814ed349ea0916b6725b6cdc44ee046 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:14:05 +0800 Subject: [PATCH 1/4] add installation req --- colossalai/inference/README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 5eb89447abc0..eafeb7c57206 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -55,18 +55,23 @@ dependencies ```bash pytorch= 1.13.1 (gpu) +cuda>= 11.6 transformers= 4.30.2 triton==2.0.0.dev20221202 -vllm= -flash-attention= +# to install vllm, please use this branch: https://github.com/tiandiao123/vllm/tree/setup_branch +vllm +# to install flash-attention, please use this commit hash: 67ae6fd74b4bc99c36b2ce524cf139c35663793c +flash-attention ``` ### Docker -You can use our official docker container as well. +You can use Docker to set up the environment + +``` +docker pull colossal-inference:v2 +docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace colosssal-inference:v2 /bin/bash -```bash -docker.. ``` ### Dive into fast-inference! 
From b0d55b1e608e45c3b54de9e7f2f3a7b0cfd84fd4 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:23:13 +0800 Subject: [PATCH 2/4] fix --- colossalai/inference/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index eafeb7c57206..11fe89cfe514 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -69,8 +69,10 @@ flash-attention You can use Docker to set up the environment ``` -docker pull colossal-inference:v2 -docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace colosssal-inference:v2 /bin/bash +# env: python==3.8, cuda 11.6, triton==2.0.0, vllm kernels support, flash-attention-2 kernels support +docker pull hpcaitech/colossalai-inference:v2 +docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash + ``` From 2704a27cdd45e9ca0840cf08dc98d5c2f7c280b0 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 31 Aug 2023 14:25:31 +0800 Subject: [PATCH 3/4] slight change --- colossalai/inference/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 11fe89cfe514..6e0abcd24388 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -69,7 +69,7 @@ flash-attention You can use Docker to set up the environment ``` -# env: python==3.8, cuda 11.6, triton==2.0.0, vllm kernels support, flash-attention-2 kernels support +# env: python==3.8, cuda 11.6, pytorch == 1.13.1, triton==2.0.0.dev20221202, vllm kernels support, flash-attention-2 kernels support docker pull hpcaitech/colossalai-inference:v2 docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash From 88c2b0faa52df6aae7b5b79a7bf97332212b28dd Mon Sep 17 00:00:00 2001 From: 
"cuiqing.li" Date: Thu, 31 Aug 2023 14:37:44 +0800 Subject: [PATCH 4/4] remove empty --- colossalai/inference/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index 6e0abcd24388..7228c51aa484 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -73,7 +73,6 @@ You can use docker run to use docker container to set-up environment docker pull hpcaitech/colossalai-inference:v2 docker run -it --gpus all --name ANY_NAME -v $PWD:/workspace -w /workspace hpcaitech/colossalai-inference:v2 /bin/bash - ``` ### Dive into fast-inference!