Merged
Commits (49)
6fc6a05
fix for async io
flybird11111 Feb 13, 2025
510ff7b
Merge branch 'hpcaitech:main' into main
flybird11111 Feb 14, 2025
3ecb500
test for upgrading transformers
flybird11111 Mar 27, 2025
40cf89d
Merge branch 'hpcaitech:main' into upgrade-transformers
flybird11111 Mar 27, 2025
0b81be7
add ci machine
flybird11111 Mar 28, 2025
6c728df
fix
flybird11111 Mar 31, 2025
43885a4
fix
flybird11111 Mar 31, 2025
837a503
fix
flybird11111 Mar 31, 2025
8c66b7c
fix
flybird11111 Mar 31, 2025
621cb93
fix
flybird11111 Mar 31, 2025
822556a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 31, 2025
4b8b67a
fix
flybird11111 Apr 1, 2025
3491a9f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2025
ca91414
Update test_fp16_torch.py
flybird11111 Apr 9, 2025
397875e
Update build_on_pr.yml
flybird11111 Apr 9, 2025
28cf1e2
fix
flybird11111 Apr 9, 2025
b38d45e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 9, 2025
c0811d7
fix
flybird11111 Apr 9, 2025
466b61e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 9, 2025
a4e5ed9
fix
flybird11111 Apr 9, 2025
e92a692
Merge branch 'upgrade-transformers' of github.com:flybird11111/Coloss…
flybird11111 Apr 9, 2025
57d7b16
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 9, 2025
0e900ac
fix
flybird11111 Apr 9, 2025
d5a3d1a
fix
flybird11111 Apr 9, 2025
603e229
fix
flybird11111 Apr 9, 2025
dce2212
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 9, 2025
25c5e42
fix
flybird11111 Apr 9, 2025
99298c6
Merge branch 'upgrade-transformers' of github.com:flybird11111/Coloss…
flybird11111 Apr 9, 2025
eaef783
fix
flybird11111 Apr 10, 2025
964f9a7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 10, 2025
e8a3d52
fix
flybird11111 Apr 10, 2025
5c56a7f
Merge branch 'upgrade-transformers' of github.com:flybird11111/Coloss…
flybird11111 Apr 10, 2025
6997862
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 10, 2025
de4f7a1
fix
flybird11111 Apr 10, 2025
517bedc
Merge branch 'upgrade-transformers' of github.com:flybird11111/Coloss…
flybird11111 Apr 10, 2025
0d09c0e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 10, 2025
914b179
fix
flybird11111 Apr 10, 2025
c37107c
Merge branch 'upgrade-transformers' of github.com:flybird11111/Coloss…
flybird11111 Apr 10, 2025
21707a7
fix
flybird11111 Apr 10, 2025
910433f
fix
flybird11111 Apr 10, 2025
0950b07
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 10, 2025
db4c73f
fix
flybird11111 Apr 11, 2025
fd69a82
fix
flybird11111 Apr 11, 2025
dc60efe
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 11, 2025
a2e623d
fix
flybird11111 Apr 17, 2025
afe07a6
fix
flybird11111 Apr 17, 2025
7af46ab
fix
flybird11111 Apr 17, 2025
52ead00
fix
flybird11111 Apr 18, 2025
0c5ed65
fix
flybird11111 Apr 18, 2025
.github/workflows/build_on_pr.yml (6 changes: 3 additions & 3 deletions)
@@ -87,10 +87,10 @@ jobs:
     name: Build and Test Colossal-AI
     needs: detect
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
-    runs-on: [self-hosted, gpu]
+    runs-on: ubuntu-latest
     container:
-      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
-      options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
+      image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
+      options: --gpus all --shm-size=2g --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
       run:
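A note on the new --shm-size=2g option: NCCL collectives and PyTorch dataloader workers pass data through POSIX shared memory at /dev/shm, and Docker's default of 64 MB is easily exhausted by multi-GPU tests. A minimal sanity check one could run inside the container (illustrative only, not part of this PR; it assumes /dev/shm is mounted):

# Illustrative helper (not part of this PR): confirm the container's
# shared-memory mount is large enough for NCCL and dataloader traffic.
import shutil

def shm_total_gib(path: str = "/dev/shm") -> float:
    """Total size of the shared-memory mount, in GiB."""
    return shutil.disk_usage(path).total / (1 << 30)

if __name__ == "__main__":
    size = shm_total_gib()
    print(f"/dev/shm: {size:.1f} GiB")
    # Docker defaults to 64 MB unless --shm-size is passed to the container.
    assert size >= 2.0, "pass --shm-size=2g (or larger) to the container"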
tests/test_fp8/test_all_to_all_single.py (4 changes: 3 additions & 1 deletion)
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


+@clear_cache_before_run()
 @parameterize("shape", [(4,), (1, 8, 16), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
@@ -24,6 +25,7 @@ def check_all2all(shape, dtype, async_op):
     assert_close(output, output_fp8, rtol=0.1, atol=0.1)


+@clear_cache_before_run()
 @parameterize("shape", [(8, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
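The recurring change across the test files below is the new clear_cache_before_run() decorator from colossalai.testing. Roughly, it resets accelerator memory state before the decorated test runs, so one parameterized case's cached allocations don't skew the next. A minimal sketch of the idea (the real implementation in colossalai.testing may differ in details):

# Sketch of a cache-clearing test decorator (an approximation of what
# colossalai.testing.clear_cache_before_run plausibly does).
from functools import wraps

import torch

def clear_cache_before_run_sketch():
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            if torch.cuda.is_available():
                # Release cached allocator blocks so one test's footprint
                # does not inflate the next test's peak memory.
                torch.cuda.empty_cache()
                torch.cuda.reset_peak_memory_stats()
            return fn(*args, **kwargs)
        return wrapper
    return decorator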
tests/test_fp8/test_fp8_all_to_all.py (3 changes: 2 additions & 1 deletion)
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_to_all_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
tests/test_fp8/test_fp8_all_to_all_single.py (3 changes: 2 additions & 1 deletion)
@@ -6,11 +6,12 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn

 dist.all_to_all_single


+@clear_cache_before_run()
 @parameterize("shape", [(4), (8, 7), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
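One incidental detail visible in this hunk (pre-existing context, not changed by the PR): the first entry in the shape list is written (4), which in Python is just the integer 4, not the 1-tuple (4,). torch generally treats a bare int and a 1-tuple interchangeably when constructing tensors, which is presumably why this goes unnoticed:

# (4) is the int 4; only (4,) is a 1-tuple. Both are valid shape arguments.
import torch

assert (4) == 4 and isinstance((4,), tuple)
assert torch.randn(4).shape == torch.randn((4,)).shape == torch.Size([4])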
tests/test_fp8/test_fp8_allgather.py (3 changes: 2 additions & 1 deletion)
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_gather_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


+@clear_cache_before_run()
 @parameterize(
     "shape",
     [(3, 7, 16)],
tests/test_fp8/test_fp8_allreduce.py (3 changes: 2 additions & 1 deletion)
@@ -5,7 +5,7 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_reduce_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


 @parameterize(
@@ -20,6 +20,7 @@
         (8,),
     ],
 )
+@clear_cache_before_run()
 @parameterize("dtype", [torch.float16, torch.bfloat16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
 @parameterize("async_op", [True, False])
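For readers unfamiliar with colossalai.testing.parameterize: stacking the decorators expands a test over the cartesian product of all argument lists, so each test above runs once per (shape, dtype, fp8_format, async_op) combination. A rough, simplified sketch of the mechanism (argument handling in the real helper may differ):

# Simplified sketch of stacked parameterize decorators expanding a test
# into the cartesian product of its argument lists.
from functools import wraps

def parameterize_sketch(name, values):
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for value in values:
                kwargs[name] = value
                fn(*args, **kwargs)  # inner decorators loop their own values
        return wrapper
    return decorator

@parameterize_sketch("dtype", ["bf16", "fp16"])
@parameterize_sketch("fp8_format", ["e4m3", "e5m2"])
def demo(dtype, fp8_format):
    print(dtype, fp8_format)

demo()  # runs 4 times: 2 dtypes x 2 fp8 formats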
tests/test_fp8/test_fp8_cast.py (3 changes: 2 additions & 1 deletion)
@@ -3,9 +3,10 @@
 
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import cast_from_fp8, cast_from_fp8_pipeline, cast_to_fp8, cast_to_fp8_pipeline
-from colossalai.testing import parameterize
+from colossalai.testing import clear_cache_before_run, parameterize


+@clear_cache_before_run()
 @parameterize("shape", [(100, 10), (10, 100), (3, 7), (2, 1), (1, 2), (2, 2), (4, 2), (5,), (4,), (2,)])
 @parameterize("dtype", [torch.bfloat16, torch.float16, torch.float32])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
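The loose rtol=0.1/atol=0.1 tolerances seen throughout these tests reflect FP8's very low precision. As a rough illustration of the round-trip being tested, here is a per-tensor, amax-scaled cast in plain PyTorch (an approximation; colossalai's cast_to_fp8/cast_from_fp8 may compute their scales differently):

# Approximate FP8 round-trip with a single per-tensor scale derived
# from amax (illustrative; not colossalai's exact implementation).
import torch

def cast_to_fp8_sketch(x: torch.Tensor, fp8_format: str = "e4m3"):
    """Quantize x to FP8 with an amax-based per-tensor scale."""
    fp8_dtype = torch.float8_e4m3fn if fp8_format == "e4m3" else torch.float8_e5m2
    fp8_max = torch.finfo(fp8_dtype).max
    scale = x.abs().max().float().clamp(min=1e-12) / fp8_max
    return (x.float() / scale).to(fp8_dtype), scale

def cast_from_fp8_sketch(x_fp8: torch.Tensor, scale: torch.Tensor, dtype: torch.dtype):
    """Dequantize back to a higher-precision dtype."""
    return (x_fp8.float() * scale).to(dtype)

x = torch.randn(4, 8, dtype=torch.float16)
x_fp8, scale = cast_to_fp8_sketch(x)
x_rt = cast_from_fp8_sketch(x_fp8, scale, torch.float16)
# FP8 is lossy, hence the loose tolerances mirroring the tests above.
torch.testing.assert_close(x, x_rt, rtol=0.1, atol=0.1)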
tests/test_fp8/test_fp8_fsdp_comm_hook.py (3 changes: 2 additions & 1 deletion)
@@ -8,7 +8,7 @@
 from torch.testing import assert_close

 from colossalai import launch
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn

 # example modified from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html

@@ -28,6 +28,7 @@ def forward(self, x):
         return self.net2(self.relu(self.net1(x)))


+@clear_cache_before_run()
 @parameterize("mode", ["grad", "params"])
 def run_model(mode):
     rank = dist.get_rank()
tests/test_fp8/test_fp8_reduce_scatter.py (3 changes: 2 additions & 1 deletion)
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import reduce_scatter_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
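All of these FP8 tests share the same launch scaffolding: a per-rank entry point that calls colossalai.launch, and a top-level test that spawns one process per device, with rerun_if_address_is_in_use() guarding against stale port bindings. A sketch of that shared pattern, inferred from the imports shown above (exact argument names are assumptions; the concrete check functions live in each file):

# Sketch of the spawn/launch pattern these test files share.
import torch.distributed as dist

from colossalai import launch
from colossalai.testing import rerun_if_address_is_in_use, spawn

def run_dist(rank: int, world_size: int, port: int):
    # Each spawned worker initializes the distributed context, then runs
    # the parameterized checks (e.g. a check_4gpu-style function).
    launch(rank=rank, world_size=world_size, port=port, host="localhost")
    assert dist.is_initialized()

@rerun_if_address_is_in_use()
def test_fp8_collective():
    spawn(run_dist, 4)  # one process per GPU

if __name__ == "__main__":
    test_fp8_collective()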