From cfec34d0224f40f559e3a2a2096ecd1bf562069c Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 13:51:40 +0800 Subject: [PATCH 1/7] [zero] update legacy import --- colossalai/zero/legacy/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/colossalai/zero/legacy/__init__.py b/colossalai/zero/legacy/__init__.py index 35570a1f539a..3783d38e61b2 100644 --- a/colossalai/zero/legacy/__init__.py +++ b/colossalai/zero/legacy/__init__.py @@ -6,6 +6,7 @@ from colossalai.logging import get_dist_logger from .init_ctx import ZeroInitContext, no_shard_zero_context, no_shard_zero_decrator +from .shard_utils import BucketTensorShardStrategy, TensorShardStrategy from .sharded_model import ShardedModelV2 from .sharded_optim import ShardedOptimizerV2 @@ -40,5 +41,5 @@ def convert_to_zero_v2(model: nn.Module, optimizer: torch.optim.Optimizer, model __all__ = [ 'convert_to_zero_v2', 'ShardedModelV2', 'ShardedOptimizerV2', 'ZeroInitContext', 'no_shard_zero_context', - 'no_shard_zero_decrator' + 'no_shard_zero_decrator', 'TensorShardStrategy', 'BucketTensorShardStrategy' ] From 6e6194f4464d0152f49eaee76d6ff2552b970abb Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 13:56:49 +0800 Subject: [PATCH 2/7] [zero] update examples --- examples/language/roberta/configs/colossalai_ddp.py | 2 +- examples/language/roberta/configs/colossalai_zero.py | 4 ++-- examples/tutorial/opt/opt/colossalai_zero.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/language/roberta/configs/colossalai_ddp.py b/examples/language/roberta/configs/colossalai_ddp.py index c3c59aa4079c..b98b1e902be1 100644 --- a/examples/language/roberta/configs/colossalai_ddp.py +++ b/examples/language/roberta/configs/colossalai_ddp.py @@ -1,4 +1,4 @@ -from colossalai.zero.shard_utils import TensorShardStrategy from colossalai.nn.optimizer import FusedAdam +from colossalai.zero.legacy import TensorShardStrategy clip_grad_norm = 1.0 diff --git a/examples/language/roberta/configs/colossalai_zero.py b/examples/language/roberta/configs/colossalai_zero.py index c5debdce0988..a02c835fd394 100644 --- a/examples/language/roberta/configs/colossalai_zero.py +++ b/examples/language/roberta/configs/colossalai_zero.py @@ -1,5 +1,5 @@ -from colossalai.zero.shard_utils import TensorShardStrategy from colossalai.nn.optimizer import FusedAdam +from colossalai.zero.legacy import TensorShardStrategy # fp16 = dict( # mode=AMP_TYPE.TORCH, @@ -29,4 +29,4 @@ weight_decay=1e-2, ) -# 64433 \ No newline at end of file +# 64433 diff --git a/examples/tutorial/opt/opt/colossalai_zero.py b/examples/tutorial/opt/opt/colossalai_zero.py index 833745f3e8d8..088c9ee43c73 100644 --- a/examples/tutorial/opt/opt/colossalai_zero.py +++ b/examples/tutorial/opt/opt/colossalai_zero.py @@ -1,4 +1,4 @@ -from colossalai.zero.shard_utils import TensorShardStrategy +from colossalai.zero.legacy import TensorShardStrategy zero = dict(model_config=dict(shard_strategy=TensorShardStrategy(), tensor_placement_policy="auto", From dac00c37dc0a4f8a4b8072a28d55a38faec20d40 Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 14:14:59 +0800 Subject: [PATCH 3/7] [example] fix opt tutorial --- examples/tutorial/opt/opt/run_clm.py | 6 +++++- examples/tutorial/opt/opt/test_ci.sh | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100755 examples/tutorial/opt/opt/test_ci.sh diff --git a/examples/tutorial/opt/opt/run_clm.py b/examples/tutorial/opt/opt/run_clm.py index e618b4d66957..fdc86adab665 100755 --- a/examples/tutorial/opt/opt/run_clm.py +++ b/examples/tutorial/opt/opt/run_clm.py @@ -413,7 +413,11 @@ def main(): cai_version = colossalai.__version__ logger.info(f'using Colossal-AI version {cai_version}') if version.parse(cai_version) > version.parse("0.1.10"): - from colossalai.nn.parallel import GeminiDDP + try: + from colossalai.nn.parallel import GeminiDDP + except ImportError: + # this works for unreleased main branch, and this may be released on 0.2.9 + from colossalai.zero import GeminiDDP model = GeminiDDP(model, device=get_current_device(), placement_policy=PLACEMENT_POLICY, pin_memory=True) elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"): from colossalai.gemini import ChunkManager, GeminiManager diff --git a/examples/tutorial/opt/opt/test_ci.sh b/examples/tutorial/opt/opt/test_ci.sh new file mode 100755 index 000000000000..be85bfb8babb --- /dev/null +++ b/examples/tutorial/opt/opt/test_ci.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -xue + +BS=8 +MEMCAP=0 +GPUNUM=2 +MODLE="facebook/opt-125m" + +torchrun \ + --nproc_per_node ${GPUNUM} \ + --master_port 19198 \ + run_clm.py \ + -s \ + --output_dir $PWD \ + --mem_cap ${MEMCAP} \ + --model_name_or_path ${MODLE} \ + --per_device_train_batch_size ${BS} \ + --num_train_epochs 1 From f5afd9e8166d827bb5543f766810c677296ba150 Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 14:46:58 +0800 Subject: [PATCH 4/7] [example] fix opt tutorial --- examples/tutorial/opt/test_ci.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 examples/tutorial/opt/test_ci.sh diff --git a/examples/tutorial/opt/test_ci.sh b/examples/tutorial/opt/test_ci.sh new file mode 100755 index 000000000000..8341bb10510f --- /dev/null +++ b/examples/tutorial/opt/test_ci.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +cd opt && bash test_ci.sh From 98bd74ba6049b14b27690422ed01a985445d10b9 Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 16:04:29 +0800 Subject: [PATCH 5/7] [example] fix opt tutorial --- examples/tutorial/opt/opt/test_ci.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/tutorial/opt/opt/test_ci.sh b/examples/tutorial/opt/opt/test_ci.sh index be85bfb8babb..e505da1364de 100755 --- a/examples/tutorial/opt/opt/test_ci.sh +++ b/examples/tutorial/opt/opt/test_ci.sh @@ -2,6 +2,8 @@ set -xue +pip install -r requirements.txt + BS=8 MEMCAP=0 GPUNUM=2 From c7e5830b7de6fe9d3ec665cfac541e889e352606 Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 16:09:24 +0800 Subject: [PATCH 6/7] [example] fix opt tutorial --- examples/tutorial/opt/opt/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/tutorial/opt/opt/requirements.txt b/examples/tutorial/opt/opt/requirements.txt index c34df7992d3f..d0ed2c717aee 100644 --- a/examples/tutorial/opt/opt/requirements.txt +++ b/examples/tutorial/opt/opt/requirements.txt @@ -4,3 +4,4 @@ datasets >= 1.8.0 sentencepiece != 0.1.92 protobuf accelerate == 0.13.2 +transformers From a7c69ff2d5ac5c0622dcadb2c2979135eb5025fb Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Apr 2023 16:59:58 +0800 Subject: [PATCH 7/7] [example] fix import --- examples/language/roberta/configs/colossalai_ddp.py | 7 ++++++- examples/language/roberta/configs/colossalai_zero.py | 7 ++++++- examples/tutorial/opt/opt/colossalai_zero.py | 6 +++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/examples/language/roberta/configs/colossalai_ddp.py b/examples/language/roberta/configs/colossalai_ddp.py index b98b1e902be1..3146ffc45eef 100644 --- a/examples/language/roberta/configs/colossalai_ddp.py +++ b/examples/language/roberta/configs/colossalai_ddp.py @@ -1,4 +1,9 @@ from colossalai.nn.optimizer import FusedAdam -from colossalai.zero.legacy import TensorShardStrategy + +try: + from colossalai.zero.shard_utils import TensorShardStrategy +except ImportError: + # colossalai > 0.2.8 + from colossalai.zero.legacy import TensorShardStrategy clip_grad_norm = 1.0 diff --git a/examples/language/roberta/configs/colossalai_zero.py b/examples/language/roberta/configs/colossalai_zero.py index a02c835fd394..bae4c723ccc8 100644 --- a/examples/language/roberta/configs/colossalai_zero.py +++ b/examples/language/roberta/configs/colossalai_zero.py @@ -1,5 +1,10 @@ from colossalai.nn.optimizer import FusedAdam -from colossalai.zero.legacy import TensorShardStrategy + +try: + from colossalai.zero.shard_utils import TensorShardStrategy +except ImportError: + # colossalai > 0.2.8 + from colossalai.zero.legacy import TensorShardStrategy # fp16 = dict( # mode=AMP_TYPE.TORCH, diff --git a/examples/tutorial/opt/opt/colossalai_zero.py b/examples/tutorial/opt/opt/colossalai_zero.py index 088c9ee43c73..7c2c152450c5 100644 --- a/examples/tutorial/opt/opt/colossalai_zero.py +++ b/examples/tutorial/opt/opt/colossalai_zero.py @@ -1,4 +1,8 @@ -from colossalai.zero.legacy import TensorShardStrategy +try: + from colossalai.zero.shard_utils import TensorShardStrategy +except ImportError: + # colossalai > 0.2.8 + from colossalai.zero.legacy import TensorShardStrategy zero = dict(model_config=dict(shard_strategy=TensorShardStrategy(), tensor_placement_policy="auto",