Thank you for any help. -Kastan
Colossalai should be built with cuda extension to use the FP16 optimizer
If you want to activate cuda mode for MoE, please install with cuda_ext!
train_gpt.py 4 <module>
"train_gpt.py", line 4
> import colossalai
...envs/col_ai_v4/lib/python3.8/site-packages/colossalai/__init__.py 1 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/__init__.py", line 1
> from .initialize import (initialize, launch, launch_from_openmpi,
...s/col_ai_v4/lib/python3.8/site-packages/colossalai/initialize.py 18 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/initialize.py", line 18
> from colossalai.amp import AMP_TYPE, convert_to_amp
.../col_ai_v4/lib/python3.8/site-packages/colossalai/amp/__init__.py 9 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/amp/__init__.py", line 9
> from .torch_amp import convert_to_torch_amp
.../lib/python3.8/site-packages/colossalai/amp/torch_amp/__init__.py 5 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/amp/torch_amp/__init__.py", line 5
> from .torch_amp import TorchAMPOptimizer, TorchAMPModel, TorchAMPLoss
...ib/python3.8/site-packages/colossalai/amp/torch_amp/torch_amp.py 12 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/amp/torch_amp/torch_amp.py", line 12
> from colossalai.nn.optimizer import ColossalaiOptimizer
...s/col_ai_v4/lib/python3.8/site-packages/colossalai/nn/__init__.py 1 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/nn/__init__.py", line 1
> from .layer import *
...ai_v4/lib/python3.8/site-packages/colossalai/nn/layer/__init__.py 7 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/nn/layer/__init__.py", line 7
> from .moe import *
...4/lib/python3.8/site-packages/colossalai/nn/layer/moe/__init__.py 1 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/nn/layer/moe/__init__.py", line 1
> from .experts import Experts, FFNExperts, TPExperts
...v4/lib/python3.8/site-packages/colossalai/nn/layer/moe/experts.py 8 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/nn/layer/moe/experts.py", line 8
> from colossalai.zero.init_ctx import no_shard_zero_decrator
...col_ai_v4/lib/python3.8/site-packages/colossalai/zero/__init__.py 6 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/zero/__init__.py", line 6
> from colossalai.zero.sharded_model.sharded_model_v2 import ShardedModelV2
...python3.8/site-packages/colossalai/zero/sharded_model/__init__.py 1 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/zero/sharded_model/__init__.py", line 1
> from .sharded_model_v2 import ShardedModelV2
.../site-packages/colossalai/zero/sharded_model/sharded_model_v2.py 11 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/zero/sharded_model/sharded_model_v2.py", line 11
> from colossalai.engine.ophooks import register_ophooks_recursively
...l_ai_v4/lib/python3.8/site-packages/colossalai/engine/__init__.py 1 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/engine/__init__.py", line 1
> from ._base_engine import Engine
...v4/lib/python3.8/site-packages/colossalai/engine/_base_engine.py 11 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/engine/_base_engine.py", line 11
> from colossalai.engine.schedule import BaseSchedule, NonPipelineSchedule, PipelineSchedule, InterleavedPipelineSchedule
...ib/python3.8/site-packages/colossalai/engine/schedule/__init__.py 2 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/engine/schedule/__init__.py", line 2
> from ._pipeline_schedule import PipelineSchedule, InterleavedPipelineSchedule, get_tensor_shape
...8/site-packages/colossalai/engine/schedule/_pipeline_schedule.py 15 <module>"/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/engine/schedule/_pipeline_schedule.py", line 15
from colossalai.utils import switch_virtual_pipeline_parallel_rank
from colossalai.utils.cuda import get_current_device
> from colossalai.zero.sharded_model.sharded_model_v2 import ShardedModelV2
from ._base_schedule import BaseSchedule
def get_tensor_shape():
switch_virtual_pipeline_parallel_rank: <function switch_virtual_pipelin... [66]
get_current_device: <function get_current_device at 0x7f69675b0550>
ImportError:
cannot import name 'ShardedModelV2' from partially initialized module 'colossalai.zero.sharded_model.sharded_model_v2' (most likely due to a circular import) (/u/kastanday/.conda/envs/nice_base/envs/col_ai_v4/lib/python3.8/site-packages/colossalai/zero/sharded_model/sharded_model_v2.py)
Colossal-AI installed from source.
$ pip list
bcrypt 3.2.2
certifi 2022.6.15
cffi 1.15.1
cfgv 3.3.1
charset-normalizer 2.1.0
click 8.1.3
colorama 0.4.5
colossalai 0.1.7
commonmark 0.9.1
cowsay 4.0
cryptography 37.0.4
distlib 0.3.4
fabric 2.7.0
filelock 3.7.1
huggingface-hub 0.8.1
identify 2.5.1
idna 3.3
invoke 1.7.1
nodeenv 1.7.0
numpy 1.23.0
packaging 21.3
paramiko 2.11.0
pathlib2 2.3.7.post1
Pillow 9.2.0
pip 22.1.2
platformdirs 2.5.2
pre-commit 2.19.0
pretty-errors 1.2.25
psutil 5.9.1
pycparser 2.21
Pygments 2.12.0
PyNaCl 1.5.0
pyparsing 3.0.9
PyYAML 6.0
regex 2022.6.2
requests 2.28.1
rich 12.4.4
setuptools 61.2.0
six 1.16.0
tokenizers 0.12.1
toml 0.10.2
torch 1.12.0+cu116
torchaudio 0.12.0+cu116
torchvision 0.13.0+cu116
tqdm 4.64.0
transformers 4.20.1
typing_extensions 4.3.0
urllib3 1.26.9
virtualenv 20.14.1
wheel 0.37.1
$ nvcc --version
...
Cuda compilation tools, release 11.6, V11.6.112
🐛 Describe the bug
Running the examples (language/gpt) worked a few weeks ago; after pulling, I now get a circular import error (full trace below). Thank you for any help. -Kastan
Environment
Colossal-AI installed from source.