Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion colossalai/auto_parallel/offload/base_offload_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch
import torch.nn as nn

from colossalai.nn.parallel.data_parallel import _cast_float
from colossalai.utils import _cast_float
from colossalai.zero.legacy.gemini.tensor_utils import free_storage

from .region_manager import RegionManager
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
class Registry:
# TODO: refactor the registry classes used in colossalai.legacy.registry, colossalai.fx and here

def __init__(self, name):
self.name = name
Expand Down
7 changes: 5 additions & 2 deletions colossalai/checkpoint_io/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import torch
import torch.nn as nn
from torch.optim import Optimizer
from transformers.modeling_utils import PreTrainedModel, get_parameter_dtype
from transformers.modeling_utils import unwrap_model as unwrap_huggingface_model

from colossalai.interface import ModelWrapper, OptimizerWrapper
from colossalai.nn.optimizer import ColossalaiOptimizer
Expand Down Expand Up @@ -383,6 +381,11 @@ def save_config_file(model: nn.Module, checkpoint_path: str, is_master: bool = T
checkpoint_path (str): Path to the checkpoint directory.
is_master (bool): Whether current rank is main process.
"""
try:
from transformers.modeling_utils import PreTrainedModel, get_parameter_dtype
from transformers.modeling_utils import unwrap_model as unwrap_huggingface_model
except ImportError:
return
if not isinstance(model, PreTrainedModel):
return

Expand Down
2 changes: 1 addition & 1 deletion colossalai/cli/benchmark/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch

import colossalai.nn as col_nn
import colossalai.legacy.nn as col_nn


class MLP(torch.nn.Module):
Expand Down
2 changes: 1 addition & 1 deletion colossalai/kernel/jit/option.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch

from colossalai.nn.layer.colossalai_layer import Embedding, Linear
from colossalai.legacy.nn.layer.colossalai_layer import Embedding, Linear
from colossalai.utils import get_current_device

from .bias_dropout_add import bias_dropout_add_fused_train
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
from .collective import all_gather, reduce_scatter, all_reduce, broadcast, reduce
from .p2p import (send_forward, send_forward_recv_forward, send_backward_recv_forward, send_backward,
send_backward_recv_backward, send_forward_recv_backward, send_forward_backward_recv_forward_backward,
recv_forward, recv_backward)
from .collective import all_gather, all_reduce, broadcast, reduce, reduce_scatter
from .p2p import (
recv_backward,
recv_forward,
send_backward,
send_backward_recv_backward,
send_backward_recv_forward,
send_forward,
send_forward_backward_recv_forward_backward,
send_forward_recv_backward,
send_forward_recv_forward,
)
from .ring import ring_forward
from .utils import send_obj_meta, recv_obj_meta
from .utils import recv_obj_meta, send_obj_meta

__all__ = [
'all_gather',
Expand Down
2 changes: 1 addition & 1 deletion colossalai/legacy/engine/schedule/_pipeline_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import torch.cuda

import colossalai.communication as comm
import colossalai.legacy.communication as comm
from colossalai.amp.naive_amp import NaiveAMPModel
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
Expand Down
6 changes: 3 additions & 3 deletions colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import torch.cuda

import colossalai.communication.p2p_v2 as comm
from colossalai import engine
import colossalai.legacy.communication.p2p_v2 as comm
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.legacy.engine import Engine
from colossalai.utils.cuda import get_current_device

from ._pipeline_schedule import PipelineSchedule
Expand Down Expand Up @@ -60,7 +60,7 @@ def data_process_func(stage_output, dataloader_output):
"""

def forward_backward_step(self,
engine: engine.Engine,
engine: Engine,
data_iter: Iterable,
forward_only=False,
return_loss=True,
Expand Down
4 changes: 4 additions & 0 deletions colossalai/legacy/nn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._ops import *
from .layer import *
from .loss import *
from .metric import *
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch.distributed as dist

from colossalai.global_variables import tensor_parallel_env as env
from colossalai.nn.layer.utils import divide
from colossalai.legacy.nn.layer.utils import divide
from colossalai.tensor import ColoTensor, ColoTensorSpec, ProcessGroup

GeneralTensor = Union[ColoTensor, torch.Tensor]
Expand Down Expand Up @@ -232,7 +232,7 @@ def dual_all_to_all(x, pg, scatter_dim: int, gather_dim: int):
return _DualAllToAll.apply(x, pg, scatter_dim, gather_dim)


### table wise embedding shard
# table wise embedding shard


def _all_to_all_for_tablewise(x: torch.Tensor,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import torch.nn.functional as F
from typing import Optional

import torch.nn.functional as F

from colossalai.tensor import ColoTensor, ColoTensorSpec, ComputePattern, ComputeSpec, ReplicaSpec, ShardSpec
from colossalai.tensor.op_wrapper import colo_op_impl
from colossalai.tensor import ComputePattern, ColoTensorSpec, ComputePattern, ComputeSpec, ColoTensor, ShardSpec, \
ReplicaSpec

from ._utils import GeneralTensor, convert_to_colo_tensor, reduce_input


Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import torch.nn.functional as F
from typing import Optional

import torch.nn.functional as F
from torch import Tensor

from colossalai.tensor import ColoTensor, ColoTensorSpec, ComputePattern, ComputeSpec, ReplicaSpec, ShardSpec, distspec
from colossalai.tensor.op_wrapper import colo_op_impl
from colossalai.tensor import ComputePattern, ComputePattern, ComputeSpec, ColoTensor, distspec, ColoTensorSpec, \
ShardSpec, ReplicaSpec

from ._utils import GeneralTensor, convert_to_colo_tensor


Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import List, Optional

import torch.nn.functional as F

from colossalai.tensor import ColoTensor, ColoTensorSpec, ReplicaSpec, distspec
from colossalai.tensor.op_wrapper import colo_op_impl
from colossalai.tensor import ColoTensor, distspec, ColoTensorSpec, ReplicaSpec

from ._utils import GeneralTensor, convert_to_colo_tensor


Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from typing import Optional

import torch
import torch.nn.functional as F
from typing import Optional
from colossalai.tensor.op_wrapper import colo_op_impl

from colossalai.legacy.nn.loss.loss_1d import VocabParallelCrossEntropyLoss1D
from colossalai.tensor import ColoTensor, ColoTensorSpec
from colossalai.nn.loss.loss_1d import VocabParallelCrossEntropyLoss1D
from colossalai.tensor.op_wrapper import colo_op_impl

from ._utils import GeneralTensor, convert_to_colo_tensor


Expand Down
File renamed without changes.
9 changes: 9 additions & 0 deletions colossalai/legacy/nn/layer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .colossalai_layer import *
from .parallel_1d import *
from .parallel_2d import *
from .parallel_2p5d import *
from .parallel_3d import *
from .parallel_sequence import *
from .utils import *
from .vanilla import *
from .wrapper import *
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ._utils import partition_batch
from .dropout import Dropout
from .embedding import Embedding, PatchEmbedding
from .linear import Classifier, Linear
from .normalization import LayerNorm
__all__ = ['Linear', 'Classifier', 'Embedding', 'PatchEmbedding', 'LayerNorm', 'Dropout', 'partition_batch']
from ._utils import partition_batch
from .dropout import Dropout
from .embedding import Embedding, PatchEmbedding
from .linear import Classifier, Linear
from .normalization import LayerNorm

__all__ = ['Linear', 'Classifier', 'Embedding', 'PatchEmbedding', 'LayerNorm', 'Dropout', 'partition_batch']
Loading