@@ -1,5 +1,4 @@
class Registry:
# TODO: refactor the registry classes used in colossalai.legacy.registry, colossalai.fx and here

def __init__(self, name):
self.name = name
47 changes: 18 additions & 29 deletions colossalai/logging/logger.py
@@ -6,8 +6,7 @@
from pathlib import Path
from typing import List, Union

-import colossalai
-from colossalai.context.parallel_mode import ParallelMode
+import torch.distributed as dist


class DistributedLogger:
@@ -63,6 +62,7 @@ def __init__(self, name):
self._logger.propagate = False

DistributedLogger.__instances[name] = self
+self.rank = dist.get_rank() if dist.is_initialized() else 0

@staticmethod
def __get_call_info():
@@ -109,16 +109,10 @@ def log_to_file(self, path: Union[str, Path], mode: str = 'a', level: str = 'INF
# create log directory
path.mkdir(parents=True, exist_ok=True)

-# set the default file name if path is a directory
-if not colossalai.core.global_context.is_initialized(ParallelMode.GLOBAL):
-rank = 0
-else:
-rank = colossalai.core.global_context.get_global_rank()
-
if suffix is not None:
-log_file_name = f'rank_{rank}_{suffix}.log'
+log_file_name = f'rank_{self.rank}_{suffix}.log'
else:
-log_file_name = f'rank_{rank}.log'
+log_file_name = f'rank_{self.rank}.log'
path = path.joinpath(log_file_name)

# add file handler
@@ -128,19 +122,14 @@ def log_to_file(self, path: Union[str, Path], mode: str = 'a', level: str = 'INF
file_handler.setFormatter(formatter)
self._logger.addHandler(file_handler)

-def _log(self,
-level,
-message: str,
-parallel_mode: ParallelMode = ParallelMode.GLOBAL,
-ranks: List[int] = None) -> None:
+def _log(self, level, message: str, ranks: List[int] = None) -> None:
if ranks is None:
getattr(self._logger, level)(message)
else:
-local_rank = colossalai.core.global_context.get_local_rank(parallel_mode)
-if local_rank in ranks:
+if self.rank in ranks:
getattr(self._logger, level)(message)

-def info(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL, ranks: List[int] = None) -> None:
+def info(self, message: str, ranks: List[int] = None) -> None:
"""Log an info message.

Args:
@@ -150,10 +139,10 @@ def info(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL,
ranks (List[int]): List of parallel ranks.
"""
message_prefix = "{}:{} {}".format(*self.__get_call_info())
-self._log('info', message_prefix, parallel_mode, ranks)
-self._log('info', message, parallel_mode, ranks)
+self._log('info', message_prefix, ranks)
+self._log('info', message, ranks)

-def warning(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL, ranks: List[int] = None) -> None:
+def warning(self, message: str, ranks: List[int] = None) -> None:
"""Log a warning message.

Args:
@@ -163,10 +152,10 @@ def warning(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBA
ranks (List[int]): List of parallel ranks.
"""
message_prefix = "{}:{} {}".format(*self.__get_call_info())
-self._log('warning', message_prefix, parallel_mode, ranks)
-self._log('warning', message, parallel_mode, ranks)
+self._log('warning', message_prefix, ranks)
+self._log('warning', message, ranks)

-def debug(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL, ranks: List[int] = None) -> None:
+def debug(self, message: str, ranks: List[int] = None) -> None:
"""Log a debug message.

Args:
@@ -176,10 +165,10 @@ def debug(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL,
ranks (List[int]): List of parallel ranks.
"""
message_prefix = "{}:{} {}".format(*self.__get_call_info())
-self._log('debug', message_prefix, parallel_mode, ranks)
-self._log('debug', message, parallel_mode, ranks)
+self._log('debug', message_prefix, ranks)
+self._log('debug', message, ranks)

-def error(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL, ranks: List[int] = None) -> None:
+def error(self, message: str, ranks: List[int] = None) -> None:
"""Log an error message.

Args:
@@ -189,5 +178,5 @@ def error(self, message: str, parallel_mode: ParallelMode = ParallelMode.GLOBAL,
ranks (List[int]): List of parallel ranks.
"""
message_prefix = "{}:{} {}".format(*self.__get_call_info())
-self._log('error', message_prefix, parallel_mode, ranks)
-self._log('error', message, parallel_mode, ranks)
+self._log('error', message_prefix, ranks)
+self._log('error', message, ranks)
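Note: a minimal usage sketch of the refactored logger (not part of this diff). It assumes the get_dist_logger helper from colossalai.logging and an initialized torch.distributed process group; without one, the cached rank falls back to 0.

from colossalai.logging import get_dist_logger  # helper assumed, not shown in this diff

logger = get_dist_logger()
logger.info("visible on every rank")              # no ranks filter
logger.info("visible on rank 0 only", ranks=[0])  # compared against the rank cached from torch.distributed
logger.log_to_file("./logs", suffix="train")      # creates ./logs and writes rank_{rank}_train.log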
6 changes: 0 additions & 6 deletions colossalai/nn/lr_scheduler/cosine.py
@@ -1,11 +1,8 @@
from torch.optim.lr_scheduler import CosineAnnealingLR as _CosineAnnealingLR

-from colossalai.legacy.registry import LR_SCHEDULERS
-
from .delayed import DelayerScheduler, WarmupDelayerScheduler, WarmupScheduler


-@LR_SCHEDULERS.register_module
class CosineAnnealingLR(_CosineAnnealingLR):
r"""Set the learning rate of each parameter group using a cosine annealing
schedule, where :math:`\eta_{max}` is set to the initial lr and
@@ -49,7 +46,6 @@ def __init__(self, optimizer, total_steps: int, eta_min: int = 0, last_epoch: in
super().__init__(optimizer, total_steps, eta_min=eta_min, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class CosineAnnealingWarmupLR(WarmupScheduler):
"""Cosine annealing learning rate scheduler with learning rate warmup. A linear warmup schedule will be applied.

@@ -70,7 +66,6 @@ def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0, eta_min:
super().__init__(optimizer, warmup_steps, base_scheduler)


-@LR_SCHEDULERS.register_module
class FlatAnnealingLR(DelayerScheduler):
"""Flat and cosine annealing learning rate scheduler. The learning rate will be a fixed value before starting decay.

@@ -91,7 +86,6 @@ def __init__(self, optimizer, total_steps: int, pct_start: float = 0.72, last_ep
super().__init__(optimizer, flat_steps, base_scheduler, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class FlatAnnealingWarmupLR(WarmupDelayerScheduler):
"""Flat and cosine annealing learning rate scheduler with learning rate warmup. A linear warmup schedule will be
applied, and then the learning rate will be a fixed value before starting decay.
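Note: with the @LR_SCHEDULERS.register_module decorators removed, these schedulers are no longer built through the legacy registry. A minimal sketch of direct construction, assuming the classes remain exported from colossalai.nn.lr_scheduler and following the constructor shown above (optimizer, total_steps, warmup_steps, eta_min):

import torch
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR  # import path assumed

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = CosineAnnealingWarmupLR(optimizer, total_steps=100, warmup_steps=10)

for _ in range(100):
    optimizer.step()   # the actual forward/backward pass is elided
    scheduler.step()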
3 changes: 0 additions & 3 deletions colossalai/nn/lr_scheduler/linear.py
@@ -1,9 +1,6 @@
from torch.optim.lr_scheduler import _LRScheduler

-from colossalai.legacy.registry import LR_SCHEDULERS
-

-@LR_SCHEDULERS.register_module
class LinearWarmupLR(_LRScheduler):
"""Linearly warmup learning rate and then linearly decay.

4 changes: 0 additions & 4 deletions colossalai/nn/lr_scheduler/multistep.py
@@ -2,12 +2,9 @@

from torch.optim.lr_scheduler import MultiStepLR as _MultiStepLR

-from colossalai.legacy.registry import LR_SCHEDULERS
-
from .delayed import WarmupScheduler


-@LR_SCHEDULERS.register_module
class MultiStepLR(_MultiStepLR):
"""Decays the learning rate of each parameter group by gamma once the
number of epoch reaches one of the milestones. Notice that such decay can
@@ -33,7 +30,6 @@ def __init__(self,
super().__init__(optimizer, milestones, gamma=gamma, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class MultiStepWarmupLR(WarmupScheduler):
"""Multistep learning rate scheduler with warmup.

3 changes: 0 additions & 3 deletions colossalai/nn/lr_scheduler/onecycle.py
@@ -1,9 +1,6 @@
from torch.optim.lr_scheduler import OneCycleLR as _OneCycleLR

-from colossalai.legacy.registry import LR_SCHEDULERS
-

-@LR_SCHEDULERS.register_module
class OneCycleLR(_OneCycleLR):
r"""Sets the learning rate of each parameter group according to the
1cycle learning rate policy. The 1cycle policy anneals the learning
4 changes: 0 additions & 4 deletions colossalai/nn/lr_scheduler/poly.py
@@ -1,11 +1,8 @@
from torch.optim.lr_scheduler import _LRScheduler

-from colossalai.legacy.registry import LR_SCHEDULERS
-
from .delayed import WarmupScheduler


-@LR_SCHEDULERS.register_module
class PolynomialLR(_LRScheduler):
"""Polynomial learning rate scheduler.

@@ -41,7 +38,6 @@ def _get_closed_form_lr(self):
for base_lr in self.base_lrs]


-@LR_SCHEDULERS.register_module
class PolynomialWarmupLR(WarmupScheduler):
"""Polynomial learning rate scheduler with warmup.

6 changes: 0 additions & 6 deletions colossalai/nn/lr_scheduler/torch.py
@@ -3,10 +3,7 @@
from torch.optim.lr_scheduler import MultiplicativeLR as _MultiplicativeLR
from torch.optim.lr_scheduler import StepLR as _StepLR

-from colossalai.legacy.registry import LR_SCHEDULERS
-

-@LR_SCHEDULERS.register_module
class LambdaLR(_LambdaLR):
"""Sets the learning rate of each parameter group to the initial lr
times a given function. When last_epoch=-1, sets initial lr as lr.
@@ -24,7 +21,6 @@ def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch: int = -1)
super().__init__(optimizer, lr_lambda, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class MultiplicativeLR(_MultiplicativeLR):
"""Multiply the learning rate of each parameter group by the factor given
in the specified function. When last_epoch=-1, sets initial lr as lr.
@@ -42,7 +38,6 @@ def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch: int = -1)
super().__init__(optimizer, lr_lambda, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class StepLR(_StepLR):
"""Decays the learning rate of each parameter group by gamma every
step_size epochs. Notice that such decay can happen simultaneously with
@@ -61,7 +56,6 @@ def __init__(self, optimizer, total_steps, step_size: int = 1, gamma: float = 0.
super().__init__(optimizer, step_size, gamma=gamma, last_epoch=last_epoch)


-@LR_SCHEDULERS.register_module
class ExponentialLR(_ExponentialLR):
"""Decays the learning rate of each parameter group by gamma every epoch.
When last_epoch=-1, sets initial lr as lr
2 changes: 0 additions & 2 deletions colossalai/nn/optimizer/cpu_adam.py
@@ -4,12 +4,10 @@
import torch

from colossalai.kernel.op_builder import CPUAdamBuilder
-from colossalai.legacy.registry import OPTIMIZERS

from .nvme_optimizer import NVMeOptimizer


-@OPTIMIZERS.register_module
class CPUAdam(NVMeOptimizer):
"""Implements Adam algorithm.

2 changes: 0 additions & 2 deletions colossalai/nn/optimizer/fused_adam.py
@@ -8,11 +8,9 @@
'''
import torch

-from colossalai.legacy.registry import OPTIMIZERS
from colossalai.utils import multi_tensor_applier


-@OPTIMIZERS.register_module
class FusedAdam(torch.optim.Optimizer):
"""Implements Adam algorithm.

2 changes: 0 additions & 2 deletions colossalai/nn/optimizer/fused_lamb.py
@@ -1,11 +1,9 @@
# modified from https://github.com/NVIDIA/apex/blob/master/apex/optimizers/fused_lamb.py
import torch

-from colossalai.legacy.registry import OPTIMIZERS
from colossalai.utils import multi_tensor_applier


-@OPTIMIZERS.register_module
class FusedLAMB(torch.optim.Optimizer):
"""Implements LAMB algorithm.

2 changes: 0 additions & 2 deletions colossalai/nn/optimizer/fused_sgd.py
@@ -2,11 +2,9 @@
import torch
from torch.optim.optimizer import Optimizer, required

-from colossalai.legacy.registry import OPTIMIZERS
from colossalai.utils import multi_tensor_applier


-@OPTIMIZERS.register_module
class FusedSGD(Optimizer):
r"""Implements stochastic gradient descent (optionally with momentum).

2 changes: 0 additions & 2 deletions colossalai/nn/optimizer/hybrid_adam.py
@@ -4,13 +4,11 @@
from torch.optim import Adam

from colossalai.kernel.op_builder import FusedOptimBuilder
-from colossalai.legacy.registry import OPTIMIZERS
from colossalai.utils import multi_tensor_applier

from .cpu_adam import CPUAdam


-@OPTIMIZERS.register_module
class HybridAdam(CPUAdam):
"""Implements Adam algorithm.

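Note: the optimizers follow the same pattern; with @OPTIMIZERS.register_module gone they are imported and constructed directly. A sketch assuming HybridAdam keeps an Adam-style constructor (params, lr, ...), is exported from colossalai.nn.optimizer, and runs on a CUDA machine with the CPU/fused Adam kernels built:

import torch
from colossalai.nn.optimizer import HybridAdam  # import path assumed

model = torch.nn.Linear(16, 16).cuda()
optimizer = HybridAdam(model.parameters(), lr=1e-3)

loss = model(torch.randn(4, 16, device="cuda")).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()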
3 changes: 0 additions & 3 deletions colossalai/nn/optimizer/lamb.py
@@ -5,10 +5,7 @@
import torch
from torch.optim import Optimizer

-from colossalai.legacy.registry import OPTIMIZERS
-

-@OPTIMIZERS.register_module
class Lamb(Optimizer):
r"""Implements Lamb algorithm.
It has been proposed in `Large Batch Optimization for Deep Learning: Training BERT in 76 minutes`_.
3 changes: 0 additions & 3 deletions colossalai/nn/optimizer/lars.py
@@ -5,10 +5,7 @@
import torch
from torch.optim import Optimizer

-from colossalai.legacy.registry import OPTIMIZERS
-

-@OPTIMIZERS.register_module
class Lars(Optimizer):
r"""Implements the LARS optimizer from `"Large batch training of convolutional networks"
<https://arxiv.org/pdf/1708.03888.pdf>`_.
2 changes: 0 additions & 2 deletions colossalai/utils/data_sampler/data_parallel_sampler.py
@@ -12,12 +12,10 @@

from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.legacy.registry import DATA_SAMPLERS

T_co = TypeVar('T_co', covariant=True)


-@DATA_SAMPLERS.register_module
class DataParallelSampler(Sampler):
"""A data sampler for distributed data parallelism.

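Note: DataParallelSampler likewise keeps working when imported from its module directly. A sketch assuming the usual sampler constructor (dataset, shuffle, ...) and standard DataLoader integration; the exact signature is not shown in this diff, and the class still relies on the colossalai global context being initialized (see the gpc import kept above):

import torch
from torch.utils.data import DataLoader, TensorDataset

from colossalai.utils.data_sampler.data_parallel_sampler import DataParallelSampler  # module path from this diff

dataset = TensorDataset(torch.randn(100, 8), torch.randint(0, 2, (100,)))
sampler = DataParallelSampler(dataset, shuffle=True)          # constructor arguments assumed
loader = DataLoader(dataset, batch_size=16, sampler=sampler)  # each rank then sees its own shard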