9 changes: 4 additions & 5 deletions deepspeed/runtime/bf16_optimizer.py
@@ -7,9 +7,9 @@
 import torch
 import sys
 import os
+from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
 from deepspeed import comm as dist
 from deepspeed.runtime.constants import PIPE_REPLICATED
-from deepspeed.ops.op_builder import UtilsBuilder
 from deepspeed.runtime import ZeROOptimizer
 from packaging import version as pkg_version

@@ -53,10 +53,9 @@ def __init__(self,
         self.dp_rank = dist.get_rank(group=self.dp_process_group)
         self.real_dp_process_group = [dp_process_group for i in range(len(self.optimizer.param_groups))]

-        # Load pre-built or JIT compile (un)flatten ops
-        util_ops = UtilsBuilder().load()
-        self.flatten = util_ops.flatten
-        self.unflatten = util_ops.unflatten
+        # Use torch (un)flatten ops
+        self.flatten = _flatten_dense_tensors
+        self.unflatten = _unflatten_dense_tensors

         #align nccl all-gather send buffers to 4-byte boundary
         self.nccl_start_alignment_factor = 2  # 4-byte alignment/sizeof(fp16) = 2
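The change is identical in all four files below: the JIT-compiled flatten/unflatten ops loaded via UtilsBuilder are swapped for torch's built-in helpers. As a minimal sketch of what those helpers do — note that _flatten_dense_tensors and _unflatten_dense_tensors are private torch utilities, so this round-trip illustrates their conventional behavior rather than a documented API guarantee:

    import torch
    from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

    tensors = [torch.randn(2, 3), torch.randn(5)]

    # Flatten: copy the list into one contiguous 1-D buffer.
    flat = _flatten_dense_tensors(tensors)
    assert flat.numel() == sum(t.numel() for t in tensors)

    # Unflatten: views into the flat buffer with the original shapes.
    views = _unflatten_dense_tensors(flat, tensors)
    for t, v in zip(tensors, views):
        assert torch.equal(t, v)

    # The NCCL alignment factor in bf16_optimizer.py follows from the
    # diff's own comment: 4-byte alignment / sizeof(fp16) = 4 / 2 = 2.
    fp16_bytes = torch.tensor([], dtype=torch.float16).element_size()  # 2
    assert 4 // fp16_bytes == 2  # matches self.nccl_start_alignment_factor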
9 changes: 4 additions & 5 deletions deepspeed/runtime/engine.py
@@ -16,6 +16,7 @@
 from torch.nn.parameter import Parameter
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
+from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

 from typing import Callable, Dict, Union, Iterable

@@ -93,7 +94,6 @@
 from deepspeed.utils.logging import print_json_dist, print_configuration

 from deepspeed.accelerator import get_accelerator
-from deepspeed.ops.op_builder import UtilsBuilder

 from deepspeed.runtime.config import DtypeEnum

@@ -360,10 +360,9 @@ def __init__(
         if self.dump_state():
             print_configuration(self, "DeepSpeedEngine")

-        # Load pre-installed or JIT compile (un)flatten ops
-        util_ops = UtilsBuilder().load()
-        self.flatten = util_ops.flatten
-        self.unflatten = util_ops.unflatten
+        # Use torch (un)flatten ops
+        self.flatten = _flatten_dense_tensors
+        self.unflatten = _unflatten_dense_tensors

     def destroy(self):
         if self.optimizer is not None and hasattr(self.optimizer, 'destroy'):
9 changes: 4 additions & 5 deletions deepspeed/runtime/zero/stage3.py
@@ -8,6 +8,7 @@
 import collections
 from typing import Deque, Dict, Tuple

+from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
 from deepspeed.runtime import ZeROOptimizer
 from deepspeed.utils import logger
 from deepspeed.runtime.fp16.loss_scaler import CreateLossScaler

@@ -23,7 +24,6 @@
 from deepspeed.runtime.swap_tensor.pipelined_optimizer_swapper import PipelinedOptimizerSwapper
 from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT, FP32_FLAT_GROUPS, PARTITION_COUNT, ZERO_STAGE
 from deepspeed.accelerator import get_accelerator
-from deepspeed.ops.op_builder import UtilsBuilder

 # Toggle this to true to enable correctness test
 # with gradient partitioning and without

@@ -126,10 +126,9 @@ def __init__(self,

         self.optimizer = init_optimizer

-        # Load pre-built or JIT compile (un)flatten ops
-        util_ops = UtilsBuilder().load()
-        self.flatten = util_ops.flatten
-        self.unflatten = util_ops.unflatten
+        # Use torch (un)flatten ops
+        self.flatten = _flatten_dense_tensors
+        self.unflatten = _unflatten_dense_tensors
         self.dtype = self.optimizer.param_groups[0]['params'][0].dtype
         self._global_grad_norm = 0.
9 changes: 4 additions & 5 deletions deepspeed/runtime/zero/stage_1_and_2.py
@@ -8,6 +8,7 @@
 from deepspeed import comm as dist
 from packaging import version as pkg_version
 from collections import OrderedDict
+from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

 from deepspeed.runtime import ZeROOptimizer
 from deepspeed.runtime.fp16.loss_scaler import CreateLossScaler

@@ -23,7 +24,6 @@

 from deepspeed.runtime.constants import PIPE_REPLICATED
 from deepspeed.accelerator import get_accelerator
-from deepspeed.ops.op_builder import UtilsBuilder

 from deepspeed.checkpoint.constants import (DS_VERSION, GROUP_PADDINGS, PARTITION_COUNT,
                                             SINGLE_PARTITION_OF_FP32_GROUPS, BASE_OPTIMIZER_STATE, CLIP_GRAD,

@@ -150,10 +150,9 @@ def __init__(self,
             raise SystemError("Cannot use fp16 without accelerator.")
         self.optimizer = init_optimizer

-        # Load pre-built or JIT compile (un)flatten ops
-        util_ops = UtilsBuilder().load()
-        self.flatten = util_ops.flatten
-        self.unflatten = util_ops.unflatten
+        # Use torch (un)flatten ops
+        self.flatten = _flatten_dense_tensors
+        self.unflatten = _unflatten_dense_tensors

         # ZeRO stage 1 (False) or 2 (True)
         self.partition_gradients = partition_grads
20 changes: 0 additions & 20 deletions op_builder/utils.py

This file was deleted.
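With all four call sites switched to the torch helpers, nothing imports UtilsBuilder any longer, so the builder module that compiled the custom flatten/unflatten ops is removed outright; its 20 lines account for the 0 additions and 20 deletions above.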