Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ information [here](https://innovation.microsoft.com/en-us/exploring-ai-at-scale)


# News
* [2021/03/16] [1-bit Adam v2: NCCL-based implementation and more](https://www.deepspeed.ai/tutorials/onebit-adam/)
* [2021/03/08] [ZeRO-3 Offload: Scale your models to trillion parameters without code changes while leveraging both CPUs & GPUs](https://www.deepspeed.ai/news/2021/03/07/zero3-offload.html)
* [2020/11/12] [Simplified install, JIT compiled ops, PyPI releases, and reduced dependencies](#installation)
* [2020/11/10] [Efficient and robust compressed training through progressive layer dropping](https://www.deepspeed.ai/news/2020/10/28/progressive-layer-dropping-news.html)
Expand Down
12 changes: 8 additions & 4 deletions deepspeed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def _parse_version(version_str):
sys.modules['deepspeed.pt.loss_scaler'] = deepspeed.runtime.fp16.loss_scaler


def initialize(args,
model,
def initialize(args=None,
model=None,
optimizer=None,
model_parameters=None,
training_data=None,
Expand All @@ -62,8 +62,7 @@ def initialize(args,
"""Initialize the DeepSpeed Engine.

Arguments:
args: a dictionary containing local_rank and deepspeed_config
file location
args: an object containing local_rank and deepspeed_config fields. This is optional if `config_params` is passed.

model: Required: nn.module class before apply any wrappers

Expand All @@ -88,6 +87,9 @@ def initialize(args,
mini-batch of Tensor(s). Used when using batched loading from a
map-style dataset.

config_params: Optional: Instead of requiring args.deepspeed_config, you can pass your DeepSpeed config
as a dictionary.

Returns:
A tuple of ``engine``, ``optimizer``, ``training_dataloader``, ``lr_scheduler``

Expand All @@ -108,6 +110,8 @@ def initialize(args,
__git_branch__),
ranks=[0])

assert model is not None, "deepspeed.initialize requires a model"

if not isinstance(model, PipelineModule):
engine = DeepSpeedEngine(args=args,
model=model,
Expand Down
6 changes: 6 additions & 0 deletions deepspeed/launcher/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,12 @@ def main(args=None):
result = subprocess.Popen(cmd, env=env)
result.wait()

# In case of failure we must propagate the error condition back to the caller (usually a shell). The
# actual error and traceback should have been printed in the subprocess, so in order to avoid
# unnecessary noise we just quietly exit here with the same code as the subprocess
if result.returncode > 0:
sys.exit(result.returncode)


if __name__ == "__main__":
main()
10 changes: 8 additions & 2 deletions deepspeed/profiling/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
Licensed under the MIT license.
"""

from deepspeed.runtime.config_utils import get_scalar_param
from deepspeed.runtime.config_utils import get_scalar_param, DeepSpeedConfigObject
from deepspeed.profiling.constants import *


class DeepSpeedFlopsProfilerConfig(object):
class DeepSpeedFlopsProfilerConfig(DeepSpeedConfigObject):
def __init__(self, param_dict):
"""
Parse flops profiler settings from the DeepSpeed config dictionary.
"""
super(DeepSpeedFlopsProfilerConfig, self).__init__()

self.enabled = None
Expand All @@ -24,6 +27,9 @@ def __init__(self, param_dict):
self._initialize(flops_profiler_dict)

def _initialize(self, flops_profiler_dict):
"""
Read each flops profiler option from the config dict, falling back to its default.
"""
self.enabled = get_scalar_param(flops_profiler_dict,
FLOPS_PROFILER_ENABLED,
FLOPS_PROFILER_ENABLED_DEFAULT)
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/profiling/flops_profiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def del_extra_repr(module):
"Each module profile is listed after its name in the following order: \nnumber of parameters, percentage of total parameters, number of multiply-accumulate operations (MACs), percentage of total MACs, latency, percentage of total latency, number of floating point operations per second (FLOPS, computed as 2 * MACs / latency)."
)
print(
"Note: \n1. A module can have torch.nn.functional (e.g. to compute logits) along with submodules, thus making the difference between the parent's MACs(or latency) and the sum of its submodules'.\n2. Number of floating point operations is a theoretical estimation, thus FLOPS computed using that could be larger than the maximum system throught.\n"
"Note: \n1. A module can have torch.nn.functional (e.g. to compute logits) along with submodules, thus making the difference between the parent's MACs(or latency) and the sum of its submodules'.\n2. Number of floating point operations is a theoretical estimation, thus FLOPS computed using that could be larger than the maximum system throughput.\n"
)
print(self.model)

Expand Down
11 changes: 2 additions & 9 deletions deepspeed/runtime/activation_checkpointing/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Licensed under the MIT license.
"""

from deepspeed.runtime.config_utils import get_scalar_param
from deepspeed.runtime.config_utils import get_scalar_param, DeepSpeedConfigObject

#########################################
# DeepSpeed Activation Checkpointing
Expand Down Expand Up @@ -56,7 +56,7 @@
}


class DeepSpeedActivationCheckpointingConfig(object):
class DeepSpeedActivationCheckpointingConfig(DeepSpeedConfigObject):
def __init__(self, param_dict):
super(DeepSpeedActivationCheckpointingConfig, self).__init__()

Expand All @@ -74,13 +74,6 @@ def __init__(self, param_dict):

self._initialize(act_chkpt_config_dict)

"""
For json serialization
"""

def repr(self):
return self.__dict__

def _initialize(self, act_chkpt_config_dict):
self.partition_activations = get_scalar_param(
act_chkpt_config_dict,
Expand Down
Empty file.
Loading