From e239b3be2075b663eed059766c96b878b30f6777 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Fri, 30 Apr 2021 09:15:16 -0700
Subject: [PATCH] [fp32] fix default dtype

I'm not sure if this is the best approach, but with
https://github.com/microsoft/DeepSpeed/pull/1004 I still have to pass
`zero.Init(dtype)`, because this branch never gets executed:

```
def _set_dtype(self, ds_config, dtype):
    if ds_config is not None and dtype is None:
        _ds_config = DeepSpeedConfig(ds_config)
        self.dtype = torch.half if _ds_config.fp16_enabled else torch.float
```

---
 deepspeed/runtime/zero/partition_parameters.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index 3cacc524bd69..72e5d8e04be9 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -301,7 +301,7 @@ def __init__(self,
                  pin_memory=False,
                  config=None,
                  enabled=True,
-                 dtype=torch.half):
+                 dtype=None):
         """A context to enable massive model construction for training with
         ZeRO-3. Models are automatically partitioned (or, sharded) across the
         system and converted to half precision.
@@ -325,8 +325,8 @@ def __init__(self,
                 for swapping fp16 params to NVMe.
             enabled (bool, optional): If ``False``, this context has no effect.
                 Defaults to ``True``.
-            dtype (``torch.dtype``, optional): Can be used to change the data type of the parameters.
-                Supported options are ``torch.half`` and ``torch.float``. Defaults to ``torch.half``
+            dtype (``dtype``, optional): Can be used to change the data type of the parameters.
+                Supported options are ``torch.half`` and ``torch.float``. Defaults to ``None``
 
         This context accelerates model initialization and enables models that
         are too large to allocate in their entirety in CPU memory. It has the