From e239b3be2075b663eed059766c96b878b30f6777 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Fri, 30 Apr 2021 09:15:16 -0700
Subject: [PATCH] [fp32] fix default dtype

I'm not sure if this is the best approach, but with
https://github.com/microsoft/DeepSpeed/pull/1004 I still have to pass
`zero.Init(dtype)`, because this branch never gets executed:

```
def _set_dtype(self, ds_config, dtype):
    if ds_config is not None and dtype is None:
        _ds_config = DeepSpeedConfig(ds_config)
        self.dtype = torch.half if _ds_config.fp16_enabled else torch.float
```

---
 deepspeed/runtime/zero/partition_parameters.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index 3cacc524bd69..72e5d8e04be9 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -301,7 +301,7 @@ def __init__(self,
                  pin_memory=False,
                  config=None,
                  enabled=True,
-                 dtype=torch.half):
+                 dtype=None):
         """A context to enable massive model construction for training with
         ZeRO-3. Models are automatically partitioned (or, sharded) across the
         system and converted to half precision.
@@ -325,8 +325,8 @@ def __init__(self,
                 for swapping fp16 params to NVMe.
             enabled (bool, optional): If ``False``, this context has no effect.
                 Defaults to ``True``.
-            dtype (``torch.dtype``, optional): Can be used to change the data type of the parameters.
-                Supported options are ``torch.half`` and ``torch.float``. Defaults to ``torch.half``
+            dtype (``dtype``, optional): Can be used to change the data type of the parameters.
+                Supported options are ``torch.half`` and ``torch.float``. Defaults to ``None``
 
         This context accelerates model initialization and enables models that
         are too large to allocate in their entirety in CPU memory. It has the