From bc98d268e4ef85e593f43482a318cdb68ae32b26 Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Tue, 20 Apr 2021 16:35:47 +0100
Subject: [PATCH 1/4] Add check to see if json file is already loaded

---
 deepspeed/runtime/config.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/deepspeed/runtime/config.py b/deepspeed/runtime/config.py
index 9e33876994f9..3ded59e7fca9 100755
--- a/deepspeed/runtime/config.py
+++ b/deepspeed/runtime/config.py
@@ -521,8 +521,10 @@ def write_config(self, filename):
 class DeepSpeedConfig(object):
     def __init__(self, json_file, mpu=None, param_dict=None):
         super(DeepSpeedConfig, self).__init__()
-
-        if param_dict is None:
+        if isinstance(json_file, dict):
+            # Assume json file has been loaded
+            self._param_dict = json_file
+        elif param_dict is None:
             self._param_dict = json.load(
                 open(json_file,
                      'r'),

From 4b31cc2ed994feeff1a5def1cd768f65a5a7b349 Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Tue, 20 Apr 2021 16:39:52 +0100
Subject: [PATCH 2/4] Update doc

---
 deepspeed/runtime/zero/partition_parameters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index c8bde6390b3c..863aa564d925 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -291,7 +291,7 @@ def __init__(self,
             pin_memory (bool, optional): Potentially increase performance by
                 using pinned memory for model weights. ``remote_device`` must be
                 ``"cpu"``. Defaults to ``False``.
-            deepspeed_config (``json file``, optional): If provided, provides configuration
+            deepspeed_config (``json file`` or dict, optional): If provided, provides configuration
                 for swapping fp16 params to NVMe.
             enabled (bool, optional): If ``False``, this context has no
                 effect. Defaults to ``True``.

From 077d4e97f9c11e0c0318d1be9414a36eef86e2c4 Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Tue, 20 Apr 2021 17:32:31 +0100
Subject: [PATCH 3/4] Address review

---
 deepspeed/runtime/config.py                    |  6 ++----
 deepspeed/runtime/zero/partition_parameters.py | 11 +++++++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/deepspeed/runtime/config.py b/deepspeed/runtime/config.py
index 3ded59e7fca9..9e33876994f9 100755
--- a/deepspeed/runtime/config.py
+++ b/deepspeed/runtime/config.py
@@ -521,10 +521,8 @@ def write_config(self, filename):
 class DeepSpeedConfig(object):
     def __init__(self, json_file, mpu=None, param_dict=None):
         super(DeepSpeedConfig, self).__init__()
-        if isinstance(json_file, dict):
-            # Assume json file has been loaded
-            self._param_dict = json_file
-        elif param_dict is None:
+
+        if param_dict is None:
             self._param_dict = json.load(
                 open(json_file,
                      'r'),
diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index 863aa564d925..b130c2cd527b 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -271,6 +271,7 @@ def __init__(self,
                  remote_device=None,
                  pin_memory=False,
                  deepspeed_config=None,
+                 param_dict=None,
                  enabled=True):
         """A context to enable massive model construction for training with
         ZeRO-3. Models are automatically partitioned (or, sharded) across the
@@ -293,6 +294,8 @@ def __init__(self,
                 ``"cpu"``. Defaults to ``False``.
             deepspeed_config (``json file`` or dict, optional): If provided, provides configuration
                 for swapping fp16 params to NVMe.
+            param_dict (dict, optional): Instead of requiring a deepspeed_config you can pass your deepspeed config
+                as a dictionary instead for swapping fp16 params to NVMe.
             enabled (bool, optional): If ``False``, this context has no
                 effect. Defaults to ``True``.
 
@@ -382,7 +385,7 @@ def get_model():
         #It is the device where parameters are fully instantiated using allgather
         self.local_device = torch.device('cuda:{}'.format(os.environ["LOCAL_RANK"]))
 
-        self._validate_remote_device(remote_device, deepspeed_config)
+        self._validate_remote_device(remote_device, deepspeed_config, param_dict)
 
         #Remote device is the device where parameter partiitons are stored
         #It can be same as local_device or it could be CPU or NVMe.
@@ -392,7 +395,7 @@ def get_model():
 
         # Enable fp16 param swapping to NVMe
         if self.remote_device == OFFLOAD_NVME_DEVICE:
-            _ds_config = DeepSpeedConfig(deepspeed_config)
+            _ds_config = DeepSpeedConfig(deepspeed_config, param_dict=param_dict)
             self.param_swapper = AsyncPartitionedParameterSwapper(_ds_config)
         else:
             self.param_swapper = None
@@ -406,9 +409,9 @@ def get_model():
                 self._convert_to_deepspeed_param(param)
                 param.partition()
 
-    def _validate_remote_device(self, remote_device, ds_config):
+    def _validate_remote_device(self, remote_device, ds_config, param_dict):
         if ds_config is not None:
-            _ds_config = DeepSpeedConfig(ds_config)
+            _ds_config = DeepSpeedConfig(ds_config, param_dict=param_dict)
             if remote_device in [None, OFFLOAD_CPU_DEVICE]:
                 if _ds_config.zero_config.offload_param is not None:
                     offload_param_device = _ds_config.zero_config.offload_param[

From e6a6f4e1619996c4e1f0ca8dad1e51a0791f8a19 Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Tue, 20 Apr 2021 17:33:26 +0100
Subject: [PATCH 4/4] Remove doc comment

---
 deepspeed/runtime/zero/partition_parameters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index b130c2cd527b..9ff5a7232a73 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -292,7 +292,7 @@ def __init__(self,
             pin_memory (bool, optional): Potentially increase performance by
                 using pinned memory for model weights. ``remote_device`` must be
                 ``"cpu"``. Defaults to ``False``.
-            deepspeed_config (``json file`` or dict, optional): If provided, provides configuration
+            deepspeed_config (``json file``, optional): If provided, provides configuration
                 for swapping fp16 params to NVMe.
             param_dict (dict, optional): Instead of requiring a deepspeed_config you can pass your deepspeed config
                 as a dictionary instead for swapping fp16 params to NVMe.