From bc98d268e4ef85e593f43482a318cdb68ae32b26 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 20 Apr 2021 16:35:47 +0100 Subject: [PATCH 1/4] Add check to see if json file is already loaded --- deepspeed/runtime/config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deepspeed/runtime/config.py b/deepspeed/runtime/config.py index 9e33876994f9..3ded59e7fca9 100755 --- a/deepspeed/runtime/config.py +++ b/deepspeed/runtime/config.py @@ -521,8 +521,10 @@ def write_config(self, filename): class DeepSpeedConfig(object): def __init__(self, json_file, mpu=None, param_dict=None): super(DeepSpeedConfig, self).__init__() - - if param_dict is None: + if isinstance(json_file, dict): + # Assume json file has been loaded + self._param_dict = json_file + elif param_dict is None: self._param_dict = json.load( open(json_file, 'r'), From 4b31cc2ed994feeff1a5def1cd768f65a5a7b349 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 20 Apr 2021 16:39:52 +0100 Subject: [PATCH 2/4] Update doc --- deepspeed/runtime/zero/partition_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py index c8bde6390b3c..863aa564d925 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -291,7 +291,7 @@ def __init__(self, pin_memory (bool, optional): Potentially increase performance by using pinned memory for model weights. ``remote_device`` must be ``"cpu"``. Defaults to ``False``. - deepspeed_config (``json file``, optional): If provided, provides configuration + deepspeed_config (``json file`` or dict, optional): If provided, provides configuration for swapping fp16 params to NVMe. enabled (bool, optional): If ``False``, this context has no effect. Defaults to ``True``. From 077d4e97f9c11e0c0318d1be9414a36eef86e2c4 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 20 Apr 2021 17:32:31 +0100 Subject: [PATCH 3/4] Address review --- deepspeed/runtime/config.py | 6 ++---- deepspeed/runtime/zero/partition_parameters.py | 11 +++++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/deepspeed/runtime/config.py b/deepspeed/runtime/config.py index 3ded59e7fca9..9e33876994f9 100755 --- a/deepspeed/runtime/config.py +++ b/deepspeed/runtime/config.py @@ -521,10 +521,8 @@ def write_config(self, filename): class DeepSpeedConfig(object): def __init__(self, json_file, mpu=None, param_dict=None): super(DeepSpeedConfig, self).__init__() - if isinstance(json_file, dict): - # Assume json file has been loaded - self._param_dict = json_file - elif param_dict is None: + + if param_dict is None: self._param_dict = json.load( open(json_file, 'r'), diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py index 863aa564d925..b130c2cd527b 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -271,6 +271,7 @@ def __init__(self, remote_device=None, pin_memory=False, deepspeed_config=None, + param_dict=None, enabled=True): """A context to enable massive model construction for training with ZeRO-3. Models are automatically partitioned (or, sharded) across the @@ -293,6 +294,8 @@ def __init__(self, ``"cpu"``. Defaults to ``False``. deepspeed_config (``json file`` or dict, optional): If provided, provides configuration for swapping fp16 params to NVMe. + param_dict (dict, optional): Instead of requiring a deepspeed_config you can pass your deepspeed config + as a dictionary instead for swapping fp16 params to NVMe. enabled (bool, optional): If ``False``, this context has no effect. Defaults to ``True``. @@ -382,7 +385,7 @@ def get_model(): #It is the device where parameters are fully instantiated using allgather self.local_device = torch.device('cuda:{}'.format(os.environ["LOCAL_RANK"])) - self._validate_remote_device(remote_device, deepspeed_config) + self._validate_remote_device(remote_device, deepspeed_config, param_dict) #Remote device is the device where parameter partiitons are stored #It can be same as local_device or it could be CPU or NVMe. @@ -392,7 +395,7 @@ def get_model(): # Enable fp16 param swapping to NVMe if self.remote_device == OFFLOAD_NVME_DEVICE: - _ds_config = DeepSpeedConfig(deepspeed_config) + _ds_config = DeepSpeedConfig(deepspeed_config, param_dict=param_dict) self.param_swapper = AsyncPartitionedParameterSwapper(_ds_config) else: self.param_swapper = None @@ -406,9 +409,9 @@ def get_model(): self._convert_to_deepspeed_param(param) param.partition() - def _validate_remote_device(self, remote_device, ds_config): + def _validate_remote_device(self, remote_device, ds_config, param_dict): if ds_config is not None: - _ds_config = DeepSpeedConfig(ds_config) + _ds_config = DeepSpeedConfig(ds_config, param_dict=param_dict) if remote_device in [None, OFFLOAD_CPU_DEVICE]: if _ds_config.zero_config.offload_param is not None: offload_param_device = _ds_config.zero_config.offload_param[ From e6a6f4e1619996c4e1f0ca8dad1e51a0791f8a19 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 20 Apr 2021 17:33:26 +0100 Subject: [PATCH 4/4] Remove doc comment --- deepspeed/runtime/zero/partition_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py index b130c2cd527b..9ff5a7232a73 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -292,7 +292,7 @@ def __init__(self, pin_memory (bool, optional): Potentially increase performance by using pinned memory for model weights. ``remote_device`` must be ``"cpu"``. Defaults to ``False``. - deepspeed_config (``json file`` or dict, optional): If provided, provides configuration + deepspeed_config (``json file``, optional): If provided, provides configuration for swapping fp16 params to NVMe. param_dict (dict, optional): Instead of requiring a deepspeed_config you can pass your deepspeed config as a dictionary instead for swapping fp16 params to NVMe.