From 74e6073aca34b40f04ee9c6586e6c194372593d9 Mon Sep 17 00:00:00 2001
From: MaximumEntropy
Date: Thu, 17 Nov 2022 10:18:47 -0800
Subject: [PATCH] Set sync_batch_comm in other places

Signed-off-by: MaximumEntropy
---
 .../language_modeling/megatron_gpt_prompt_learning_model.py     | 2 ++
 .../language_modeling/megatron_t5_prompt_learning_model.py      | 2 ++
 nemo/collections/nlp/modules/common/text_generation_strategy.py | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
index 21ff3e6ad4d9..febec3fe0da5 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
@@ -585,6 +585,7 @@ def fwd_bwd_step(self, batch, batch_idx, forward_only):
                 dtype=self.autocast_dtype,
                 grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None,
                 sequence_parallel_enabled=self.cfg.get("sequence_parallel", False),
+                sync_batch_comm=self.frozen_model.cfg.get('sync_batch_comm', False),
             )
         else:
             losses_reduced_per_micro_batch = forward_backward_no_pipelining(
@@ -595,6 +596,7 @@ def fwd_bwd_step(self, batch, batch_idx, forward_only):
                 tensor_shape=tensor_shape,
                 dtype=self.autocast_dtype,
                 grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None,
+                sync_batch_comm=self.frozen_model.cfg.get('sync_batch_comm', False),
             )
 
         # only the last stages of the pipeline return losses
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
index 3c132abe1991..3e668347ce14 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
@@ -203,6 +203,7 @@ def fwd_bwd_step(self, batch, batch_idx, forward_only):
                 dtype=self.autocast_dtype,
                 grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None,
                 sequence_parallel_enabled=False,
+                sync_batch_comm=self.frozen_model.cfg.get('sync_batch_comm', False),
             )
         else:
             losses_reduced_per_micro_batch = forward_backward_no_pipelining(
@@ -214,6 +215,7 @@ def fwd_bwd_step(self, batch, batch_idx, forward_only):
                 tensor_shape=tensor_shape,
                 decoder_sequence_length=dec_seq_length,
                 dtype=self.autocast_dtype,
                 grad_scaler=self.trainer.precision_plugin.scaler if self.cfg.precision == 16 else None,
+                sync_batch_comm=self.frozen_model.cfg.get('sync_batch_comm', False),
             )
         # only the last stages of the pipeline return losses
diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py
index c6daf110b830..1bb9c3e41014 100644
--- a/nemo/collections/nlp/modules/common/text_generation_strategy.py
+++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py
@@ -61,6 +61,7 @@ def forward_step(self, batch, tensor_shape):
                 forward_only=True,
                 tensor_shape=tensor_shape,
                 dtype=self.model.autocast_dtype,
+                sync_batch_comm=self.model.cfg.get('sync_batch_comm', False),
             )
         else:
             output_tensor = forward_backward_no_pipelining(
@@ -70,6 +71,7 @@ def forward_step(self, batch, tensor_shape):
                 forward_only=True,
                 tensor_shape=tensor_shape,
                 dtype=self.model.autocast_dtype,
+                sync_batch_comm=self.model.cfg.get('sync_batch_comm', False),
             )
 
         return output_tensor
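
Note on the pattern the patch repeats: each call site reads sync_batch_comm from the relevant model config with a False fallback, so configs that predate the flag keep working without migration. Below is a minimal, self-contained sketch of that config-with-fallback lookup; it assumes only OmegaConf, and run_fwd_bwd is a hypothetical stand-in for a forward/backward schedule function that accepts a sync_batch_comm keyword (it is not a NeMo or Apex API).

# Minimal sketch (not part of this patch): the config-with-fallback pattern
# used at each modified call site. OmegaConf's DictConfig.get() returns the
# stored value when the key exists and the supplied default otherwise.
from omegaconf import OmegaConf


def run_fwd_bwd(*, sync_batch_comm: bool) -> None:
    # Hypothetical stand-in for a forward/backward schedule function that
    # takes a sync_batch_comm keyword; here it only echoes the flag.
    print(f"sync_batch_comm={sync_batch_comm}")


# Older config without the key: .get() falls back to False, mirroring
# self.frozen_model.cfg.get('sync_batch_comm', False) in the patch.
legacy_cfg = OmegaConf.create({"precision": 16})
run_fwd_bwd(sync_batch_comm=legacy_cfg.get("sync_batch_comm", False))

# Config that sets the key explicitly.
updated_cfg = OmegaConf.create({"precision": 16, "sync_batch_comm": True})
run_fwd_bwd(sync_batch_comm=updated_cfg.get("sync_batch_comm", False))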