From b72a9fa5ba742ec3c9348457f9784981db1c1862 Mon Sep 17 00:00:00 2001 From: Zhilin Wang Date: Fri, 28 Oct 2022 15:13:30 -0700 Subject: [PATCH 1/3] set add_pooling_layer=False for huggingface bert model --- .../nlp/modules/common/huggingface/huggingface_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py b/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py index 5ecd0591048c..f7e8810edf33 100644 --- a/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py +++ b/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py @@ -129,9 +129,9 @@ def get_huggingface_lm_model( return model_class(config_class.from_json_file(config_file)) if config_dict: config_class = HUGGINGFACE_MODELS[model_type]["config"] - return model_class(config=config_class(**config_dict)) + return model_class(config=config_class(**config_dict), add_pooling_layer=False) else: - return model_class.from_pretrained(pretrained_model_name) + return model_class.from_pretrained(pretrained_model_name, add_pooling_layer=False) else: raise ValueError(f"Use HuggingFace API directly in NeMo for {pretrained_model_name}") From f293771229dfab40c5cd0e641c9ef382efa75235 Mon Sep 17 00:00:00 2001 From: Zhilin Wang Date: Fri, 28 Oct 2022 16:18:37 -0700 Subject: [PATCH 2/3] remove add_pooling_layer=False and set find_unused_parameters=True --- examples/nlp/dialogue/dialogue.py | 2 +- .../nlp/modules/common/huggingface/huggingface_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/nlp/dialogue/dialogue.py b/examples/nlp/dialogue/dialogue.py index 894937a6b365..0da5ae5717da 100644 --- a/examples/nlp/dialogue/dialogue.py +++ b/examples/nlp/dialogue/dialogue.py @@ -66,7 +66,7 @@ def main(cfg: DictConfig) -> None: logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') try: - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False,) + strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=True,) except (ImportError, ModuleNotFoundError): strategy = None diff --git a/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py b/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py index f7e8810edf33..5ecd0591048c 100644 --- a/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py +++ b/nemo/collections/nlp/modules/common/huggingface/huggingface_utils.py @@ -129,9 +129,9 @@ def get_huggingface_lm_model( return model_class(config_class.from_json_file(config_file)) if config_dict: config_class = HUGGINGFACE_MODELS[model_type]["config"] - return model_class(config=config_class(**config_dict), add_pooling_layer=False) + return model_class(config=config_class(**config_dict)) else: - return model_class.from_pretrained(pretrained_model_name, add_pooling_layer=False) + return model_class.from_pretrained(pretrained_model_name) else: raise ValueError(f"Use HuggingFace API directly in NeMo for {pretrained_model_name}") From 261eaef2db553bd00e15ad76fd804d30b6a4c499 Mon Sep 17 00:00:00 2001 From: Zhilin Wang Date: Tue, 1 Nov 2022 12:02:10 -0700 Subject: [PATCH 3/3] set num_prompt_tokens to 0 for huggingface --- .../nlp/models/dialogue/dialogue_gpt_classification_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py b/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py index dcf461d5334f..9608a0320bd6 100644 --- a/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py +++ b/nemo/collections/nlp/models/dialogue/dialogue_gpt_classification_model.py @@ -539,6 +539,7 @@ def generate_candidates(self, labels, template_length, input_ids, attn_masks): for i in generated_tokens ] generated_tokens = torch.cat(generated_tokens, axis=0) + num_prompt_tokens = 0 elif self.cfg.library == "megatron":