From 236f309b730cf294af2d8fad96a0cfe9b430a899 Mon Sep 17 00:00:00 2001
From: Reza Yazdani
Date: Thu, 20 Jan 2022 03:59:31 +0500
Subject: [PATCH] fix the inference tests based on the new changes on DeepSpeed

---
 inference/huggingface/gpt-neo.py        | 3 ++-
 inference/huggingface/run_generation.py | 3 ++-
 inference/huggingface/test-gpt.sh       | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/inference/huggingface/gpt-neo.py b/inference/huggingface/gpt-neo.py
index 25499c595..4c4f5707e 100644
--- a/inference/huggingface/gpt-neo.py
+++ b/inference/huggingface/gpt-neo.py
@@ -22,6 +22,7 @@
 generator.model = deepspeed.init_inference(generator.model,
                                            mp_size=world_size,
                                            dtype=torch.float,
-                                           replace_method='auto')
+                                           replace_method='auto',
+                                           replace_with_kernel_inject=True)
 string = generator("DeepSpeed is", do_sample=True, min_length=50)
 print(string)
diff --git a/inference/huggingface/run_generation.py b/inference/huggingface/run_generation.py
index 0bef0f499..a609bd1c5 100644
--- a/inference/huggingface/run_generation.py
+++ b/inference/huggingface/run_generation.py
@@ -261,7 +261,8 @@ def main():
     model = deepspeed.init_inference(model,
                                      mp_size=1,
                                      dtype=(torch.half if args.fp16 else torch.float),
-                                     injection_policy=injection_policy)
+                                     injection_policy=injection_policy,
+                                     replace_with_kernel_inject=True)

     model = model.module
     args.length = adjust_length_to_model(args.length, max_sequence_length=model.config.max_position_embeddings)
diff --git a/inference/huggingface/test-gpt.sh b/inference/huggingface/test-gpt.sh
index fe2bcd710..445f2dc2f 100644
--- a/inference/huggingface/test-gpt.sh
+++ b/inference/huggingface/test-gpt.sh
@@ -1,4 +1,4 @@
-deepspeed --num_gpus 1 run_generation.py \
+deepspeed --num_nodes 1 --num_gpus 1 run_generation.py \
 --model_type=gpt2 \
 --model_name_or_path=gpt2-xl \
 --sample_input single_query.txt \
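
For reference, a minimal sketch of how the patched gpt-neo.py call site reads
after this change. The pipeline setup, the EleutherAI/gpt-neo-2.7B model name,
and the LOCAL_RANK/WORLD_SIZE environment handling are illustrative
assumptions; only the init_inference arguments come from the diff above.

    import os

    import torch
    import deepspeed
    from transformers import pipeline

    # Rank and world size are provided by the deepspeed launcher.
    local_rank = int(os.getenv('LOCAL_RANK', '0'))
    world_size = int(os.getenv('WORLD_SIZE', '1'))

    # Model name is an assumption for illustration; any HF causal-LM
    # checkpoint supported by the text-generation pipeline works similarly.
    generator = pipeline('text-generation',
                         model='EleutherAI/gpt-neo-2.7B',
                         device=local_rank)

    # replace_with_kernel_inject=True is now passed alongside
    # replace_method='auto' so DeepSpeed injects its fused inference kernels.
    generator.model = deepspeed.init_inference(generator.model,
                                               mp_size=world_size,
                                               dtype=torch.float,
                                               replace_method='auto',
                                               replace_with_kernel_inject=True)

    string = generator("DeepSpeed is", do_sample=True, min_length=50)
    print(string)

A single-GPU launch mirroring the updated test-gpt.sh would be:

    deepspeed --num_nodes 1 --num_gpus 1 gpt-neo.py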