From 236f309b730cf294af2d8fad96a0cfe9b430a899 Mon Sep 17 00:00:00 2001
From: Reza Yazdani
Date: Thu, 20 Jan 2022 03:59:31 +0500
Subject: [PATCH] fix the inference tests based on the new changes on DeepSpeed

---
 inference/huggingface/gpt-neo.py        | 3 ++-
 inference/huggingface/run_generation.py | 3 ++-
 inference/huggingface/test-gpt.sh       | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/inference/huggingface/gpt-neo.py b/inference/huggingface/gpt-neo.py
index 25499c595..4c4f5707e 100644
--- a/inference/huggingface/gpt-neo.py
+++ b/inference/huggingface/gpt-neo.py
@@ -22,6 +22,7 @@
 generator.model = deepspeed.init_inference(generator.model,
                                            mp_size=world_size,
                                            dtype=torch.float,
-                                           replace_method='auto')
+                                           replace_method='auto',
+                                           replace_with_kernel_inject=True)
 string = generator("DeepSpeed is", do_sample=True, min_length=50)
 print(string)
diff --git a/inference/huggingface/run_generation.py b/inference/huggingface/run_generation.py
index 0bef0f499..a609bd1c5 100644
--- a/inference/huggingface/run_generation.py
+++ b/inference/huggingface/run_generation.py
@@ -261,7 +261,8 @@ def main():
     model = deepspeed.init_inference(model,
                                      mp_size=1,
                                      dtype=(torch.half if args.fp16 else torch.float),
-                                     injection_policy=injection_policy)
+                                     injection_policy=injection_policy,
+                                     replace_with_kernel_inject=True)

     model = model.module
     args.length = adjust_length_to_model(args.length, max_sequence_length=model.config.max_position_embeddings)
diff --git a/inference/huggingface/test-gpt.sh b/inference/huggingface/test-gpt.sh
index fe2bcd710..445f2dc2f 100644
--- a/inference/huggingface/test-gpt.sh
+++ b/inference/huggingface/test-gpt.sh
@@ -1,4 +1,4 @@
-deepspeed --num_gpus 1 run_generation.py \
+deepspeed --num_nodes 1 --num_gpus 1 run_generation.py \
 --model_type=gpt2 \
 --model_name_or_path=gpt2-xl \
 --sample_input single_query.txt \
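
For reference, a minimal sketch of how the patched gpt-neo.py call site reads
after this change. The pipeline setup, the EleutherAI/gpt-neo-2.7B model name,
and the LOCAL_RANK/WORLD_SIZE environment handling are illustrative
assumptions; only the init_inference arguments come from the diff above.

    import os

    import torch
    import deepspeed
    from transformers import pipeline

    # Rank and world size are provided by the deepspeed launcher.
    local_rank = int(os.getenv('LOCAL_RANK', '0'))
    world_size = int(os.getenv('WORLD_SIZE', '1'))

    # Model name is an assumption for illustration; any HF causal-LM
    # checkpoint supported by the text-generation pipeline works similarly.
    generator = pipeline('text-generation',
                         model='EleutherAI/gpt-neo-2.7B',
                         device=local_rank)

    # replace_with_kernel_inject=True is now passed alongside
    # replace_method='auto' so DeepSpeed injects its fused inference kernels.
    generator.model = deepspeed.init_inference(generator.model,
                                               mp_size=world_size,
                                               dtype=torch.float,
                                               replace_method='auto',
                                               replace_with_kernel_inject=True)

    string = generator("DeepSpeed is", do_sample=True, min_length=50)
    print(string)

A single-GPU launch mirroring the updated test-gpt.sh would be:

    deepspeed --num_nodes 1 --num_gpus 1 gpt-neo.py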