diff --git a/examples/language/bert/README.md b/examples/language/bert/README.md
index 81c3f03fffca..da38e8375bf0 100644
--- a/examples/language/bert/README.md
+++ b/examples/language/bert/README.md
@@ -7,6 +7,14 @@ This directory includes two parts: Using the Booster API finetune Huggingface Be
 bash test_ci.sh
 ```
 
+### Results on 2-GPU
+
+| Plugin         | Accuracy | F1-score |
+| -------------- | -------- | -------- |
+| torch_ddp      | 84.4%    | 88.6%    |
+| torch_ddp_fp16 | 84.7%    | 88.8%    |
+| gemini         | 84.0%    | 88.4%    |
+
 ## Benchmark
 ```
 bash benchmark.sh
@@ -14,9 +22,9 @@ bash benchmark.sh
 
 Now include these metrics in benchmark: CUDA mem occupy, throughput and the number of model parameters. If you have custom metrics, you can add them to benchmark_util.
 
-## Results
+### Results
 
-### Bert
+#### Bert
 
 | | max cuda mem | throughput(sample/s) | params |
 | :-----| -----------: | :--------: | :----: |
@@ -25,10 +33,10 @@ Now include these metrics in benchmark: CUDA mem occupy, throughput and the numb
 | gemini | 11.0 GB | 12.9 | 82M |
 | low_level_zero | 11.29 G | 14.7 | 82M |
 
-### AlBert
+#### AlBert
 | | max cuda mem | throughput(sample/s) | params |
 | :-----| -----------: | :--------: | :----: |
 | ddp | OOM | | |
 | ddp_fp16 | OOM | | |
 | gemini | 69.39 G | 1.3 | 208M |
-| low_level_zero | 56.89 G | 1.4 | 208M |
\ No newline at end of file
+| low_level_zero | 56.89 G | 1.4 | 208M |
diff --git a/examples/language/bert/finetune.py b/examples/language/bert/finetune.py
index b209ffde85a4..59f10a77c22d 100644
--- a/examples/language/bert/finetune.py
+++ b/examples/language/bert/finetune.py
@@ -38,8 +38,8 @@ def move_to_cuda(batch):
 
 
 @torch.no_grad()
-def evaluate_model(model: nn.Module, test_dataloader: Union[DataLoader, List[DataLoader]], num_labels: int, task_name: str,
-                   eval_splits: List[str], coordinator: DistCoordinator):
+def evaluate_model(model: nn.Module, test_dataloader: Union[DataLoader, List[DataLoader]], num_labels: int,
+                   task_name: str, eval_splits: List[str], coordinator: DistCoordinator):
     metric = evaluate.load("glue", task_name, process_id=coordinator.rank, num_process=coordinator.world_size)
     model.eval()
 
@@ -142,7 +142,7 @@ def main():
     if args.plugin.startswith('torch_ddp'):
         plugin = TorchDDPPlugin()
     elif args.plugin == 'gemini':
-        plugin = GeminiPlugin(placement_policy='cuda', strict_ddp_mode=True, initial_scale=2**5)
+        plugin = GeminiPlugin(initial_scale=2**5)
     elif args.plugin == 'low_level_zero':
         plugin = LowLevelZeroPlugin(initial_scale=2**5)
 
@@ -208,7 +208,7 @@ def main():
         train_epoch(epoch, model, optimizer, lr_scheduler, train_dataloader, booster, coordinator)
 
         results = evaluate_model(model, test_dataloader, data_builder.num_labels, args.task, data_builder.eval_splits,
-                                 coordinator)
+                                 coordinator)
 
     if coordinator.is_master():
         print(results)
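
For reference, a minimal sketch of how the plugin selection in `finetune.py` reads after this patch. The `build_plugin` helper is illustrative and not part of the diff; only the branch bodies mirror the patched code, where `GeminiPlugin` now relies on its defaults instead of `placement_policy='cuda'` and `strict_ddp_mode=True`.

```python
# Illustrative sketch only: mirrors the plugin branch in finetune.py after this patch.
from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin


def build_plugin(plugin_name: str):
    # 'torch_ddp' and 'torch_ddp_fp16' both map to TorchDDPPlugin, as in the example.
    if plugin_name.startswith('torch_ddp'):
        return TorchDDPPlugin()
    elif plugin_name == 'gemini':
        # 2**5 is the initial loss scale used by the example; placement_policy and
        # strict_ddp_mode are no longer passed and fall back to the plugin defaults.
        return GeminiPlugin(initial_scale=2**5)
    elif plugin_name == 'low_level_zero':
        return LowLevelZeroPlugin(initial_scale=2**5)
    raise ValueError(f"unsupported plugin: {plugin_name}")
```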