diff --git a/examples/language/bert/README.md b/examples/language/bert/README.md
index 81c3f03fffca..da38e8375bf0 100644
--- a/examples/language/bert/README.md
+++ b/examples/language/bert/README.md
@@ -7,6 +7,14 @@ This directory includes two parts: Using the Booster API finetune Huggingface Be
 bash test_ci.sh
 ```
 
+### Results on 2-GPU
+
+| Plugin         | Accuracy | F1-score |
+| -------------- | -------- | -------- |
+| torch_ddp      | 84.4%    | 88.6%    |
+| torch_ddp_fp16 | 84.7%    | 88.8%    |
+| gemini         | 84.0%    | 88.4%    |
+
 ## Benchmark
 ```
 bash benchmark.sh
@@ -14,9 +22,9 @@ bash benchmark.sh
 
 Now include these metrics in benchmark: CUDA mem occupy, throughput and the number of model parameters. If you have custom metrics, you can add them to benchmark_util.
 
-## Results
+### Results
 
-### Bert
+#### Bert
 
 | | max cuda mem | throughput(sample/s) | params |
 | :-----| -----------: | :--------: | :----: |
@@ -25,10 +33,10 @@ Now include these metrics in benchmark: CUDA mem occupy, throughput and the numb
 | gemini | 11.0 GB | 12.9 | 82M |
 | low_level_zero | 11.29 G | 14.7 | 82M |
 
-### AlBert
+#### AlBert
 | | max cuda mem | throughput(sample/s) | params |
 | :-----| -----------: | :--------: | :----: |
 | ddp | OOM | | |
 | ddp_fp16 | OOM | | |
 | gemini | 69.39 G | 1.3 | 208M |
-| low_level_zero | 56.89 G | 1.4 | 208M |
\ No newline at end of file
+| low_level_zero | 56.89 G | 1.4 | 208M |
diff --git a/examples/language/bert/finetune.py b/examples/language/bert/finetune.py
index b209ffde85a4..59f10a77c22d 100644
--- a/examples/language/bert/finetune.py
+++ b/examples/language/bert/finetune.py
@@ -38,8 +38,8 @@ def move_to_cuda(batch):
 
 
 @torch.no_grad()
-def evaluate_model(model: nn.Module, test_dataloader: Union[DataLoader, List[DataLoader]], num_labels: int, task_name: str,
-                   eval_splits: List[str], coordinator: DistCoordinator):
+def evaluate_model(model: nn.Module, test_dataloader: Union[DataLoader, List[DataLoader]], num_labels: int,
+                   task_name: str, eval_splits: List[str], coordinator: DistCoordinator):
     metric = evaluate.load("glue", task_name, process_id=coordinator.rank, num_process=coordinator.world_size)
     model.eval()
 
@@ -142,7 +142,7 @@ def main():
     if args.plugin.startswith('torch_ddp'):
         plugin = TorchDDPPlugin()
     elif args.plugin == 'gemini':
-        plugin = GeminiPlugin(placement_policy='cuda', strict_ddp_mode=True, initial_scale=2**5)
+        plugin = GeminiPlugin(initial_scale=2**5)
     elif args.plugin == 'low_level_zero':
         plugin = LowLevelZeroPlugin(initial_scale=2**5)
 
@@ -208,7 +208,7 @@ def main():
         train_epoch(epoch, model, optimizer, lr_scheduler, train_dataloader, booster, coordinator)
 
         results = evaluate_model(model, test_dataloader, data_builder.num_labels, args.task, data_builder.eval_splits,
-                                 coordinator)
+                                 coordinator)
 
     if coordinator.is_master():
         print(results)
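
For reference, a minimal sketch of how the plugin selection in `finetune.py` reads after this patch. The `build_plugin` helper is illustrative and not part of the diff; only the branch bodies mirror the patched code, where `GeminiPlugin` now relies on its defaults instead of `placement_policy='cuda'` and `strict_ddp_mode=True`.

```python
# Illustrative sketch only: mirrors the plugin branch in finetune.py after this patch.
from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin


def build_plugin(plugin_name: str):
    # 'torch_ddp' and 'torch_ddp_fp16' both map to TorchDDPPlugin, as in the example.
    if plugin_name.startswith('torch_ddp'):
        return TorchDDPPlugin()
    elif plugin_name == 'gemini':
        # 2**5 is the initial loss scale used by the example; placement_policy and
        # strict_ddp_mode are no longer passed and fall back to the plugin defaults.
        return GeminiPlugin(initial_scale=2**5)
    elif plugin_name == 'low_level_zero':
        return LowLevelZeroPlugin(initial_scale=2**5)
    raise ValueError(f"unsupported plugin: {plugin_name}")
```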