From d1b5e00b3035153633f5fd560b15dec8d61b0cd0 Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 4 Jul 2023 17:46:19 +0800 Subject: [PATCH] [cli] hotfix launch command for multi-nodes --- colossalai/cli/launcher/run.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/colossalai/cli/launcher/run.py b/colossalai/cli/launcher/run.py index daa5107caf90..5e74c2c4f5b8 100644 --- a/colossalai/cli/launcher/run.py +++ b/colossalai/cli/launcher/run.py @@ -164,9 +164,7 @@ def _arg_dict_to_list(arg_dict): ] else: # extra launch args for torch distributed launcher with torch >= 1.9 - default_torchrun_rdzv_args = dict(rdzv_backend="c10d", - rdzv_endpoint=f"{master_addr}:{master_port}", - rdzv_id="colossalai-default-job") + default_torchrun_rdzv_args = dict(master_addr=master_addr, master_port=master_port) # update rdzv arguments for key in default_torchrun_rdzv_args.keys():