From ed789f3add3b32c59a138bb2d67cd416c9b2de74 Mon Sep 17 00:00:00 2001
From: HydrogenSulfate <490868991@qq.com>
Date: Wed, 23 Apr 2025 15:31:20 +0800
Subject: [PATCH] fix saving model in parallel training

---
 deepmd/pd/train/training.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py
index 9389b8f4b5..9d21987617 100644
--- a/deepmd/pd/train/training.py
+++ b/deepmd/pd/train/training.py
@@ -876,12 +876,6 @@ def log_loss_valid(_task_key="Default"):
                 ) and (self.rank == 0 or dist.get_rank() == 0):
                     # Handle the case if rank 0 aborted and re-assigned
                     self.latest_model = Path(self.save_ckpt + f"-{_step_id + 1}.pd")
-
-                    module = (
-                        self.wrapper.module
-                        if dist.is_available() and dist.is_initialized()
-                        else self.wrapper
-                    )
                     self.save_model(self.latest_model, lr=cur_lr, step=_step_id)
                     log.info(f"Saved model to {self.latest_model}")
                     symlink_prefix_files(self.latest_model.stem, self.save_ckpt)
@@ -1000,7 +994,7 @@ def log_loss_valid(_task_key="Default"):

     def save_model(self, save_path, lr=0.0, step=0) -> None:
         module = (
-            self.wrapper.module
+            self.wrapper._layers
             if dist.is_available() and dist.is_initialized()
             else self.wrapper
         )