7 changes: 6 additions & 1 deletion colossalai/booster/plugin/torch_fsdp_plugin.py
@@ -3,10 +3,10 @@

import torch
import torch.nn as nn
import warnings
from packaging import version
from torch.distributed import ProcessGroup


if version.parse(torch.__version__) >= version.parse('1.12.0'):
from torch.distributed.fsdp import FullStateDictConfig
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
@@ -202,6 +202,11 @@ def configure(

# wrap the model with PyTorch FSDP
fsdp_model = TorchFSDPModel(model, device_id=torch.cuda.current_device(), **self.fsdp_kwargs)

if len(optimizer.param_groups) > 1:
warnings.warn(
'TorchFSDPPlugin does not support optimizers that use multiple parameter groups; results may not be as expected.'
)
optimizer.__init__(fsdp_model.parameters(), **optimizer.defaults)

if not isinstance(optimizer, FSDPOptimizerWrapper):
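For context on the change above: FSDP flattens and shards a module's parameters when it wraps the model, so an optimizer constructed before wrapping holds references to tensors that are no longer the ones being trained. That is why the plugin re-runs `optimizer.__init__` against `fsdp_model.parameters()`. A minimal sketch of the same pattern outside the plugin (the tiny model is illustrative, and the script assumes a process group has already been initialized, e.g. under torchrun):

```python
import torch
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

model = nn.Linear(16, 16).cuda()
# Built before wrapping, this optimizer points at the original parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# FSDP replaces the module's parameters with flattened shards.
fsdp_model = FSDP(model, device_id=torch.cuda.current_device())

# Rebuild the optimizer against the wrapped parameters, reusing its stored
# default hyperparameters, as the plugin does via optimizer.__init__.
optimizer = type(optimizer)(fsdp_model.parameters(), **optimizer.defaults)
```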
3 changes: 3 additions & 0 deletions docs/source/en/basics/booster_plugins.md
@@ -62,8 +62,11 @@ More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/genera
### Torch FSDP Plugin

> ⚠ This plugin is not available when the torch version is lower than 1.12.0.

> ⚠ This plugin does not currently support saving/loading sharded model checkpoints.

> ⚠ This plugin does not support optimizers that use multiple parameter groups.

More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/fsdp.html).

{{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}
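A minimal usage sketch under these constraints (the launch call, model, and optimizer below are illustrative placeholders, and the script must be started with a distributed launcher such as torchrun):

```python
import torch.nn as nn
from torch.optim import Adam

import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import TorchFSDPPlugin

colossalai.launch_from_torch(config={})

plugin = TorchFSDPPlugin()
booster = Booster(plugin=plugin)

model = nn.Linear(16, 16)
# Keep a single parameter group; multiple groups trigger the warning above.
optimizer = Adam(model.parameters(), lr=1e-3)

model, optimizer, *_ = booster.boost(model, optimizer)
```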
3 changes: 3 additions & 0 deletions docs/source/zh-Hans/basics/booster_plugins.md
@@ -62,8 +62,11 @@ Zero-2 does not support local gradient accumulation. If you insist on using it, although you can accumulate
### Torch FSDP Plugin

> ⚠ This plugin is not available when the torch version is lower than 1.12.0.

> ⚠ This plugin does not currently support saving/loading sharded model checkpoints.

> ⚠ This plugin does not currently support optimizers that use multiple parameter groups.

More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/fsdp.html).

{{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}