From 8b0468d09bcdb712e11b5b1de0c055184f0a9e2a Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 18:16:43 +0800
Subject: [PATCH 1/5] add cosine lr

---
 deepmd/dpmodel/utils/learning_rate.py | 38 +++++++++++++++++++++++++++
 deepmd/pt/train/training.py           | 14 ++++++----
 deepmd/pt/utils/learning_rate.py      |  2 ++
 deepmd/utils/argcheck.py              | 22 +++++++++++++++-
 4 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py
index 10f7ec8d04..777d518a3c 100644
--- a/deepmd/dpmodel/utils/learning_rate.py
+++ b/deepmd/dpmodel/utils/learning_rate.py
@@ -55,3 +55,41 @@ def value(self, step: int) -> np.float64:
         if step_lr < self.min_lr:
             step_lr = self.min_lr
         return step_lr
+
+
+class LearningRateCosine:
+    def __init__(
+        self,
+        start_lr: float,
+        stop_lr: float,
+        stop_steps: int,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Defines a cosine annealing learning rate schedule.
+        The learning rate starts at `start_lr` and gradually decreases to `stop_lr`
+        following a cosine curve over the training steps.
+
+        Parameters
+        ----------
+        start_lr
+            The initial learning rate at the beginning of training.
+        stop_lr
+            The final learning rate at the end of training.
+        stop_steps
+            The total number of training steps over which the learning rate
+            will be annealed from start_lr to stop_lr.
+        """
+        self.start_lr = start_lr
+        self.lr_min_factor = stop_lr / start_lr
+        self.stop_steps = stop_steps
+
+    def value(self, step: int) -> np.float64:
+        if step >= self.stop_steps:
+            return self.start_lr * self.lr_min_factor
+        return self.start_lr * (
+            self.lr_min_factor
+            + 0.5
+            * (1 - self.lr_min_factor)
+            * (1 + np.cos(np.pi * (step / self.stop_steps)))
+        )
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index d98b23d25c..713ee59a23 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -63,6 +63,7 @@
     SAMPLER_RECORD,
 )
 from deepmd.pt.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 from deepmd.pt.utils.stat import (
@@ -267,12 +268,15 @@ def get_sample() -> Any:
             return get_sample
 
         def get_lr(lr_params: dict[str, Any]) -> LearningRateExp:
-            assert lr_params.get("type", "exp") == "exp", (
-                "Only learning rate `exp` is supported!"
-            )
+            lr_type = lr_params.get("type", "exp")
             lr_params["stop_steps"] = self.num_steps - self.warmup_steps
-            lr_exp = LearningRateExp(**lr_params)
-            return lr_exp
+            if lr_type == "exp":
+                lr_schedule = LearningRateExp(**lr_params)
+            elif lr_type == "cosine":
+                lr_schedule = LearningRateCosine(**lr_params)
+            else:
+                raise ValueError(f"Not supported learning rate type '{lr_type}'!")
+            return lr_schedule
 
         # Optimizer
         if self.multi_task and training_params.get("optim_dict", None) is not None:
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
index 3502434bc0..31ae1c3152 100644
--- a/deepmd/pt/utils/learning_rate.py
+++ b/deepmd/pt/utils/learning_rate.py
@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from deepmd.dpmodel.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 
 __all__ = [
+    "LearningRateCosine",
     "LearningRateExp",
 ]
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7fcc117ab5..22b71b0183 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2509,12 +2509,32 @@ def learning_rate_exp() -> list[Argument]:
     return args
 
 
+def learning_rate_cosine() -> list[Argument]:
+    """
+    Defines a cosine annealing learning rate schedule.
+
+    The learning rate starts at `start_lr` and gradually decreases to `stop_lr`
+    following a cosine curve over the training steps.
+    """
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training. "
+
+    args = [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-5, doc=doc_stop_lr),
+    ]
+    return args
+
+
 def learning_rate_variant_type_args() -> Variant:
     doc_lr = "The type of the learning rate."
 
     return Variant(
         "type",
-        [Argument("exp", dict, learning_rate_exp())],
+        [
+            Argument("exp", dict, learning_rate_exp()),
+            Argument("cosine", dict, learning_rate_cosine()),
+        ],
         optional=True,
         default_tag="exp",
         doc=doc_lr,

From f7ac57cf66f6024e0c952216c6eb0000c59f5f82 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 18:31:28 +0800
Subject: [PATCH 2/5] add unit test for cosine lr

---
 deepmd/utils/argcheck.py   |  2 +-
 source/tests/pt/test_lr.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 22b71b0183..09814beb59 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2533,7 +2533,7 @@ def learning_rate_variant_type_args() -> Variant:
         "type",
         [
             Argument("exp", dict, learning_rate_exp()),
-            Argument("cosine", dict, learning_rate_cosine()),
+            Argument("cosine", dict, learning_rate_cosine(), doc=doc_only_pt_supported),
         ],
         optional=True,
         default_tag="exp",
diff --git a/source/tests/pt/test_lr.py b/source/tests/pt/test_lr.py
index 2d6bf156e1..75f663f041 100644
--- a/source/tests/pt/test_lr.py
+++ b/source/tests/pt/test_lr.py
@@ -7,6 +7,7 @@
 tf.disable_eager_execution()
 
 from deepmd.pt.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 from deepmd.tf.utils import (
@@ -102,5 +103,21 @@ def decay_rate_pt(self) -> None:
         )
 
 
+class TestLearningRateCosine(unittest.TestCase):
+    def test_basic_curve(self) -> None:
+        start_lr = 1.0
+        stop_lr = 0.1
+        stop_steps = 10
+        lr = LearningRateCosine(start_lr, stop_lr, stop_steps)
+
+        np.testing.assert_allclose(lr.value(0), start_lr)
+        np.testing.assert_allclose(lr.value(stop_steps), stop_lr)
+        np.testing.assert_allclose(lr.value(stop_steps + 5), stop_lr)
+
+        mid_step = stop_steps // 2
+        expected_mid = stop_lr + (start_lr - stop_lr) * 0.5
+        np.testing.assert_allclose(lr.value(mid_step), expected_mid)
+
+
 if __name__ == "__main__":
     unittest.main()

From b0df231841c1fd4086f5db6e9598b028509c1f43 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 18:49:22 +0800
Subject: [PATCH 3/5] add LearningRateSchedule base class for lr schedules

---
 deepmd/dpmodel/utils/learning_rate.py | 41 +++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py
index 777d518a3c..971dd3391f 100644
--- a/deepmd/dpmodel/utils/learning_rate.py
+++ b/deepmd/dpmodel/utils/learning_rate.py
@@ -1,4 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
 from typing import (
     Any,
 )
@@ -6,7 +10,33 @@
 import numpy as np
 
 
-class LearningRateExp:
+class LearningRateSchedule(ABC):
+    def __init__(
+        self, start_lr: float, stop_lr: float, stop_steps: int, **kwargs: Any
+    ) -> None:
+        """
+        Base class for learning rate schedules.
+
+        Parameters
+        ----------
+        start_lr
+            The initial learning rate.
+        stop_lr
+            The final learning rate.
+        stop_steps
+            The total training steps for learning rate scheduler.
+        """
+        self.start_lr = start_lr
+        self.stop_lr = stop_lr
+        self.stop_steps = stop_steps
+
+    @abstractmethod
+    def value(self, step: int) -> np.float64:
+        """Get the learning rate at the given step."""
+        pass
+
+
+class LearningRateExp(LearningRateSchedule):
     def __init__(
         self,
         start_lr: float,
@@ -37,7 +67,7 @@ def __init__(
             If provided, the decay rate will be set instead of
             calculating it through interpolation between start_lr and stop_lr.
         """
-        self.start_lr = start_lr
+        super().__init__(start_lr, stop_lr, stop_steps, **kwargs)
         default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1
         self.decay_steps = decay_steps
         if self.decay_steps >= stop_steps:
@@ -47,7 +77,7 @@ def __init__(
         )
         if decay_rate is not None:
             self.decay_rate = decay_rate
-        self.min_lr = stop_lr
+        self.min_lr = self.stop_lr
 
     def value(self, step: int) -> np.float64:
         """Get the learning rate at the given step."""
@@ -57,7 +87,7 @@ def value(self, step: int) -> np.float64:
         return step_lr
 
 
-class LearningRateCosine:
+class LearningRateCosine(LearningRateSchedule):
     def __init__(
         self,
         start_lr: float,
@@ -80,9 +110,8 @@ def __init__(
             The total number of training steps over which the learning rate
             will be annealed from start_lr to stop_lr.
         """
-        self.start_lr = start_lr
+        super().__init__(start_lr, stop_lr, stop_steps, **kwargs)
         self.lr_min_factor = stop_lr / start_lr
-        self.stop_steps = stop_steps
 
     def value(self, step: int) -> np.float64:
         if step >= self.stop_steps:

From 55d482725e809f2453032204789949be4fa31d56 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 19:01:28 +0800
Subject: [PATCH 4/5] register lr args via ArgsPlugin

---
 deepmd/utils/argcheck.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 09814beb59..1809b19083 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2477,6 +2477,10 @@ def linear_ener_model_args() -> Argument:
 
 
 #  --- Learning rate configurations: --- #
+lr_args_plugin = ArgsPlugin()
+
+
+@lr_args_plugin.register("exp")
 def learning_rate_exp() -> list[Argument]:
     doc_start_lr = "The learning rate at the start of the training."
     doc_stop_lr = (
@@ -2509,6 +2513,7 @@ def learning_rate_exp() -> list[Argument]:
     return args
 
 
+@lr_args_plugin.register("cosine", doc=doc_only_pt_supported)
 def learning_rate_cosine() -> list[Argument]:
     """
     Defines a cosine annealing learning rate schedule.
@@ -2531,10 +2536,7 @@ def learning_rate_variant_type_args() -> Variant:
 
     return Variant(
         "type",
-        [
-            Argument("exp", dict, learning_rate_exp()),
-            Argument("cosine", dict, learning_rate_cosine(), doc=doc_only_pt_supported),
-        ],
+        lr_args_plugin.get_all_argument(),
         optional=True,
         default_tag="exp",
         doc=doc_lr,

From d0f3d710e888410f6e976fa27114a1a3c6ace5ea Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 19:22:44 +0800
Subject: [PATCH 5/5] make lr plugin

---
 deepmd/dpmodel/utils/learning_rate.py | 21 ++++++++++++++++++---
 deepmd/pt/train/training.py           | 13 +++----------
 deepmd/pt/utils/learning_rate.py      |  2 ++
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py
index 971dd3391f..f82a42660b 100644
--- a/deepmd/dpmodel/utils/learning_rate.py
+++ b/deepmd/dpmodel/utils/learning_rate.py
@@ -9,8 +9,21 @@
 
 import numpy as np
 
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+
+class BaseLR(ABC, PluginVariant, make_plugin_registry("lr")):
+    def __new__(cls: type, *args: Any, **kwargs: Any) -> Any:
+        if cls is BaseLR:
+            cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__))
+        return super().__new__(cls)
 
-class LearningRateSchedule(ABC):
     def __init__(
         self, start_lr: float, stop_lr: float, stop_steps: int, **kwargs: Any
     ) -> None:
@@ -36,7 +49,8 @@ def value(self, step: int) -> np.float64:
         pass
 
 
-class LearningRateExp(LearningRateSchedule):
+@BaseLR.register("exp")
+class LearningRateExp(BaseLR):
     def __init__(
         self,
         start_lr: float,
@@ -87,7 +101,8 @@ def value(self, step: int) -> np.float64:
         return step_lr
 
 
-class LearningRateCosine(LearningRateSchedule):
+@BaseLR.register("cosine")
+class LearningRateCosine(BaseLR):
     def __init__(
         self,
         start_lr: float,
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 713ee59a23..7d768cf66b 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -63,8 +63,7 @@
     SAMPLER_RECORD,
 )
 from deepmd.pt.utils.learning_rate import (
-    LearningRateCosine,
-    LearningRateExp,
+    BaseLR,
 )
 from deepmd.pt.utils.stat import (
     make_stat_input,
@@ -267,15 +266,9 @@ def get_sample() -> Any:
                     _stat_file_path.root.close()
             return get_sample
 
-        def get_lr(lr_params: dict[str, Any]) -> LearningRateExp:
-            lr_type = lr_params.get("type", "exp")
+        def get_lr(lr_params: dict[str, Any]) -> BaseLR:
             lr_params["stop_steps"] = self.num_steps - self.warmup_steps
-            if lr_type == "exp":
-                lr_schedule = LearningRateExp(**lr_params)
-            elif lr_type == "cosine":
-                lr_schedule = LearningRateCosine(**lr_params)
-            else:
-                raise ValueError(f"Not supported learning rate type '{lr_type}'!")
+            lr_schedule = BaseLR(**{"type": "exp", **lr_params})
             return lr_schedule
 
         # Optimizer
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
index 31ae1c3152..ff7d4f7ec7 100644
--- a/deepmd/pt/utils/learning_rate.py
+++ b/deepmd/pt/utils/learning_rate.py
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from deepmd.dpmodel.utils.learning_rate import (
+    BaseLR,
     LearningRateCosine,
     LearningRateExp,
 )
 
 __all__ = [
+    "BaseLR",
     "LearningRateCosine",
     "LearningRateExp",
 ]