From 623e71896488bed317ac32ee330d7a4b27a915c1 Mon Sep 17 00:00:00 2001 From: njzjz-bot Date: Sun, 15 Mar 2026 03:48:41 +0000 Subject: [PATCH 1/5] docs(skills): add dpdata driver and plugin skills --- skills/dpdata-driver/SKILL.md | 135 ++++++++++++++++++++++++++++++++++ skills/dpdata-plugin/SKILL.md | 112 ++++++++++++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 skills/dpdata-driver/SKILL.md create mode 100644 skills/dpdata-plugin/SKILL.md diff --git a/skills/dpdata-driver/SKILL.md b/skills/dpdata-driver/SKILL.md new file mode 100644 index 00000000..b0c6e0ce --- /dev/null +++ b/skills/dpdata-driver/SKILL.md @@ -0,0 +1,135 @@ +--- +name: dpdata-driver +description: Use dpdata Python Driver plugins to label systems (energies/forces/virials) via System.predict(), list available drivers, and build Driver objects (ase/deepmd/gaussian/sqm/hybrid). Use when working with dpdata Python API (not CLI) and you need driver-based energy/force prediction, plugin registration keys, or examples of using dpdata with ASE calculators or DeePMD models. +--- + +# dpdata-driver + +Use dpdata “driver plugins” to **label** a `dpdata.System` (predict energies/forces/virials) and obtain a `dpdata.LabeledSystem`. + +## Key idea + +- A **Driver** converts an unlabeled `System` into a `LabeledSystem` by computing: + - `energies` (required) + - `forces` (optional but common) + - `virials` (optional) + +In dpdata, this is exposed as: + +- `System.predict(*args, driver="dp", **kwargs) -> LabeledSystem` + +`driver` can be: + +- a **string key** (plugin name), e.g. `"ase"`, `"dp"`, `"gaussian"` +- a **Driver object**, e.g. 
`Driver.get_driver("ase")(...)` + +## List supported driver keys (runtime) + +When unsure what drivers exist in *this* dpdata version/env, query them at runtime: + +```python +from dpdata.driver import Driver +print(sorted(Driver.get_drivers().keys())) +``` + +In the current repo state, keys include: + +- `ase` +- `dp` / `deepmd` / `deepmd-kit` +- `gaussian` +- `sqm` +- `hybrid` + +(Exact set depends on dpdata version and installed extras.) + +## Minimal workflow + +```python +import dpdata +from dpdata.system import System + +sys = System("input.xyz", fmt="xyz") +ls = sys.predict(driver="ase", calculator=...) # returns dpdata.LabeledSystem +``` + +### Verify you got a labeled system + +```python +assert "energies" in ls.data +# optional: +# assert "forces" in ls.data +# assert "virials" in ls.data +``` + +## Example: use the ASE driver with an ASE calculator (runnable) + +This is the easiest *fully runnable* example because it doesn’t require external QM software. + +Dependencies (recommended): use `uv`: + +```bash +uv run --with numpy --with ase python3 your_script.py +``` + +Script: + +```python +import numpy as np +from ase.calculators.emt import EMT +from dpdata.system import System + +# write a tiny molecule +open("tmp.xyz", "w").write( + """2\n\nH 0 0 0\nH 0 0 0.74\n""" +) + +sys = System("tmp.xyz", fmt="xyz") +ls = sys.predict(driver="ase", calculator=EMT()) + +print("energies", np.array(ls.data["energies"])) +print("forces shape", np.array(ls.data["forces"]).shape) +print("virials shape", np.array(ls.data["virials"]).shape) +``` + +## Example: pass a Driver object instead of a string + +```python +from ase.calculators.emt import EMT +from dpdata.driver import Driver +from dpdata.system import System + +sys = System("tmp.xyz", fmt="xyz") +ase_driver = Driver.get_driver("ase")(calculator=EMT()) +ls = sys.predict(driver=ase_driver) +``` + +## Hybrid driver + +Use `driver="hybrid"` to sum energies/forces/virials from multiple drivers. 
+ +The `HybridDriver` accepts `drivers=[ ... ]` where each item is either: + +- a `Driver` instance +- a dict like `{"type": "sqm", ...}` (type is the driver key) + +Example (structure only; may require external executables): + +```python +from dpdata.driver import Driver + +hyb = Driver.get_driver("hybrid")( + drivers=[ + {"type": "sqm", "qm_theory": "DFTB3"}, + {"type": "dp", "dp": "frozen_model.pb"}, + ] +) +# ls = sys.predict(driver=hyb) +``` + +## Notes / gotchas + +- Many drivers require extra dependencies or external programs: + - `dp` requires `deepmd-kit` + a model file + - `gaussian` requires Gaussian and a valid executable (default `g16`) + - `sqm` requires AmberTools `sqm` +- If you just need file format conversion, use the existing **dpdata CLI** skill instead. diff --git a/skills/dpdata-plugin/SKILL.md b/skills/dpdata-plugin/SKILL.md new file mode 100644 index 00000000..744d31bc --- /dev/null +++ b/skills/dpdata-plugin/SKILL.md @@ -0,0 +1,112 @@ +--- +name: dpdata-plugin +description: Create and install dpdata plugins (especially custom Format readers/writers) using Format.register(...) and pyproject.toml entry_points under 'dpdata.plugins'. Use when extending dpdata with new formats or distributing plugins as separate Python packages. +--- + +# dpdata-plugin + +dpdata loads plugins in two ways: + +1) **Built-in plugins** in `dpdata.plugins.*` (imported automatically) +2) **External plugins** exposed via Python package entry points: `dpdata.plugins` + +This skill focuses on **external plugin packages**, the recommended way to add new formats without modifying dpdata itself. + +## What can be extended? + +Most commonly: add a new **Format** (file reader/writer) via: + +```python +from dpdata.format import Format + +@Format.register("myfmt") +class MyFormat(Format): + ... +``` + +## How dpdata discovers plugins + +dpdata imports `dpdata.plugins` during normal use (e.g. `dpdata.system` imports it). 
That module: + +- imports every built-in module in `dpdata/plugins/*.py` +- then loads all **entry points** in group `dpdata.plugins` + +So an external plugin package only needs to ensure that importing the entry-point target triggers the `@Format.register(...)` side effects. + +## Minimal external plugin package (based on plugin_example/) + +### 1) Create a new Python package + +Example layout: + +``` +dpdata_random/ + pyproject.toml + dpdata_random/ + __init__.py +``` + +### 2) Implement and register your Format + +In `dpdata_random/__init__.py` (shortened example): + +```python +from __future__ import annotations + +import numpy as np +from dpdata.format import Format + +@Format.register("random") +class RandomFormat(Format): + def from_system(self, N, **kwargs): + return { + "atom_numbs": [20], + "atom_names": ["X"], + "atom_types": [0] * 20, + "cells": np.repeat(np.eye(3)[None, ...], N, axis=0) * 100.0, + "coords": np.random.rand(N, 20, 3) * 100.0, + "orig": np.zeros(3), + "nopbc": False, + } +``` + +Return dicts must match dpdata’s expected schema (cells/coords/atom_names/atom_types/...). + +### 3) Expose an entry point + +In `pyproject.toml`: + +```toml +[project] +name = "dpdata_random" +version = "0.0.0" +dependencies = ["numpy", "dpdata"] + +[project.entry-points.'dpdata.plugins'] +random = "dpdata_random:RandomFormat" +``` + +Any importable target works; this pattern points directly at the class. + +### 4) Install and test + +In a clean env (recommended via `uv`), run from the directory that contains the `dpdata_random/` project so the plugin package itself is installed alongside dpdata: + +```bash +uv run --with dpdata --with numpy --with-editable ./dpdata_random python3 - <<'PY' +import dpdata +from dpdata.format import Format + +# importing dpdata will load entry points (dpdata.plugins) +print('random' in Format.get_formats()) +PY +``` + +If it prints `True`, your plugin was discovered. + +## Debug checklist + +- Did you install the plugin package into the same environment where you run dpdata? +- Does `pyproject.toml` contain `[project.entry-points.'dpdata.plugins']`?
+- Does importing the entry point module/class execute the `@Format.register(...)` decorator? +- If using `uv run --with ...`, remember that each invocation gets its own ephemeral environment, so the plugin package must be part of that same command (e.g. `--with-editable ./dpdata_random`) or be installed in the surrounding `uv` project. From 388a09938906830790a262a9d92ab042c7eb4f1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 15 Mar 2026 03:50:31 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- skills/dpdata-driver/SKILL.md | 5 ++--- skills/dpdata-plugin/SKILL.md | 9 +++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/skills/dpdata-driver/SKILL.md b/skills/dpdata-driver/SKILL.md index b0c6e0ce..6da54a62 100644 --- a/skills/dpdata-driver/SKILL.md +++ b/skills/dpdata-driver/SKILL.md @@ -29,6 +29,7 @@ When unsure what drivers exist in *this* dpdata version/env, query them at runti ```python from dpdata.driver import Driver + print(sorted(Driver.get_drivers().keys())) ``` @@ -79,9 +80,7 @@ from ase.calculators.emt import EMT from dpdata.system import System # write a tiny molecule -open("tmp.xyz", "w").write( - """2\n\nH 0 0 0\nH 0 0 0.74\n""" -) +open("tmp.xyz", "w").write("""2\n\nH 0 0 0\nH 0 0 0.74\n""") sys = System("tmp.xyz", fmt="xyz") ls = sys.predict(driver="ase", calculator=EMT()) diff --git a/skills/dpdata-plugin/SKILL.md b/skills/dpdata-plugin/SKILL.md index 744d31bc..02d3bf3e 100644 --- a/skills/dpdata-plugin/SKILL.md +++ b/skills/dpdata-plugin/SKILL.md @@ -7,8 +7,8 @@ description: Create and install dpdata plugins (especially custom Format readers dpdata loads plugins in two ways: -1) **Built-in plugins** in `dpdata.plugins.*` (imported automatically) -2) **External plugins** exposed via Python package entry points: `dpdata.plugins` +1. **Built-in plugins** in `dpdata.plugins.*` (imported automatically) +1.
**External plugins** exposed via Python package entry points: `dpdata.plugins` This skill focuses on **external plugin packages**, the recommended way to add new formats without modifying dpdata itself. @@ -19,9 +19,9 @@ Most commonly: add a new **Format** (file reader/writer) via: ```python from dpdata.format import Format + @Format.register("myfmt") -class MyFormat(Format): - ... +class MyFormat(Format): ... ``` ## How dpdata discovers plugins @@ -56,6 +56,7 @@ from __future__ import annotations import numpy as np from dpdata.format import Format + @Format.register("random") class RandomFormat(Format): def from_system(self, N, **kwargs): From bbc6c86cffaace7611ba87c823d03fb931f1e83e Mon Sep 17 00:00:00 2001 From: njzjz-bot Date: Sun, 15 Mar 2026 03:50:35 +0000 Subject: [PATCH 3/5] chore: ignore uv artifacts --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 7fd27471..645b6210 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,7 @@ tests/data_*.h5 tests/data_*/ tests/tmp.* tests/.coverage + +# local dev artifact +uv.lock +.venv/ From 0da05246c5f68c122c5c7f2e090cc812c4019e86 Mon Sep 17 00:00:00 2001 From: njzjz-bot Date: Sun, 15 Mar 2026 03:59:40 +0000 Subject: [PATCH 4/5] docs(skills): fix code fence language tag --- skills/dpdata-plugin/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/dpdata-plugin/SKILL.md b/skills/dpdata-plugin/SKILL.md index 02d3bf3e..1ba8de00 100644 --- a/skills/dpdata-plugin/SKILL.md +++ b/skills/dpdata-plugin/SKILL.md @@ -39,7 +39,7 @@ So an external plugin package only needs to ensure that importing the entry-poin Example layout: -``` +```text dpdata_random/ pyproject.toml dpdata_random/ From 85c142766231f79dbbec2ad9512f59d0ada36b48 Mon Sep 17 00:00:00 2001 From: njzjz-bot Date: Sun, 15 Mar 2026 04:02:22 +0000 Subject: [PATCH 5/5] docs(skills): address review comments --- skills/dpdata-driver/SKILL.md | 14 +++++++++++--- skills/dpdata-plugin/SKILL.md | 2 +- 
2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/skills/dpdata-driver/SKILL.md b/skills/dpdata-driver/SKILL.md index 6da54a62..132c36c7 100644 --- a/skills/dpdata-driver/SKILL.md +++ b/skills/dpdata-driver/SKILL.md @@ -66,12 +66,17 @@ assert "energies" in ls.data This is the easiest *fully runnable* example because it doesn’t require external QM software. -Dependencies (recommended): use `uv`: +Dependencies (recommended): use `uv`. + +Option A (one-off invocation): ```bash -uv run --with numpy --with ase python3 your_script.py +uv run --with dpdata --with numpy --with ase python3 your_script.py ``` +Option B (recommended for shareable scripts): declare dependencies in the script via inline metadata, then run `uv run your_script.py`. +See: https://docs.astral.sh/uv/guides/scripts/#inline-metadata + Script: ```python @@ -87,7 +92,10 @@ ls = sys.predict(driver="ase", calculator=EMT()) print("energies", np.array(ls.data["energies"])) print("forces shape", np.array(ls.data["forces"]).shape) -print("virials shape", np.array(ls.data["virials"]).shape) +if "virials" in ls.data: + print("virials shape", np.array(ls.data["virials"]).shape) +else: + print("virials: not provided by this driver") ``` ## Example: pass a Driver object instead of a string diff --git a/skills/dpdata-plugin/SKILL.md b/skills/dpdata-plugin/SKILL.md index 1ba8de00..ef9dba2f 100644 --- a/skills/dpdata-plugin/SKILL.md +++ b/skills/dpdata-plugin/SKILL.md @@ -63,7 +63,7 @@ class RandomFormat(Format): return { "atom_numbs": [20], "atom_names": ["X"], - "atom_types": [0] * 20, + "atom_types": np.zeros(20, dtype=int), "cells": np.repeat(np.eye(3)[None, ...], N, axis=0) * 100.0, "coords": np.random.rand(N, 20, 3) * 100.0, "orig": np.zeros(3),