diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py index 01b97fd6..30067657 100644 --- a/lambench/models/ase_models.py +++ b/lambench/models/ase_models.py @@ -3,14 +3,17 @@ from pathlib import Path from typing import Optional -import ase import dpdata import numpy as np from ase.calculators.calculator import Calculator +from ase import Atoms from ase.io import write from tqdm import tqdm from lambench.models.basemodel import BaseLargeAtomModel +from ase.optimize import FIRE +from ase.constraints import FixSymmetry +from ase.filters import FrechetCellFilter class ASEModel(BaseLargeAtomModel): @@ -81,7 +84,9 @@ def evaluate(self, task) -> Optional[dict[str, float]]: return self.run_ase_dptest(self.calc, task.test_data) elif isinstance(task, CalculatorTask): if task.task_name == "nve_md": - from lambench.tasks.calculator.nve_md import run_md_nve_simulation + from lambench.tasks.calculator.nve_md.nve_md import ( + run_md_nve_simulation, + ) num_steps = task.calculator_params.get("num_steps", 1000) timestep = task.calculator_params.get("timestep", 1.0) @@ -91,6 +96,18 @@ def evaluate(self, task) -> Optional[dict[str, float]]: self, num_steps, timestep, temperature_K ) } + elif task.task_name == "phonon_mdr": + from lambench.tasks.calculator.phonon.phonon import ( + run_phonon_simulation, + ) + + task.workdir.mkdir(exist_ok=True) + distance = task.calculator_params.get("distance", 0.01) + return { + "metrics": run_phonon_simulation( + self, task.test_data, distance, task.workdir + ) + } else: raise NotImplementedError(f"Task {task.task_name} is not implemented.") @@ -124,7 +141,7 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict: sys = dpdata.LabeledSystem(filepth, fmt="deepmd/npy") for ls in tqdm(sys, desc="Set", leave=False): # type: ignore for frame in tqdm(ls, desc="Frames", leave=False): - atoms: ase.Atoms = frame.to_ase_structure()[0] # type: ignore + atoms: Atoms = frame.to_ase_structure()[0] # type: ignore atoms.calc = calc # Energy @@ -133,7 +150,9 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict: if not np.isfinite(energy_predict): raise ValueError("Energy prediction is non-finite.") except (ValueError, RuntimeError): - file = Path(f"failed_structures/{calc.name}/{atoms.symbols}.cif") + file = Path( + f"failed_structures/{calc.name}/{atoms.symbols}.cif" + ) file.parent.mkdir(parents=True, exist_ok=True) write(file, atoms) logging.error( @@ -229,3 +248,27 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict: } ) return res + + @staticmethod + def run_ase_relaxation( + atoms: Atoms, + calc: Calculator, + fmax: float = 5e-3, + steps: int = 500, + fix_symmetry: bool = True, + relax_cell: bool = True, + ) -> Optional[Atoms]: + atoms.calc = calc + if fix_symmetry: + atoms.set_constraint(FixSymmetry(atoms)) + if relax_cell: + atoms = FrechetCellFilter(atoms) + opt = FIRE(atoms, trajectory=None, logfile=None) + try: + opt.run(fmax=fmax, steps=steps) + except Exception as e: + logging.error(f"Relaxation failed: {e}") + return None + if relax_cell: + atoms = atoms.atoms + return atoms diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml index 6b8d9b66..2643cc2b 100644 --- a/lambench/tasks/calculator/calculator_tasks.yml +++ b/lambench/tasks/calculator/calculator_tasks.yml @@ -4,3 +4,7 @@ nve_md: num_steps: 10000 timestep: 1.0 temperature: 300 +phonon_mdr: + test_data: /bohr/lambench-phonon-y7vk/v1/MDR_PBE_phonon + calculator_params: + distance: 0.01 diff --git a/lambench/tasks/calculator/nve_md.py b/lambench/tasks/calculator/nve_md/nve_md.py similarity index 98% rename from lambench/tasks/calculator/nve_md.py rename to lambench/tasks/calculator/nve_md/nve_md.py index df3139a3..e5bb384b 100644 --- a/lambench/tasks/calculator/nve_md.py +++ b/lambench/tasks/calculator/nve_md/nve_md.py @@ -11,7 +11,7 @@ import numpy as np import time from typing import Optional -from lambench.tasks.calculator.nve_md_data import TEST_DATA +from lambench.tasks.calculator.nve_md.nve_md_data import TEST_DATA import logging diff --git a/lambench/tasks/calculator/nve_md_data.py b/lambench/tasks/calculator/nve_md/nve_md_data.py similarity index 100% rename from lambench/tasks/calculator/nve_md_data.py rename to lambench/tasks/calculator/nve_md/nve_md_data.py diff --git a/lambench/tasks/calculator/phonon/phonon.py b/lambench/tasks/calculator/phonon/phonon.py new file mode 100644 index 00000000..a83ae348 --- /dev/null +++ b/lambench/tasks/calculator/phonon/phonon.py @@ -0,0 +1,178 @@ +""" +Code adapted from the following paper and code: + +@misc{loew2024universalmachinelearninginteratomic, + title={Universal Machine Learning Interatomic Potentials are Ready for Phonons}, + author={Antoine Loew and Dewen Sun and Hai-Chen Wang and Silvana Botti and Miguel A. L. Marques}, + year={2024}, + eprint={2412.16551}, + archivePrefix={arXiv}, + primaryClass={cond-mat.mtrl-sci}, + url={https://arxiv.org/abs/2412.16551}, +} +""" + +import logging +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +import phonopy +import yaml +from ase import Atoms +from phonopy.harmonic.dynmat_to_fc import get_commensurate_points +from sklearn.metrics import mean_absolute_error +from tqdm import tqdm + +from lambench.models.ase_models import ASEModel +from lambench.tasks.calculator.phonon.phonon_utils import ( + THz_TO_K, + ase_to_phonopy_atoms, + phonopy_to_ase_atoms, +) + + +def run_phonon_simulation_single( + model: ASEModel, + phonon_file: Path, + distance: float, + workdir: Path, +) -> Optional[dict[str, float]]: + """ + Run phonon related calculations for a single given phonon file. + + Parameters: + model: ASEModel object. + phonon_file: Path to the phonon file. + distance: Distance for displacements. + workdir: Path to the working directory. + """ + try: + # Step 1: Run relaxation + atoms: Atoms = phonopy_to_ase_atoms(phonon_file) + atoms = model.run_ase_relaxation(atoms, model.calc, fmax=1e-3) + + # Step 2: Convert ASE Atoms object to PhonopyAtoms object + phonon_atoms = ase_to_phonopy_atoms(atoms) + phonon = phonopy.Phonopy( + phonon_atoms, supercell_matrix=atoms.info["supercell_matrix"] + ) + + # Step 3: Generate displacements + phonon.generate_displacements(distance=distance, is_diagonal=False) + + # Step 4: Calculate force constants + forcesets = [] + + for frame in phonon.supercells_with_displacements: + frame_atom = Atoms( + cell=frame.cell, + symbols=frame.symbols, + scaled_positions=frame.scaled_positions, + pbc=True, + ) + frame_atom.calc = model.calc + forces = frame_atom.get_forces() + forcesets.append(forces) + + phonon.forces = forcesets + phonon.produce_force_constants() + phonon.symmetrize_force_constants() + + # Step 5: save output files + + phonon.save(workdir / phonon_file.name, settings={"force_constants": True}) + + # Step 6: Calculate thermal properties + phonon.init_mesh() + phonon.run_mesh() + phonon.run_thermal_properties(temperatures=(300,)) + thermal_dict = phonon.get_thermal_properties_dict() + + commensurate_q = get_commensurate_points(phonon.supercell_matrix) + phonon_freqs = np.array([phonon.get_frequencies(q) for q in commensurate_q]) + + # Step 7: Updata output files + with open(workdir / phonon_file.name, "r") as f: + output = yaml.load(f, yaml.FullLoader) + + output["free_e"] = thermal_dict["free_energy"].tolist() + output["entropy"] = thermal_dict["entropy"].tolist() + output["heat_capacity"] = thermal_dict["heat_capacity"].tolist() + output["phonon_freq"] = phonon_freqs.tolist() + + # TODO: optional: update and save output files + return { + "mp_id": phonon_file.name.split(".")[0], + "entropy": output["entropy"][0], + "heat_capacity": output["heat_capacity"][0], + "free_energy": output["free_e"][0], + "max_freq": np.max(np.array(phonon_freqs)) * THz_TO_K, + } + + except Exception as e: + logging.error(f"Error occured for {str(phonon_file.name)}: {e}") + return None + + +def run_phonon_simulation( + model: ASEModel, + test_data: Path, + distance: float, + workdir: Path, +) -> dict[str, float]: + """ + This function runs phonon simulations for a list of test systems using the given model. + """ + test_files = list(test_data.glob("*.yaml.bz2")) + if len(test_files) == 0: + logging.error("No test files found.") + return {} + logging.info(f"Running phonon simulations for {len(test_files)} files...") + + dataframe_rows = [] + for test_file in tqdm(test_files): + result = run_phonon_simulation_single( + model, + test_file, + distance, + workdir, + ) + logging.info(f"Simulation completed for system {str(test_file.name)}.\n") + + if result is not None: + dataframe_rows.append(result) + preds = pd.DataFrame(dataframe_rows) + + # Post-processing + results = {} + try: + labels = pd.read_csv(test_data / "pbe.csv") + TOTAL_RECORDS = len(labels) + preds.sort_values("mp_id", inplace=True) + labels.sort_values("mp_id", inplace=True) + + # Filter predictions and labels based on valid mp_ids + valid_preds = preds[ + np.isfinite(preds[["free_energy", "heat_capacity"]]).all(axis=1) + ] + valid_mp_ids = set(valid_preds["mp_id"]) + labels = labels[labels["mp_id"].isin(valid_mp_ids)] + preds = valid_preds + + success_rate = len(preds) / TOTAL_RECORDS + mae_wmax = mean_absolute_error(labels["max_freq"], preds["max_freq"]) + mae_s = mean_absolute_error(labels["entropy"], preds["entropy"]) + mae_f = mean_absolute_error(labels["free_energy"], preds["free_energy"]) + mae_c = mean_absolute_error(labels["heat_capacity"], preds["heat_capacity"]) + results = { + "success_rate": success_rate, + "mae_max_freq": mae_wmax, + "mae_entropy": mae_s, + "mae_free_energy": mae_f, + "mae_heat_capacity": mae_c, + } + except Exception as e: + logging.error(f"Error occured during post-processing: {e}") + return results diff --git a/lambench/tasks/calculator/phonon/phonon_utils.py b/lambench/tasks/calculator/phonon/phonon_utils.py new file mode 100644 index 00000000..e96d3bd9 --- /dev/null +++ b/lambench/tasks/calculator/phonon/phonon_utils.py @@ -0,0 +1,35 @@ +from ase import Atoms +from phonopy.structure.atoms import PhonopyAtoms +from pathlib import Path +import phonopy + + +# Constants unit conversion +THz_TO_K = 47.9924 + + +def ase_to_phonopy_atoms(atoms: Atoms) -> PhonopyAtoms: + """ + Convert ASE Atoms object to PhonopyAtoms object. + """ + # Extract atomic symbols and positions + symbols = atoms.get_chemical_symbols() + positions = atoms.get_positions() + cell = atoms.get_cell() + masses = atoms.get_masses() + + return PhonopyAtoms(symbols=symbols, positions=positions, cell=cell, masses=masses) + + +def phonopy_to_ase_atoms(phonon_file: Path) -> Atoms: + """ + Convert PhonopyAtoms object to ASE Atoms object. + """ + phonon = phonopy.load(phonon_file) + return Atoms( + cell=phonon.unitcell.cell, + symbols=phonon.unitcell.symbols, + scaled_positions=phonon.unitcell.scaled_positions, + pbc=True, + info={"supercell_matrix": phonon.supercell_matrix}, + ) diff --git a/pyproject.toml b/pyproject.toml index 9de47ee5..3c01e517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ orb = ["orb-models","pynanoflann@git+https://github.com/dwastberg/pynanoflann#eg sevenn = ["sevenn"] test = ["pytest"] dflow = ["pydflow", "lbg"] +phonopy = ["phonopy@git+https://github.com/phonopy/phonopy.git", "scikit-learn"] + [project.urls] Homepage = "https://github.com/deepmodeling/LAMBench" diff --git a/test/tasks/calculator/test_nve_md.py b/test/tasks/calculator/test_nve_md.py index 9c0d3cb0..42b0afb6 100644 --- a/test/tasks/calculator/test_nve_md.py +++ b/test/tasks/calculator/test_nve_md.py @@ -1,4 +1,4 @@ -from lambench.tasks.calculator.nve_md import ( +from lambench.tasks.calculator.nve_md.nve_md import ( nve_simulation_single, run_md_nve_simulation, )