diff --git a/dev/ase_test_interface.py b/dev/ase_test_interface.py index cf873a5c..90be24ef 100644 --- a/dev/ase_test_interface.py +++ b/dev/ase_test_interface.py @@ -6,9 +6,10 @@ from pathlib import Path from lamstare.infra.ood_database import OODRecord + def run_ase_dptest( - calc, - testpath: str, + calc, + testpath: str, ): """ Given a ASE calculator and a test path, run ASE dptest and return the results. @@ -25,22 +26,16 @@ def run_ase_dptest( virial_err_per_atom = [] max_ele_num = 120 - systems = [] - for path in testpath: - systems.extend(glob.glob(f"{path}/*")) - # check if the system is mixed type - if len(glob.glob(systems[0] + '/**/real_atom_types.npy', recursive=True)) == 0: - mix_type = False - else: - mix_type = True - + systems = [i.parent for i in testpath.rglob("type_map.raw")] + assert systems, f"No systems found in the test data {testpath}." + mix_type = any(systems[0].rglob("real_atom_types.npy")) for filepth in systems: if mix_type: sys = dpdata.MultiSystems() - sys.load_systems_from_file(filepth, fmt='deepmd/npy/mixed') + sys.load_systems_from_file(filepth, fmt="deepmd/npy/mixed") else: - sys = dpdata.LabeledSystem(filepth, fmt='deepmd/npy') + sys = dpdata.LabeledSystem(filepth, fmt="deepmd/npy") for ls in sys: for frame in ls: @@ -57,46 +52,58 @@ def run_ase_dptest( energy_lab.append(frame.data["energies"]) energy_err.append(energy_predict - frame.data["energies"]) force_err.append(frame.data["forces"].squeeze(0) - np.array(ff)) - energy_err_per_atom.append(energy_err[-1]/force_err[-1].shape[0]) + energy_err_per_atom.append(energy_err[-1] / force_err[-1].shape[0]) try: stress = atoms.get_stress() - stress_tensor = - np.array( - [[stress[0],stress[5],stress[4]], - [stress[5],stress[1],stress[3]], - [stress[4],stress[3], stress[2]]] - )* atoms.get_volume() - virial_err.append(frame.data['virials'] - stress_tensor) - virial_err_per_atom.append(virial_err[-1]/force_err[-1].shape[0]) + stress_tensor = ( + -np.array( + [ + [stress[0], stress[5], stress[4]], + [stress[5], stress[1], stress[3]], + [stress[4], stress[3], stress[2]], + ] + ) + * atoms.get_volume() + ) + virial_err.append(frame.data["virials"] - stress_tensor) + virial_err_per_atom.append( + virial_err[-1] / force_err[-1].shape[0] + ) except: pass else: pass - - + atom_num = np.array(atom_num) energy_err = np.array(energy_err) energy_pre = np.array(energy_pre) energy_lab = np.array(energy_lab) shift_bias, _, _, _ = np.linalg.lstsq(atom_num, energy_err, rcond=1e-10) - unbiased_energy = energy_pre - (atom_num @ shift_bias.reshape(max_ele_num, -1)).reshape(-1) - energy_lab.squeeze() + unbiased_energy = ( + energy_pre + - (atom_num @ shift_bias.reshape(max_ele_num, -1)).reshape(-1) + - energy_lab.squeeze() + ) unbiased_energy_err_per_a = unbiased_energy / atom_num.sum(-1) - res = { - "Energy MAE": [np.mean(np.abs(np.stack(unbiased_energy)))], - "Energy RMSE": [np.sqrt(np.mean(np.square(unbiased_energy)))], - "Energy MAE/Natoms": [np.mean(np.abs(np.stack(unbiased_energy_err_per_a)))], - "Energy RMSE/Natoms": [np.sqrt(np.mean(np.square(unbiased_energy_err_per_a)))], - "Force MAE": [np.mean(np.abs(np.concatenate(force_err)))], - "Force RMSE": [np.sqrt(np.mean(np.square(np.concatenate(force_err))))], + "Energy MAE": [np.mean(np.abs(np.stack(unbiased_energy)))], + "Energy RMSE": [np.sqrt(np.mean(np.square(unbiased_energy)))], + "Energy MAE/Natoms": [np.mean(np.abs(np.stack(unbiased_energy_err_per_a)))], + "Energy RMSE/Natoms": [np.sqrt(np.mean(np.square(unbiased_energy_err_per_a)))], + "Force MAE": [np.mean(np.abs(np.concatenate(force_err)))], + "Force RMSE": [np.sqrt(np.mean(np.square(np.concatenate(force_err))))], } if virial_err_per_atom != []: res.update( { - "Virial MAE": [np.mean(np.abs(np.stack(virial_err)))], - "Virial RMSE": [np.sqrt(np.mean(np.square(np.stack(virial_err))))], - "Virial MAE/Natoms": [np.mean(np.abs(np.stack(virial_err_per_atom)))], - "Virial RMSE/Natoms": [np.sqrt(np.mean(np.square(np.stack(virial_err_per_atom))))]} + "Virial MAE": [np.mean(np.abs(np.stack(virial_err)))], + "Virial RMSE": [np.sqrt(np.mean(np.square(np.stack(virial_err))))], + "Virial MAE/Natoms": [np.mean(np.abs(np.stack(virial_err_per_atom)))], + "Virial RMSE/Natoms": [ + np.sqrt(np.mean(np.square(np.stack(virial_err_per_atom)))) + ], + } ) return res @@ -107,9 +114,11 @@ def ase_test(model_name, testpath_mapping): """ if model_name == "DP": from deepmd.calculator import DP + CALC = DP("/path/to/your/model.pth") elif model_name == "MACE": from mace.calculators import mace_mp + CALC = mace_mp(model="medium", device="cuda", default_dtype="float64") else: raise ValueError(f"Model {model_name} not supported.") @@ -120,10 +129,10 @@ def ase_test(model_name, testpath_mapping): with open(f"{ood_name}_dptest.json", "w") as f: json.dump(head_dptest_res, f, indent=4) - + if __name__ == "__main__": mapping = { "head_A": ["/path/to/your/test_data"], "head_B": ["/path/to/your/test_data"], } - ase_test("DP", mapping) \ No newline at end of file + ase_test("DP", mapping) diff --git a/lambench/metrics/direct_tasks_metrics.yml b/lambench/metrics/direct_tasks_metrics.yml index 35d60fe2..5088276d 100644 --- a/lambench/metrics/direct_tasks_metrics.yml +++ b/lambench/metrics/direct_tasks_metrics.yml @@ -2,59 +2,104 @@ ANI: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.19852160741116331 + dataset_lstsq_std_force: 2.1346504447862467 + dataset_lstsq_std_virial: null HEA25_S: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.4030134885402307 + dataset_lstsq_std_force: 1.547935906005218 + dataset_lstsq_std_virial: null HEA25_bulk: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.3915534935785154 + dataset_lstsq_std_force: 2.0282929631352595 + dataset_lstsq_std_virial: null HEMC_HEMB: energy_weight: 1.0 force_weight: 1.0 virial_weight: 1.0 + dataset_lstsq_std_energy: 0.4747620729960006 + dataset_lstsq_std_force: 1.8364064815279475 + dataset_lstsq_std_virial: 0.46794153820403517 MD22: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.007933722948473606 + dataset_lstsq_std_force: 1.1408107903449518 + dataset_lstsq_std_virial: null Collision: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.46905055129776263 + dataset_lstsq_std_force: 1.9436163958502828 + dataset_lstsq_std_virial: null H_nature_2022: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.3866962525278814 + dataset_lstsq_std_force: 2.038498926750271 + dataset_lstsq_std_virial: null REANN_CO2_Ni100: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.007410380917403982 + dataset_lstsq_std_force: 0.6333957228116258 + dataset_lstsq_std_virial: null NequIP_NC_2022: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.006640637887476306 + dataset_lstsq_std_force: 0.9421629142611048 + dataset_lstsq_std_virial: null AIMD-Chig: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.012594134644011467 + dataset_lstsq_std_force: 0.8599492186937476 + dataset_lstsq_std_virial: null CGM_MLP_NC2023: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.9985195007897916 + dataset_lstsq_std_force: 2.0217020563415375 + dataset_lstsq_std_virial: null Cu_MgO_catalysts: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.014304435776268616 + dataset_lstsq_std_force: 0.18909695700558055 + dataset_lstsq_std_virial: null Subalex_9k: energy_weight: 1.0 force_weight: 1.0 virial_weight: null + dataset_lstsq_std_energy: 0.7749643377228373 + dataset_lstsq_std_force: 1.1503770816187873 + dataset_lstsq_std_virial: 0.8678699239404155 WBM_downsampled: energy_weight: 1.0 force_weight: null virial_weight: null + dataset_lstsq_std_energy: 0.374831692072961 + dataset_lstsq_std_force: null + dataset_lstsq_std_virial: null Torsionnet500: energy_weight: 1.0 force_weight: 1.0 virial_weight: 1.0 + dataset_lstsq_std_energy: 0.03387935785428199 + dataset_lstsq_std_force: 0.05088788030453556 + dataset_lstsq_std_virial: null diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py index f4ebdcd2..afdb312c 100644 --- a/lambench/models/ase_models.py +++ b/lambench/models/ase_models.py @@ -9,11 +9,14 @@ from lambench.tasks.direct.direct_predict import DirectPredictTask + class ASEModel(BaseLargeAtomModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if self.model_type != "ASE": - raise ValueError(f"Model type {self.model_type} is not supported by ASEModel") + raise ValueError( + f"Model type {self.model_type} is not supported by ASEModel" + ) def evaluate(self, task: DirectPredictTask) -> Optional[dict[str, float]]: if task.target_name != "standard": @@ -22,24 +25,30 @@ def evaluate(self, task: DirectPredictTask) -> Optional[dict[str, float]]: else: if self.model_id.lower().startswith("mace"): from mace.calculators import mace_mp + CALC = mace_mp(model="medium", device="cuda", default_dtype="float64") elif self.model_id.lower().startswith("orb"): from orb_models.forcefield import pretrained from orb_models.forcefield.calculator import ORBCalculator - orbff = pretrained.orb_v2(device="cuda") # orb-v2-20241011.ckpt + + orbff = pretrained.orb_v2(device="cuda") # orb-v2-20241011.ckpt CALC = ORBCalculator(orbff, device="cuda") elif self.model_id.lower().startswith("7net"): from sevenn.sevennet_calculator import SevenNetCalculator - CALC = SevenNetCalculator("7net-0_11July2024", device='cuda') + + CALC = SevenNetCalculator("7net-0_11July2024", device="cuda") elif self.model_id.lower().startswith("eqv2"): from fairchem.core import OCPCalculator + CALC = OCPCalculator( - checkpoint_path="eqV2_153M_omat_mp_salex.pt", - cpu=False + checkpoint_path="eqV2_153M_omat_mp_salex.pt", cpu=False ) elif self.model_id.lower().startswith("mattersim"): from mattersim.forcefield import MatterSimCalculator - CALC = MatterSimCalculator(load_path="MatterSim-v1.0.0-5M.pth", device="cuda") + + CALC = MatterSimCalculator( + load_path="MatterSim-v1.0.0-5M.pth", device="cuda" + ) elif self.model_id.lower().startswith("dp"): logging.error("Please use DPModel for DP models.") return @@ -66,13 +75,12 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict: assert systems, f"No systems found in the test data {test_data}." mix_type = any(systems[0].rglob('real_atom_types.npy')) - for filepth in systems: if mix_type: sys = dpdata.MultiSystems() - sys.load_systems_from_file(filepth, fmt='deepmd/npy/mixed') + sys.load_systems_from_file(filepth, fmt="deepmd/npy/mixed") else: - sys = dpdata.LabeledSystem(filepth, fmt='deepmd/npy') + sys = dpdata.LabeledSystem(filepth, fmt="deepmd/npy") for ls in sys: for frame in ls: @@ -83,50 +91,72 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict: energy_predict = np.array(atoms.get_potential_energy()) if not np.isnan(energy_predict): atomic_numbers = atoms.get_atomic_numbers() - atom_num.append(np.bincount(atomic_numbers, minlength=max_ele_num)) + atom_num.append( + np.bincount(atomic_numbers, minlength=max_ele_num) + ) energy_pre.append(energy_predict) energy_lab.append(frame.data["energies"]) energy_err.append(energy_predict - frame.data["energies"]) # TODO: handle the datasets without force labels - force_err.append(frame.data["forces"].squeeze(0) - np.array(force_pred)) - energy_err_per_atom.append(energy_err[-1]/force_err[-1].shape[0]) + force_err.append( + frame.data["forces"].squeeze(0) - np.array(force_pred) + ) + energy_err_per_atom.append( + energy_err[-1] / force_err[-1].shape[0] + ) try: stress = atoms.get_stress() - stress_tensor = - np.array( - [[stress[0],stress[5],stress[4]], - [stress[5],stress[1],stress[3]], - [stress[4],stress[3], stress[2]]] - )* atoms.get_volume() - virial_err.append(frame.data['virials'] - stress_tensor) - virial_err_per_atom.append(virial_err[-1]/force_err[-1].shape[0]) + stress_tensor = ( + -np.array( + [ + [stress[0], stress[5], stress[4]], + [stress[5], stress[1], stress[3]], + [stress[4], stress[3], stress[2]], + ] + ) + * atoms.get_volume() + ) + virial_err.append(frame.data["virials"] - stress_tensor) + virial_err_per_atom.append( + virial_err[-1] / force_err[-1].shape[0] + ) except: pass # no virial in the data - atom_num = np.array(atom_num) energy_err = np.array(energy_err) energy_pre = np.array(energy_pre) energy_lab = np.array(energy_lab) shift_bias, _, _, _ = np.linalg.lstsq(atom_num, energy_err, rcond=1e-10) - unbiased_energy = energy_pre - (atom_num @ shift_bias.reshape(max_ele_num, -1)).reshape(-1) - energy_lab.squeeze() + unbiased_energy = ( + energy_pre + - (atom_num @ shift_bias.reshape(max_ele_num, -1)).reshape(-1) + - energy_lab.squeeze() + ) unbiased_energy_err_per_a = unbiased_energy / atom_num.sum(-1) - res = { - "Energy MAE": [np.mean(np.abs(np.stack(unbiased_energy)))], - "Energy RMSE": [np.sqrt(np.mean(np.square(unbiased_energy)))], - "Energy MAE/Natoms": [np.mean(np.abs(np.stack(unbiased_energy_err_per_a)))], - "Energy RMSE/Natoms": [np.sqrt(np.mean(np.square(unbiased_energy_err_per_a)))], - "Force MAE": [np.mean(np.abs(np.concatenate(force_err)))], - "Force RMSE": [np.sqrt(np.mean(np.square(np.concatenate(force_err))))], + "Energy MAE": [np.mean(np.abs(np.stack(unbiased_energy)))], + "Energy RMSE": [np.sqrt(np.mean(np.square(unbiased_energy)))], + "Energy MAE/Natoms": [np.mean(np.abs(np.stack(unbiased_energy_err_per_a)))], + "Energy RMSE/Natoms": [ + np.sqrt(np.mean(np.square(unbiased_energy_err_per_a))) + ], + "Force MAE": [np.mean(np.abs(np.concatenate(force_err)))], + "Force RMSE": [np.sqrt(np.mean(np.square(np.concatenate(force_err))))], } if virial_err_per_atom != []: res.update( { - "Virial MAE": [np.mean(np.abs(np.stack(virial_err)))], - "Virial RMSE": [np.sqrt(np.mean(np.square(np.stack(virial_err))))], - "Virial MAE/Natoms": [np.mean(np.abs(np.stack(virial_err_per_atom)))], - "Virial RMSE/Natoms": [np.sqrt(np.mean(np.square(np.stack(virial_err_per_atom))))]} + "Virial MAE": [np.mean(np.abs(np.stack(virial_err)))], + "Virial RMSE": [np.sqrt(np.mean(np.square(np.stack(virial_err))))], + "Virial MAE/Natoms": [ + np.mean(np.abs(np.stack(virial_err_per_atom))) + ], + "Virial RMSE/Natoms": [ + np.sqrt(np.mean(np.square(np.stack(virial_err_per_atom)))) + ], + } ) return res diff --git a/requirements.txt b/requirements.txt index fc636400..3ed91b3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ certifi==2024.8.30 charset-normalizer==3.4.0 contourpy==1.3.1 cycler==0.12.1 -dpdata==0.2.21 fonttools==4.55.3 h5py==3.12.1 idna==3.10 @@ -53,3 +52,4 @@ uncertainties==3.2.2 urllib3==2.2.3 wcmatch==10.0 -e git+https://github.com/iProzd/deepmd-kit.git@1224_dpa3_alpha_mulg1mess#egg=deepmd-kit +-e git+https://github.com/anyangml/dpdata.git@feat/set-force-label-optional#egg=dpdata