From 76d988f653eac91ffc1201202ce530c70bdcc2e6 Mon Sep 17 00:00:00 2001 From: Qianyu Zheng Date: Sun, 19 Nov 2023 20:26:46 -0800 Subject: [PATCH 1/6] Remove dependence of calculator on trainer --- configs/config_calculator.yml | 87 +--------- matdeeplearn/common/ase_utils.py | 190 +++++++++++++++++--- matdeeplearn/models/__init__.py | 4 +- matdeeplearn/trainers/property_trainer.py | 24 --- scripts/optimization.py | 201 ++++++++++++++++++++++ 5 files changed, 378 insertions(+), 128 deletions(-) create mode 100644 scripts/optimization.py diff --git a/configs/config_calculator.yml b/configs/config_calculator.yml index 64ba8b7e..d16505f5 100644 --- a/configs/config_calculator.yml +++ b/configs/config_calculator.yml @@ -1,26 +1,9 @@ -trainer: matdeeplearn.trainers.PropertyTrainer - task: - run_mode: train - identifier: my_train_job - parallel: False - # If seed is not set, then it will be random every time - seed: 12345678 - # Defaults to run directory if not specified - save_dir: - # continue from a previous job - continue_job: False - # spefcify if the training state is loaded: epochs, learning rate, etc - load_training_state: False # Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file. - checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt - # E.g. ["train", "val", "test"] - write_output: [train, val, test] - # Specify if labels are provided for the predict task - # labels: True - use_amp: True + checkpoint_path: ./checkpoints/cgcnn_checkpoint.pt model: + # Model used by the calculator name: CGCNN # model attributes dim1: 100 @@ -39,62 +22,11 @@ model: # Compute edge attributes on the fly in the model forward otf_edge_attr: True # Compute node attributes on the fly in the model forward - otf_node_attr: True + otf_node_attr: False # compute gradients w.r.t to positions and cell, requires otf_edge_attr=True gradient: True -optim: - max_epochs: 40 - max_checkpoint_epochs: 0 - lr: 0.002 - # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper - loss: - loss_type: TorchLossWrapper - loss_args: {loss_fn: l1_loss} - # gradient clipping value - clip_grad_norm: 10 - batch_size: 100 - optimizer: - optimizer_type: AdamW - optimizer_args: {} - scheduler: - scheduler_type: ReduceLROnPlateau - scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002} - #Training print out frequency (print per n number of epochs) - verbosity: 5 - # tdqm progress bar per batch in the epoch - batch_tqdm: False - -dataset: - name: test_data - # Whether the data has already been processed and a data.pt file is present from a previous run - processed: False - # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path} - src: data/force_data/data.json - # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file - # Example: target_path: "data/raw_graph_scalar/targets.csv" - target_path: - # Path to save processed data.pt file - pt_path: data/force_data/ - # Either "node" or "graph" level - prediction_level: graph - - transforms: - - name: GetY - args: - # index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset - # For example, an index: 0 (default) will use the first entry in the target vector - # if all values are to be predicted simultaneously, then specify index: -1 - index: -1 - otf_transform: True # Optional parameter, default is True - # Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html) - data_format: json - # specify if additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress] - additional_attributes: - # Print out processing info - verbose: True - # Index of target column in targets.csv - # graph specific settings +dataset: preprocess_params: # one of mdl (minimum image convention), ocp (all neighbors included) edge_calc_method: ocp @@ -118,13 +50,4 @@ dataset: self_loop: True # Method of obtaining atom dictionary: available: (onehot) node_representation: onehot - all_neighbors: True - - # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html - num_workers: 0 - # Where the dataset is loaded; either "cpu" or "cuda" - dataset_device: cpu - # Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data) - train_ratio: 0.9 - val_ratio: 0.05 - test_ratio: 0.05 + all_neighbors: True diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py index c25047c3..16b86ddd 100644 --- a/matdeeplearn/common/ase_utils.py +++ b/matdeeplearn/common/ase_utils.py @@ -1,7 +1,7 @@ import torch import numpy as np import yaml -from ase import Atoms +from ase import Atoms, units from ase.geometry import Cell from ase.calculators.calculator import Calculator from matdeeplearn.preprocessor.helpers import generate_node_features @@ -10,6 +10,16 @@ import logging from typing import List from matdeeplearn.common.registry import registry +from ase.io.trajectory import Trajectory +from ase.optimize import FIRE +from ase.constraints import ExpCellFilter +from ase.io import read, write +from ase.md.langevin import Langevin +from ase.md.verlet import VelocityVerlet +from ase.md.npt import NPT +from ase.md.velocitydistribution import MaxwellBoltzmannDistribution +import os +from time import time logging.basicConfig(level=logging.INFO) @@ -39,21 +49,9 @@ def __init__(self, config): self.otf_node_attr = config["model"].get("otf_node_attr", False) assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True." - trainer_name = config.get("trainer", "matdeeplearn.trainers.PropertyTrainer") - assert trainer_name.count(".") >= 1, "Trainer name should be in format {module}.{trainer_name}, like matdeeplearn.trainers.PropertyTrainer" - - trainer_cls = registry.get_trainer_class(trainer_name) - load_state = config['task'].get('checkpoint_path', None) - assert trainer_cls is not None, "Trainer not found" - self.trainer = trainer_cls.from_config(config) - - try: - self.trainer.load_checkpoint() - except ValueError: - logging.warning("No checkpoint.pt file is found, and an untrained model is used for prediction.") - + self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + self.model = MDLCalculator._load_model(config, self.device) self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250) - self.device = 'cpu' def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None): """ @@ -87,11 +85,13 @@ def calculate(self, atoms: Atoms, properties=implemented_properties, system_chan data_list = [data] loader = DataLoader(data_list, batch_size=1) - - out = self.trainer.predict_by_calculator(loader) - self.results['energy'] = out['energy'] - self.results['forces'] = out['forces'] - self.results['stress'] = out['stress'] + loader_iter = iter(loader) + batch = next(loader_iter).to(self.device) + out = self.model(batch) + + self.results['energy'] = out['output'].detach().cpu().numpy() + self.results['forces'] = out['pos_grad'].detach().cpu().numpy() + self.results['stress'] = out['cell_grad'].squeeze().detach().cpu().numpy() @staticmethod def data_to_atoms_list(data: Data) -> List[Atoms]: @@ -120,3 +120,153 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: for i in range(len(data.structure_id)): atoms_list[i].structure_id = data.structure_id[i][0] return atoms_list + + @staticmethod + def _load_model(config, rank): + """Loads the model if from a config file.""" + + graph_config = config['dataset']['preprocess_params'] + model_config = config['model'] + node_dim = graph_config["node_dim"] + edge_dim = graph_config["edge_dim"] + + model_name = 'matdeeplearn.models.' + model_config["name"] + logging.info(f'MDLCalculator: setting up {model_name} for calculation') + model_cls = registry.get_model_class(model_name) + model = model_cls( + node_dim=node_dim, + edge_dim=edge_dim, + output_dim=1, + cutoff_radius=graph_config["cutoff_radius"], + n_neighbors=graph_config["n_neighbors"], + graph_method=graph_config["edge_calc_method"], + num_offsets=graph_config["num_offsets"], + **model_config + ) + model = model.to(rank) + checkpoint = torch.load(config['task']["checkpoint_path"]) + + try: + model.load_state_dict(checkpoint["state_dict"]) + logging.info(f'MDLCalculator: model weights loaded from {config["task"]["checkpoint_path"]}') + except ValueError: + logging.warning("MDLCalculator: No checkpoint.pt file is found, and an untrained model is used for prediction.") + + return model + + +class StructureOptimizer: + def __init__(self, + calculator, + relax_cell: bool = False, + ): + self.calculator = calculator + self.relax_cell = relax_cell + + def optimize(self, atoms: Atoms, logfile=None, write_traj_name=None): + atoms.calc = self.calculator + if self.relax_cell: + atoms = ExpCellFilter(atoms) + + optimizer = FIRE(atoms, logfile=logfile) + + if write_traj_name is not None: + traj = Trajectory(write_traj_name + '.traj', 'w', atoms) + optimizer.attach(traj.write, interval=1) + + start_time = time() + optimizer.run(fmax=0.001, steps=500) + end_time = time() + num_steps = optimizer.get_number_of_steps() + + if isinstance(atoms, ExpCellFilter): + atoms = atoms.atoms + time_per_step = (end_time - start_time) / num_steps if num_steps != 0 else 0 + return atoms, time_per_step + + @staticmethod + def save_results(original, optimized, structure_ids, file_path, header=None): + if header == None: + header = "strucure_id, original_pos, original_pos, original_pos, optimized_pos, optimized_pos, optimized_pos" + for i in range(len(original)): + with open(file_path, 'a') as f: + if i == 0: + f.write(header + '\n') + n = len(original[i]) if isinstance(original, list) else 1 + ids = np.array([structure_ids[i]] * n).reshape(-1, 1) + data = np.hstack((ids, original[i], optimized[i])) + np.savetxt(f, data, fmt='%s', delimiter=', ') + + +class MDSimulator: + def __init__(self, + simulation_constant_type: str, + timestep: float, + temperature: float, + calculator: Calculator + ): + self.simulation = simulation_constant_type + self.timestep = timestep + self.temperature = temperature + self.calculator = calculator + self.results = [] + + def run_simulation(self, + atoms: Atoms, + num_steps: int, + log_console: int = 0, + save_energy: bool = False, + **kwargs): + atoms.set_calculator(self.calculator) + MaxwellBoltzmannDistribution(atoms, temperature_K=self.temperature) + + common_valid_params = ['trajectory', 'logfile', 'loginterval', 'append_trajectory', 'temperature_K'] + valid_params = common_valid_params + + if self.simulation == 'NVE': + valid_params.append('dt') + simulation_class = VelocityVerlet + elif self.simulation == 'NVT': + valid_params += ['friction', 'fixcm', 'communicator', 'rng'] + kwargs.setdefault('friction', 1e-3) + kwargs.setdefault('temperature_K', self.temperature) + simulation_class = Langevin + elif self.simulation == 'NPT': + valid_params += ['externalstress', 'ttime', 'pfactor', 'mask'] + kwargs.setdefault('externalstress', 0.01623) + kwargs.setdefault('pfactor', 0.6) + kwargs.setdefault('temperature_K', self.temperature) + simulation_class = NPT + else: + raise NotImplementedError("Currently unimplemented simulation type") + + invalid_params = [key for key in kwargs.keys() if key not in valid_params] + if invalid_params: + raise ValueError(f"Invalid parameter(s): {', '.join(invalid_params)}") + + dyn = simulation_class(atoms, timestep=self.timestep * units.fs, **kwargs) + time_ps = 0 + + if log_console: + def printenergy(a=atoms): + epot = a.get_potential_energy() + ekin = a.get_kinetic_energy() + temperature = 2 * ekin / (3 * len(a) * units.kB) + if save_energy: + self.results.append({'time': time_ps, 'Etot': epot + ekin, + 'Epot': epot, 'Ekin': ekin, 'T': temperature}) + print('Energy of system: Epot = %.3feV Ekin = %.3feV (T=%3.0fK) ' + 'Etot = %.3feV' % (epot, ekin, temperature, epot + ekin)) + time_ps += 0.005 + dyn.attach(printenergy, interval=log_console) + + start = time() + dyn.run(steps=num_steps) + end = time() + print(f"Time: {end-start:.4f}") + + @staticmethod + def traj_to_xdatcar(traj_file): + traj = read(traj_file, index=':') + file_name, _ = os.path.splitext(traj_file) + write(file_name + '.XDATCAR', traj) \ No newline at end of file diff --git a/matdeeplearn/models/__init__.py b/matdeeplearn/models/__init__.py index 4ea6de74..f229e758 100644 --- a/matdeeplearn/models/__init__.py +++ b/matdeeplearn/models/__init__.py @@ -1,4 +1,4 @@ -__all__ = ["BaseModel"] +__all__ = ["BaseModel", 'CGCNN'] from .base_model import BaseModel - +from .cgcnn import CGCNN diff --git a/matdeeplearn/trainers/property_trainer.py b/matdeeplearn/trainers/property_trainer.py index 3fd71839..c81a8d8a 100644 --- a/matdeeplearn/trainers/property_trainer.py +++ b/matdeeplearn/trainers/property_trainer.py @@ -335,30 +335,6 @@ def predict(self, loader, split, results_dir="train_results", write_output=True, torch.cuda.empty_cache() return predictions - - def predict_by_calculator(self, loader): - self.model.eval() - - assert isinstance(loader, torch.utils.data.dataloader.DataLoader) - assert len(loader) == 1, f"Predicting by calculator only allows one structure at a time, but got {len(loader)} structures." - - if str(self.rank) not in ("cpu", "cuda"): - loader = get_dataloader( - loader.dataset, batch_size=loader.batch_size, sampler=None - ) - - results = [] - loader_iter = iter(loader) - for i in range(0, len(loader_iter)): - batch = next(loader_iter).to(self.rank) - out = self._forward(batch.to(self.rank)) - - energy = None if out.get('output') is None else out.get('output').data.cpu().numpy() - stress = None if out.get('cell_grad') is None else out.get('cell_grad').view(-1, 3).data.cpu().numpy() - forces = None if out.get('pos_grad') is None else out.get('pos_grad').data.cpu().numpy() - - results = {'energy': energy, 'stress': stress, 'forces': forces} - return results def _forward(self, batch_data): output = self.model(batch_data) diff --git a/scripts/optimization.py b/scripts/optimization.py new file mode 100644 index 00000000..a12f61b6 --- /dev/null +++ b/scripts/optimization.py @@ -0,0 +1,201 @@ +from ase import Atoms +from matdeeplearn.common.ase_utils import MDLCalculator, StructureOptimizer +import numpy as np +from time import time +import numpy as np +from tqdm import tqdm +import json +from ase.geometry import Cell +import copy +import os +from scipy.stats import norm +from pymatgen.optimization.neighbors import find_points_in_spheres + + +def get_rdf(structure: Atoms, σ = 0.05, dr = 0.01, max_r = 12.0): + rmax = max_r + 3 * σ + dr + rs = np.arange(0.5, rmax + dr, dr) + nr = len(rs) - 1 + natoms = len(structure) + + normalization = 4 / structure.get_cell().volume * np.pi + normalization *= (natoms * rs[0:-1]) ** 2 + + rdf = np.zeros(nr, dtype = int) + lattice_matrix = np.array(structure.get_cell(), dtype=float) + cart_coords = np.array(structure.get_positions(), dtype=float) + + for i in range(natoms): + rdf += np.histogram(find_points_in_spheres( + all_coords = cart_coords, + center_coords = np.array([cart_coords[i]], dtype=float), + r = rmax, + pbc = np.array([1, 1, 1], dtype=int), + lattice = lattice_matrix, + tol = 1e-8, + )[3], rs)[0] + + return np.convolve(rdf / normalization, + norm.pdf(np.arange(-3 * σ, 3 * σ + dr, dr), 0.0, σ), + mode="same")[0:(nr - int((3 * σ) / dr) - 1)] + +def rdf_mse(rdf1, rdf2): + if isinstance(rdf1, list): + rdf1 = np.array(rdf1) + if isinstance(rdf2, list): + rdf2 = np.array(rdf2) + return np.mean((rdf1 - rdf2) ** 2) + +def wright_factor(rdf, rdf_ref): + return np.sum((rdf - rdf_ref) ** 2) / np.sum(rdf_ref ** 2) + +def cartesian_distance(original, optimized): + original, optimized = np.array(original), np.array(optimized) + dist = np.sqrt(np.sum((original - optimized) ** 2, axis=1)) + return np.mean(dist) + +def build_atoms_list(num=-1): + with open('./data/test_data/optimize_data/data.json', 'r') as f: + data = json.load(f) + total = num if num != -1 else len(data) + unrelaxed = [] + dft_unrelaxed = [] + dft_unrelaxed_energy = [] + relaxed = [] + unrelaxed_energy = [] + relaxed_energy = [] + ids = [] + for structure in data[:total]: + if 'type' not in structure.keys(): + continue + + # unrelaxed structures: need its unrelaxed and ground truth relaxed information + if structure['type'] == 'unrelaxed_in_test': + unrelaxed_atoms = Atoms(symbols=structure['atomic_numbers'], + positions=structure['unrelaxed_positions'], + cell=Cell(structure['unrelaxed_cell'])) + dft_atoms = Atoms(symbols=structure['atomic_numbers'], + positions=structure['relaxed_positions'], + cell=Cell(structure['relaxed_cell'])) + + unrelaxed_atoms.structure_id = structure['structure_id'] + dft_atoms.structure_id = structure['structure_id'] + unrelaxed.append(unrelaxed_atoms) + dft_unrelaxed.append(dft_atoms) + unrelaxed_energy.append(structure['unrelaxed_energy']) + dft_unrelaxed_energy.append(structure['relaxed_energy']) + + # relaxed structures: only need its ground truth relaxed information + if structure['type'] == 'relaxed_in_test': + relaxed_atoms = Atoms(symbols=structure['atomic_numbers'], + positions=structure['relaxed_positions'], + cell=Cell(structure['relaxed_cell'])) + relaxed_atoms.structure_id = structure['structure_id'] + relaxed.append(relaxed_atoms) + relaxed_energy.append(structure['relaxed_energy']) + + ids.append(structure['structure_id']) + return ids, unrelaxed, relaxed, dft_unrelaxed,\ + unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy + + +def evaluate(task, original_atoms, optimized_atoms, times, dft_relaxed=None, + save=False, optim=None, folder=None, filename=None): + original_energy = [atoms.get_potential_energy() for atoms in original_atoms] + optimized_energy = [atoms.get_potential_energy() for atoms in optimized_atoms] + n_atoms = [len(atoms.get_atomic_numbers()) for atoms in optimized_atoms] + original_pos = [atoms.get_positions() for atoms in original_atoms] + optimized_pos = [atoms.get_positions() for atoms in optimized_atoms] + original_cell = [atoms.get_cell() for atoms in original_atoms] + optimized_cell = [atoms.get_cell() for atoms in optimized_atoms] + + result = None + + e_drop = np.mean((np.array(original_energy) - np.array(optimized_energy)) / n_atoms) + + if task == 'relaxed': + e_drop = np.mean((np.array(original_energy) - np.array(optimized_energy)) / n_atoms) + pos_delta = np.mean([cartesian_distance(pos1, pos2) for pos1, pos2 in zip(original_pos, optimized_pos)]) + cell_delta = np.mean([cartesian_distance(c1, c2) for c1, c2 in zip(original_cell, optimized_cell)]) + + rdf_optimized = [get_rdf(atoms) for atoms in optimized_atoms] + rdf_original = [get_rdf(atoms) for atoms in original_atoms] + rdf_compared_to_dft = np.mean([wright_factor(x, y) for x, y in zip(rdf_optimized, rdf_original)]) + + result = { + 'Average energy drop per atom': e_drop, + 'Average change in atoms cartesian positions': pos_delta, + 'Average change in cell parameters': cell_delta, + 'MSE for RDF': rdf_mse(rdf_original, rdf_optimized), + 'Wright Factor': rdf_compared_to_dft, + 'Averge time per step': np.mean(times) + } + elif task == 'unrelaxed': + assert dft_relaxed is not None + dft_pos = [atoms.get_positions() for atoms in dft_relaxed] + pos_compared_to_dft = np.mean([cartesian_distance(pos1, pos2) for pos1, pos2 in zip(optimized_pos, dft_pos)]) + + dft_cell = [atoms.get_cell()[:] for atoms in dft_relaxed] + cell_compared_to_dft = np.mean([cartesian_distance(c1, c2) for c1, c2 in zip(optimized_cell, dft_cell)]) + + rdf_optimized = [get_rdf(atoms) for atoms in optimized_atoms] + rdf_dft = [get_rdf(atoms) for atoms in dft_relaxed] + rdf_compared_to_dft = np.mean([wright_factor(x, y) for x, y in zip(rdf_dft, rdf_optimized)]) + + result = { + 'Average energy drop per atom': e_drop, + 'Average cartesian distance for atom positions compared to dft': pos_compared_to_dft, + 'Average cartesian distance for cell positions compared to dft': cell_compared_to_dft, + 'MSE for RDF': rdf_mse(rdf_dft, rdf_optimized), + 'Wright Factor': rdf_compared_to_dft, + 'Averge time per step': np.mean(times) + } + else: + raise NotImplementedError("Not implemented evaluate type.") + + if optim is not None and save: + os.makedirs(folder, exist_ok=True) + optim.save_results(original_energy, optimized_energy, structure_ids, f"{folder}/{filename}_energy.csv", header="strucure_id, original_energy, optimized_energy") + optim.save_results(original_pos, optimized_pos, structure_ids, f"{folder}/{filename}_positions.csv", header="strucure_id, original_pos, original_pos, original_pos, optimized_pos, optimized_pos, optimized_pos") + optim.save_results(original_cell, optimized_cell, structure_ids, f"{folder}/{filename}_cell.csv", header="strucure_id, original_cell, original_cell, original_cell, optimized_cell, optimized_cell, optimized_cell") + return result + + +if __name__ == '__main__': + ''' + What you need to adjust: + - A data.json containing relaxed/unrelaxed version of structures (line 58) + - A config file containing information for calculator (line 174) + - Choose whether optimizing relaxed or unrelaxed structures (line 183 and 186) + - Folder to save results (line 197) + ''' + structure_ids, unrelaxed, relaxed, dft,\ + unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy = build_atoms_list(20) + + calc_str = './configs/config_calculator.yml' + # calc_str = './configs/calculator/config_calculator.yml' + print('Using calculator config from', calc_str) + calculator = MDLCalculator(config=calc_str) + + original_atoms, optimized_atoms = [], [] + + optim = StructureOptimizer(calculator, relax_cell=True) + start = time() + total_iterations = len(unrelaxed) + times = [] + + for idx, atoms in enumerate(tqdm(unrelaxed, total=total_iterations)): + atoms.set_calculator(calculator) + original_atoms.append(atoms) + to_optim = copy.deepcopy(atoms) + optimized, time_per_step = optim.optimize(to_optim) + times.append(time_per_step) + optimized_atoms.append(optimized) + end = time() + + print(f"Time elapsed: {end - start:.3f}") + result = evaluate('unrelaxed', original_atoms, optimized_atoms, times, dft_relaxed=dft, + save=False, optim=optim, folder='./train_outs/cgcnn_lj_unrelax', filename='unrelax') + + for key in result.keys(): + print(f"{key}: {result[key]:.4f}") \ No newline at end of file From b14714cf76fb545dba65957e402bc0f37f4bad4e Mon Sep 17 00:00:00 2001 From: Qianyu Zheng Date: Sat, 25 Nov 2023 10:07:13 -0800 Subject: [PATCH 2/6] Update optimization --- matdeeplearn/common/ase_utils.py | 129 ++++++++++++++++++++++++++----- scripts/optimization.py | 39 ++++++---- 2 files changed, 131 insertions(+), 37 deletions(-) diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py index 16b86ddd..70a42a11 100644 --- a/matdeeplearn/common/ase_utils.py +++ b/matdeeplearn/common/ase_utils.py @@ -5,10 +5,11 @@ from ase.geometry import Cell from ase.calculators.calculator import Calculator from matdeeplearn.preprocessor.helpers import generate_node_features +from matdeeplearn.models.base_model import BaseModel from torch_geometric.data.data import Data from torch_geometric.loader import DataLoader import logging -from typing import List +from typing import List, Tuple from matdeeplearn.common.registry import registry from ase.io.trajectory import Trajectory from ase.optimize import FIRE @@ -26,22 +27,32 @@ class MDLCalculator(Calculator): + """ + A neural networked based Calculator that calculates the energy, forces and stress of a crystal structure. + """ implemented_properties = ["energy", "forces", "stress"] - def __init__(self, config): + def __init__(self, config, rank='cuda:0'): """ Initialize the MDLCalculator instance. Args: - config (str or dict): Configuration settings for the MDLCalculator. + - config (str or dict): Configuration settings for the MDLCalculator. + - rank (str): Rank of device the calculator calculates properties. Defaults to 'cuda:0' Raises: - AssertionError: If the trainer name is not in the correct format or if the trainer class is not found. + - AssertionError: If the trainer name is not in the correct format or if the trainer class is not found. """ Calculator.__init__(self) + if isinstance(config, str): + logging.info(f'MDLCalculator instantiated from config: {config}') with open(config, "r") as yaml_file: config = yaml.safe_load(yaml_file) + elif isinstance(config, dict): + logging.info('MDLCalculator instantiated from a dictionary.') + else: + raise NotImplementedError('Unsupported config type.') gradient = config["model"].get("gradient", False) otf_edge_index = config["model"].get("otf_edge_index", False) @@ -49,24 +60,24 @@ def __init__(self, config): self.otf_node_attr = config["model"].get("otf_node_attr", False) assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True." - self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + self.device = rank if torch.cuda.is_available() else 'cpu' self.model = MDLCalculator._load_model(config, self.device) self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250) - def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None): + def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None) -> None: """ Calculate energy, forces, and stress for a given ase.Atoms object. Args: - atoms (ase.Atoms): The atomic structure for which calculations are to be performed. - properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress']. - system_changes: Not supported in the current implementation. + - atoms (ase.Atoms): The atomic structure for which calculations are to be performed. + - properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress']. + - system_changes: Not supported in the current implementation. Returns: - None: The results are stored in the instance variable 'self.results'. + - None: The results are stored in the instance variable 'self.results'. Note: - This method performs energy, forces, and stress calculations using a neural network-based calculator. + - This method performs energy, forces, and stress calculations using a neural network-based calculator. The results are stored in the instance variable 'self.results' as 'energy', 'forces', and 'stress'. """ Calculator.calculate(self, atoms, properties, system_changes) @@ -101,11 +112,11 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: with its associated properties such as positions and cell. Args: - data (Data): A data object containing information about atomic structures. + - data (Data): A data object containing information about atomic structures. Returns: - List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure - with positions and associated properties. + - List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure + with positions and associated properties. """ cells = data.cell.numpy() @@ -122,8 +133,20 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: return atoms_list @staticmethod - def _load_model(config, rank): - """Loads the model if from a config file.""" + def _load_model(config: dict, rank: str) -> BaseModel: + """ + This static method loads a machine learning model based on the provided configuration. + + Parameters: + - config (dict): Configuration dictionary containing model and dataset parameters. + - rank: Rank information for distributed training. + + Returns: + - model: Loaded model for further calculations. + + Raises: + - ValueError: If the 'checkpoint.pt' file is not found, the method issues a warning and uses an untrained model for prediction. + """ graph_config = config['dataset']['preprocess_params'] model_config = config['model'] @@ -156,14 +179,37 @@ def _load_model(config, rank): class StructureOptimizer: + """ + This class provides functionality to optimize the structure of an Atoms object using a specified calculator. + """ def __init__(self, calculator, relax_cell: bool = False, ): + """ + Initialize the StructureOptimizer. + + Parameters: + - calculator (Calculator): A calculator object for performing energy and force calculations. + - relax_cell (bool): If True, the cell will be relaxed in addition to positions during the optimization process. + """ self.calculator = calculator self.relax_cell = relax_cell - def optimize(self, atoms: Atoms, logfile=None, write_traj_name=None): + def optimize(self, atoms: Atoms, logfile=None, write_traj_name=None) -> Tuple[Atoms, float]: + """ + This method optimizes the structure of the given Atoms object using the specified calculator. + If `relax_cell` is True, the cell will be relaxed. Trajectory information can be written to a file. + + Parameters: + - atoms: An Atoms object representing the structure to be optimized. + - logfile: File to write optimization log information. + - write_traj_name: File to write trajectory of the optimization. + + Returns: + - atoms: The optimized Atoms object. + - time_per_step: The average time taken per optimization step. + """ atoms.calc = self.calculator if self.relax_cell: atoms = ExpCellFilter(atoms) @@ -185,7 +231,17 @@ def optimize(self, atoms: Atoms, logfile=None, write_traj_name=None): return atoms, time_per_step @staticmethod - def save_results(original, optimized, structure_ids, file_path, header=None): + def save_results(original, optimized, structure_ids, file_path, header=None) -> None: + """ + This static method saves the original and optimized atomic positions along with structure IDs to a file. + + Parameters: + - original: List of original atomic positions or a single set of positions. + - optimized: List of optimized atomic positions or a single set of positions. + - structure_ids: List of structure IDs corresponding to each set of positions. + - file_path: Path to the file where results will be saved. + - header: Optional header for the saved file. + """ if header == None: header = "strucure_id, original_pos, original_pos, original_pos, optimized_pos, optimized_pos, optimized_pos" for i in range(len(original)): @@ -199,12 +255,25 @@ def save_results(original, optimized, structure_ids, file_path, header=None): class MDSimulator: + """ + This class provides a simple interface for running molecular dynamics simulations. + It currently supports microcanonical ('NVE'), canonical ('NVT'), or isothermal-isobaric ('NPT') simulations. + """ def __init__(self, simulation_constant_type: str, timestep: float, temperature: float, calculator: Calculator ): + """ + Initialize a Molecular Dynamics Simulator. + + Parameters: + - simulation_constant_type (str): Type of molecular dynamics simulation ('NVE', 'NVT', 'NPT'). + - timestep (float): Time step for the simulation in femtoseconds. + - temperature (float): Initial temperature for the simulation in Kelvin. + - calculator (Calculator): A calculator object for energy and force calculations. + """ self.simulation = simulation_constant_type self.timestep = timestep self.temperature = temperature @@ -216,7 +285,18 @@ def run_simulation(self, num_steps: int, log_console: int = 0, save_energy: bool = False, - **kwargs): + **kwargs) -> None: + """ + This method runs a molecular dynamics simulation for the specified number of steps using the chosen ensemble. + It currently supports the 'NVE', 'NVT', and 'NPT' ensembles. Energy information can be logged to the console and saved. + + Parameters: + - atoms (Atoms): An Atoms object representing the molecular system. + - num_steps (int): Number of simulation steps to run. + - log_console (int): Interval for logging energy information to the console (0 for no logging). + - save_energy (bool): If True, save energy information during the simulation. + - **kwargs: Additional parameters specific to the chosen simulation ensemble. + """ atoms.set_calculator(self.calculator) MaxwellBoltzmannDistribution(atoms, temperature_K=self.temperature) @@ -266,7 +346,16 @@ def printenergy(a=atoms): print(f"Time: {end-start:.4f}") @staticmethod - def traj_to_xdatcar(traj_file): + def traj_to_xdatcar(traj_file) -> None: + """ + This static method reads a trajectory file and saves it in the XDATCAR format. + + Parameters: + - traj_file (str): Path to the trajectory file. + + Returns: + - None + """ traj = read(traj_file, index=':') file_name, _ = os.path.splitext(traj_file) write(file_name + '.XDATCAR', traj) \ No newline at end of file diff --git a/scripts/optimization.py b/scripts/optimization.py index a12f61b6..a27d8b72 100644 --- a/scripts/optimization.py +++ b/scripts/optimization.py @@ -3,7 +3,7 @@ import numpy as np from time import time import numpy as np -from tqdm import tqdm +import logging import json from ase.geometry import Cell import copy @@ -11,6 +11,8 @@ from scipy.stats import norm from pymatgen.optimization.neighbors import find_points_in_spheres +logging.basicConfig(level=logging.INFO) + def get_rdf(structure: Atoms, σ = 0.05, dr = 0.01, max_r = 12.0): rmax = max_r + 3 * σ + dr @@ -55,7 +57,7 @@ def cartesian_distance(original, optimized): return np.mean(dist) def build_atoms_list(num=-1): - with open('./data/test_data/optimize_data/data.json', 'r') as f: + with open('./data/torchmd_test_data/data.json', 'r') as f: data = json.load(f) total = num if num != -1 else len(data) unrelaxed = [] @@ -162,40 +164,43 @@ def evaluate(task, original_atoms, optimized_atoms, times, dft_relaxed=None, if __name__ == '__main__': - ''' - What you need to adjust: - - A data.json containing relaxed/unrelaxed version of structures (line 58) - - A config file containing information for calculator (line 174) - - Choose whether optimizing relaxed or unrelaxed structures (line 183 and 186) - - Folder to save results (line 197) - ''' structure_ids, unrelaxed, relaxed, dft,\ - unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy = build_atoms_list(20) + unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy = build_atoms_list() + + # Configurations below + calc_str = './configs/calculator/config_cgcnn_lj.yml' + + save = True + folder = './train_outs/unrelaxed_data' + optim_type = 'unrelax' + # Configurations above - calc_str = './configs/config_calculator.yml' - # calc_str = './configs/calculator/config_calculator.yml' - print('Using calculator config from', calc_str) calculator = MDLCalculator(config=calc_str) + + logging.info(f"Saving optimizaion results for {optim_type}ed structures to: {folder}") original_atoms, optimized_atoms = [], [] optim = StructureOptimizer(calculator, relax_cell=True) start = time() - total_iterations = len(unrelaxed) times = [] + + to_optim = relaxed if optim_type == 'relax' else unrelaxed - for idx, atoms in enumerate(tqdm(unrelaxed, total=total_iterations)): + for idx, atoms in enumerate(to_optim): atoms.set_calculator(calculator) original_atoms.append(atoms) to_optim = copy.deepcopy(atoms) optimized, time_per_step = optim.optimize(to_optim) times.append(time_per_step) optimized_atoms.append(optimized) + if (idx + 1) % 20 == 0: + logging.info(f"Completed optimizing {idx + 1} structures.") end = time() print(f"Time elapsed: {end - start:.3f}") - result = evaluate('unrelaxed', original_atoms, optimized_atoms, times, dft_relaxed=dft, - save=False, optim=optim, folder='./train_outs/cgcnn_lj_unrelax', filename='unrelax') + result = evaluate(optim_type, original_atoms, optimized_atoms, times, dft_relaxed=dft, + save=True, optim=optim, folder=folder, filename=optim_type) for key in result.keys(): print(f"{key}: {result[key]:.4f}") \ No newline at end of file From 394f9b14f2c519319d064b30e3adba91b0a69d25 Mon Sep 17 00:00:00 2001 From: qzheng75 Date: Mon, 8 Jan 2024 16:17:22 -0800 Subject: [PATCH 3/6] Fixed merging conficts --- matdeeplearn/trainers/base_trainer.py | 689 ++++++++++++++++++++++ matdeeplearn/trainers/property_trainer.py | 541 +++++++++++++++++ 2 files changed, 1230 insertions(+) create mode 100644 matdeeplearn/trainers/base_trainer.py create mode 100644 matdeeplearn/trainers/property_trainer.py diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py new file mode 100644 index 00000000..318d31b1 --- /dev/null +++ b/matdeeplearn/trainers/base_trainer.py @@ -0,0 +1,689 @@ +import copy +import csv +import logging +import os +import random +from abc import ABC, abstractmethod +from datetime import datetime + +import numpy as np +import torch +import torch.optim as optim +from torch import distributed as dist +from torch import nn +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel +from torch.optim import Optimizer +from torch.utils.data.distributed import DistributedSampler +from torch_geometric.data import Dataset + +from matdeeplearn.common.data import (DataLoader, dataset_split, + get_dataloader, get_dataset) +from matdeeplearn.common.registry import registry +from matdeeplearn.models.base_model import BaseModel +from matdeeplearn.modules.evaluator import Evaluator +from matdeeplearn.modules.scheduler import LRScheduler + + +@registry.register_trainer("base") +class BaseTrainer(ABC): + def __init__( + self, + model: BaseModel, + dataset: Dataset, + optimizer: Optimizer, + sampler: DistributedSampler, + scheduler: LRScheduler, + data_loader: DataLoader, + loss: nn.Module, + max_epochs: int, + clip_grad_norm: float = None, + max_checkpoint_epochs: int = None, + identifier: str = None, + verbosity: int = None, + batch_tqdm: bool = False, + write_output: list = ["train", "val", "test"], + output_frequency: int = 1, + model_save_frequency: int = 1, + save_dir: str = None, + checkpoint_path: str = None, + use_amp: bool = False, + ): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = model + self.dataset = dataset + self.optimizer = optimizer + self.train_sampler = sampler + self.data_loader = data_loader + self.scheduler = scheduler + self.loss_fn = loss + self.max_epochs = max_epochs + self.clip_grad_norm = clip_grad_norm + self.max_checkpoint_epochs = max_checkpoint_epochs + self.train_verbosity = verbosity + self.batch_tqdm = batch_tqdm + self.write_output = write_output + self.output_frequency = output_frequency + self.model_save_frequency = model_save_frequency + + self.epoch = 0 + self.step = 0 + self.epoch_time = None + + self.metrics = [{} for _ in range(len(self.model))] + self.best_metric = [1e10 for _ in range(len(self.model))] + self.best_model_state = [None for _ in range(len(self.model))] + + self.save_dir = save_dir if save_dir else os.getcwd() + self.checkpoint_path = checkpoint_path + self.use_amp = use_amp + + if self.use_amp: + logging.info("Using PyTorch automatic mixed-precision") + + self.scaler = GradScaler(enabled=self.use_amp and self.device.type == "cuda") + + self.evaluator = Evaluator() + + if self.train_sampler == None: + self.rank = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.rank = self.train_sampler.rank + + timestamp = datetime.now().timestamp() + self.timestamp_id = datetime.fromtimestamp(timestamp).strftime( + "%Y-%m-%d-%H-%M-%S-%f" + )[:-3] + if identifier: + self.timestamp_id = f"{self.timestamp_id}-{identifier}" + + if self.train_verbosity: + logging.info( + f"GPU is available: {torch.cuda.is_available()}, Quantity: {os.environ.get('LOCAL_WORLD_SIZE', None)}" + ) + logging.info("Dataset(s) used:") + for key in self.dataset: + logging.info(f"Dataset length: {key, len(self.dataset[key])}") + if self.dataset.get("train"): + logging.debug(self.dataset["train"][0]) + logging.debug(self.dataset["train"][0].z[0]) + logging.debug(self.dataset["train"][0].y[0]) + else: + logging.debug(self.dataset[list(self.dataset.keys())[0]][0]) + logging.debug(self.dataset[list(self.dataset.keys())[0]][0].x[0]) + logging.debug(self.dataset[list(self.dataset.keys())[0]][0].y[0]) + + if str(self.rank) not in ("cpu", "cuda"): + logging.debug(self.model[0].module) + else: + logging.debug(self.model[0]) + + @classmethod + def from_config(cls, config): + """Class method used to initialize BaseTrainer from a config object + config has the following sections: + trainer + task + model + optim + scheduler + dataset + """ + + cls.set_seed(config["task"].get("seed")) + + if config["task"]["parallel"] == True: + # os.environ["MASTER_ADDR"] = "localhost" + # os.environ["MASTER_PORT"] = "12355" + local_world_size = os.environ.get("LOCAL_WORLD_SIZE", None) + local_world_size = int(local_world_size) + dist.init_process_group( + "nccl", world_size=local_world_size, init_method="env://" + ) + rank = int(dist.get_rank()) + else: + rank = torch.device("cuda" if torch.cuda.is_available() else "cpu") + local_world_size = 1 + dataset = cls._load_dataset(config["dataset"], config["task"]["run_mode"]) + model = cls._load_model(config["model"], config["dataset"]["preprocess_params"], dataset, local_world_size, rank) + optimizer = cls._load_optimizer(config["optim"], model, local_world_size) + sampler = cls._load_sampler(config["optim"], dataset, local_world_size, rank) + data_loader = cls._load_dataloader( + config["optim"], + config["dataset"], + dataset, + sampler, + config["task"]["run_mode"], + config["model"] + ) + + scheduler = cls._load_scheduler(config["optim"]["scheduler"], optimizer) + loss = cls._load_loss(config["optim"]["loss"]) + max_epochs = config["optim"]["max_epochs"] + clip_grad_norm = config["optim"].get("clip_grad_norm", None) + verbosity = config["optim"].get("verbosity", None) + batch_tqdm = config["optim"].get("batch_tqdm", False) + write_output = config["task"].get("write_output", []) + output_frequency = config["task"].get("output_frequency", 0) + model_save_frequency = config["task"].get("model_save_frequency", 0) + max_checkpoint_epochs = config["optim"].get("max_checkpoint_epochs", None) + identifier = config["task"].get("identifier", None) + + # pass in custom results home dir and load in prev checkpoint dir + save_dir = config["task"].get("save_dir", None) + checkpoint_path = config["task"].get("checkpoint_path", None) + + if local_world_size > 1: + dist.barrier() + + return cls( + model=model, + dataset=dataset, + optimizer=optimizer, + sampler=sampler, + scheduler=scheduler, + data_loader=data_loader, + loss=loss, + max_epochs=max_epochs, + clip_grad_norm=clip_grad_norm, + max_checkpoint_epochs=max_checkpoint_epochs, + identifier=identifier, + verbosity=verbosity, + batch_tqdm=batch_tqdm, + write_output=write_output, + output_frequency=output_frequency, + model_save_frequency=model_save_frequency, + save_dir=save_dir, + checkpoint_path=checkpoint_path, + use_amp=config["task"].get("use_amp", False), + ) + + @staticmethod + def _load_dataset(dataset_config, task): + """Loads the dataset if from a config file.""" + if dataset_config.get("dataset_device", "cpu"): + logging.info("Loading dataset to "+dataset_config.get("dataset_device", "cpu")) + else: + logging.info("Loading dataset to default device") + + dataset_path = dataset_config["pt_path"] + dataset = {} + if isinstance(dataset_config["src"], dict): + if dataset_config["src"].get("train"): + dataset["train"] = get_dataset( + dataset_path, + processed_file_name="data_train.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + if dataset_config["src"].get("val"): + dataset["val"] = get_dataset( + dataset_path, + processed_file_name="data_val.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + if dataset_config["src"].get("test"): + dataset["test"] = get_dataset( + dataset_path, + processed_file_name="data_test.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + if dataset_config["src"].get("predict"): + dataset["predict"] = get_dataset( + dataset_path, + processed_file_name="data_predict.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + + else: + if task != "predict": + dataset_full = get_dataset( + dataset_path, + processed_file_name="data.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + train_ratio = dataset_config["train_ratio"] + val_ratio = dataset_config["val_ratio"] + test_ratio = dataset_config["test_ratio"] + dataset["train"], dataset["val"], dataset["test"] = dataset_split( + dataset_full, + train_ratio, + val_ratio, + test_ratio, + ) + else: + # if running in predict mode, then no data splitting is performed + dataset["predict"] = get_dataset( + dataset_path, + processed_file_name="data.pt", + transform_list=dataset_config.get("transforms", []), + dataset_device=dataset_config.get("dataset_device", "cpu"), + ) + + return dataset + + @staticmethod + def _load_model(model_config, graph_config, dataset, world_size, rank): + """Loads the model if from a config file.""" + + if dataset.get("train"): + dataset = dataset["train"] + else: + dataset = dataset[list(dataset.keys())[0]] + + if isinstance(dataset, torch.utils.data.Subset): + dataset = dataset.dataset + + model_list = [] + # Obtain node, edge, and output dimensions for model initialization + for mod in range(model_config["model_ensemble"]): + rand_seed = random.randint(1,10000) + random.seed(rand_seed) + np.random.seed(rand_seed) + torch.manual_seed(rand_seed) + torch.cuda.manual_seed_all(rand_seed) + #torch.autograd.set_detect_anomaly(True) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + if graph_config["node_dim"]: + node_dim = graph_config["node_dim"] + else: + node_dim = dataset.num_features + edge_dim = graph_config["edge_dim"] + if dataset[0]["y"].ndim == 0: + output_dim = 1 + else: + output_dim = dataset[0]["y"].shape[1] + + # Determine if this is a node or graph level model + if dataset[0]["y"].shape[0] == dataset[0]["z"].shape[0]: + model_config["prediction_level"] = "node" + elif dataset[0]["y"].shape[0] == 1: + model_config["prediction_level"] = "graph" + else: + raise ValueError( + "Target labels do not have the correct dimensions for node or graph-level prediction." + ) + + model_cls = registry.get_model_class(model_config["name"]) + model = model_cls( + node_dim=node_dim, + edge_dim=edge_dim, + output_dim=output_dim, + cutoff_radius=graph_config["cutoff_radius"], + n_neighbors=graph_config["n_neighbors"], + graph_method=graph_config["edge_calc_method"], + num_offsets=graph_config["num_offsets"], + **model_config + ) + model = model.to(rank) + + # model = torch_geometric.compile(model) + # if model_config["load_model"] == True: + # checkpoint = torch.load(model_config["model_path"]) + # model.load_state_dict(checkpoint["state_dict"]) + + if world_size > 1: + model = DistributedDataParallel( + model, device_ids=[rank], find_unused_parameters=False + ) + + model_list.append(model) + + return model_list + + @staticmethod + def _load_optimizer(optim_config, model, world_size): + # Some issues with DDP learning rate + # Unclear regarding the best practice + # Currently, effective batch size per epoch is batch_size * world_size + # Some discussions here: + # https://github.com/Lightning-AI/lightning/discussions/3706 + # https://discuss.pytorch.org/t/should-we-split-batch-size-according-to-ngpu-per-node-when-distributeddataparallel/72769/15 + optim_list = [] + for i in range(len(model)): + if world_size > 1: + optim_config["lr"] = optim_config["lr"] * world_size + + optimizer = getattr(optim, optim_config["optimizer"]["optimizer_type"])( + model[i].parameters(), + lr=optim_config["lr"], + **optim_config["optimizer"].get("optimizer_args", {}), + ) + optim_list.append(optimizer) + + return optim_list + + @staticmethod + def _load_sampler(optim_config, dataset, world_size, rank): + # TODO: write sampler, look into BalancedBatchSampler in + # OCP for their implementation of train_sampler batches + # (part of self.train_loader) + # TODO: update sampler with more attributes like rank and num_replicas (world_size) + if dataset.get("train"): + dataset = dataset["train"] + else: + dataset = dataset[list(dataset.keys())[0]] + + if world_size > 1: + sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank) + else: + sampler = None + + return sampler + + @staticmethod + def _load_dataloader(optim_config, dataset_config, dataset, sampler, run_mode, model_config): + data_loader = [{} for _ in range(model_config["model_ensemble"])] + + batch_size = optim_config.get("batch_size") + + for i in range(model_config["model_ensemble"]): + if dataset.get("train"): + data_loader[i]["train_loader"] = get_dataloader( + dataset["train"], batch_size=batch_size, num_workers=dataset_config.get("num_workers", 0), sampler=sampler + ) + if dataset.get("val"): + data_loader[i]["val_loader"] = get_dataloader( + dataset["val"], batch_size=batch_size, num_workers=dataset_config.get("num_workers", 0), sampler=None + ) + if dataset.get("test"): + data_loader[i]["test_loader"] = get_dataloader( + dataset["test"], batch_size=batch_size, num_workers=dataset_config.get("num_workers", 0), sampler=None + ) + if run_mode == "predict" and dataset.get("predict"): + data_loader[i]["predict_loader"] = get_dataloader( + dataset["predict"], batch_size=batch_size, num_workers=dataset_config.get("num_workers", 0), sampler=None + ) + + return data_loader + + @staticmethod + def _load_scheduler(scheduler_config, optimizer): + scheduler_type = scheduler_config["scheduler_type"] + scheduler_args = scheduler_config["scheduler_args"] + scheduler = [] + for i in range(len(optimizer)): + scheduler.append(LRScheduler(optimizer[i], scheduler_type, scheduler_args)) + + return scheduler + + @staticmethod + def _load_loss(loss_config): + """Loads the loss from either the TorchLossWrapper or custom loss functions in matdeeplearn""" + loss_cls = registry.get_loss_class(loss_config["loss_type"]) + # if there are other params for loss type, include in call + if loss_config.get("loss_args"): + return loss_cls(**loss_config["loss_args"]) + else: + return loss_cls() + + @abstractmethod + def _load_task(self): + """Initializes task-specific info. Implemented by derived classes.""" + + @abstractmethod + def train(self): + """Implemented by derived classes.""" + + @abstractmethod + def validate(self): + """Implemented by derived classes.""" + + @abstractmethod + def predict(self): + """Implemented by derived classes.""" + + def update_best_model(self, metric, index=None, write_model=False, write_csv=False): + """Updates the best val metric and model, saves the best model, and saves the best model predictions""" + self.best_metric[index] = metric[type(self.loss_fn).__name__]["metric"] + + if str(self.rank) not in ("cpu", "cuda"): + self.best_model_state[index] = copy.deepcopy(self.model[index].module.state_dict()) + else: + self.best_model_state[index] = copy.deepcopy(self.model[index].state_dict()) + + if write_model == True: + self.save_model("best_checkpoint.pt", index, metric, True) + + if write_csv == True: + logging.debug( + f"Saving prediction results for epoch {self.epoch} to: /results/{self.timestamp_id}/train_results/" + ) + + if "train" in self.write_output: + self.predict(self.data_loader[index]["train_loader"], "train") + if "val" in self.write_output and self.data_loader[index].get("val_loader"): + self.predict(self.data_loader[index]["val_loader"], "val") + if "test" in self.write_output and self.data_loader[index].get("test_loader"): + self.predict(self.data_loader[index]["test_loader"], "test") + + def save_model(self, checkpoint_file, index=None, metric=None, training_state=True): + """Saves the model state dict""" + if index != None: + if str(self.rank) not in ("cpu", "cuda"): + if training_state: + state = { + "epoch": self.epoch, + "step": self.step, + "state_dict": self.model[index].module.state_dict(), + "optimizer": self.optimizer[index].state_dict(), + "scheduler": self.scheduler[index].scheduler.state_dict(), + "scaler":self.scaler.state_dict(), + "best_metric": self.best_metric[index], + "identifier": self.timestamp_id, + "seed": torch.random.initial_seed(), + } + else: + state = {"state_dict": self.model[index].module.state_dict(), "metric": metric} + else: + if training_state: + state = { + "epoch": self.epoch, + "step": self.step, + "state_dict": self.model[index].state_dict(), + "optimizer": self.optimizer[index].state_dict(), + "scheduler": self.scheduler[index].scheduler.state_dict(), + "scaler":self.scaler.state_dict(), + "best_metric": self.best_metric[index], + "identifier": self.timestamp_id, + "seed": torch.random.initial_seed(), + } + else: + state = {"state_dict": self.model[index].state_dict(), "metric": metric} + + num = str(index) + curr_checkpt_dir = os.path.join( + self.save_dir, "results", self.timestamp_id, f"checkpoint_{num}" + ) + os.makedirs(curr_checkpt_dir, exist_ok=True) + filename = os.path.join(curr_checkpt_dir, checkpoint_file) + + torch.save(state, filename) + del state + else: + state = [] + for i in range(len(self.model)): + if str(self.rank) not in ("cpu", "cuda"): + if training_state: + state.append({ + "epoch": self.epoch, + "step": self.step, + "state_dict": self.model.module[i].state_dict(), + "optimizer": self.optimizer[i].state_dict(), + "scheduler": self.scheduler[i].scheduler.state_dict(), + "scaler":self.scaler.state_dict(), + "best_metric": self.best_metric[i], + "identifier": self.timestamp_id, + "seed": torch.random.initial_seed(), + }) + else: + state.append({"state_dict": self.model[i].module.state_dict(), "metric": metric[i]}) + else: + if training_state: + state.append({ + "epoch": self.epoch, + "step": self.step, + "state_dict": self.model[i].state_dict(), + "optimizer": self.optimizer[i].state_dict(), + "scheduler": self.scheduler[i].scheduler.state_dict(), + "scaler": self.scaler.state_dict(), + "best_metric": self.best_metric[i], + "identifier": self.timestamp_id, + "seed": torch.random.initial_seed(), + }) + else: + state.append({"state_dict": self.model[i].state_dict(), "metric": metric}) + + for x in range(len(self.model)): + num = str(x) + curr_checkpt_dir = os.path.join( + self.save_dir, "results", self.timestamp_id, f"checkpoint_{num}" + ) + os.makedirs(curr_checkpt_dir, exist_ok=True) + filename = os.path.join(curr_checkpt_dir, checkpoint_file) + + torch.save(state[x], filename) + del state + + return filename + + def save_results(self, output, results_dir, filename, node_level_predictions=False, labels=True, std=False): + results_path = os.path.join( + self.save_dir, "results", self.timestamp_id, results_dir + ) + os.makedirs(results_path, exist_ok=True) + filename = os.path.join(results_path, filename) + shape = output.shape + + id_headers = ["structure_id"] + if node_level_predictions: + id_headers += ["node_id"] + + if labels==True: + if std == True: + num_cols = (shape[1] - len(id_headers)) // 3 + headers = id_headers + ["target"] * num_cols + ["prediction"] * num_cols + ["std"] * num_cols + else: + num_cols = (shape[1] - len(id_headers)) // 2 + headers = id_headers + ["target"] * num_cols + ["prediction"] * num_cols + else: + if std == True: + num_cols = (shape[1] - len(id_headers)) // 2 + headers = id_headers + ["prediction"] * num_cols + ["std"] * num_cols + else: + num_cols = (shape[1] - len(id_headers)) + headers = id_headers + ["prediction"] * num_cols + + with open(filename, "w") as f: + csvwriter = csv.writer(f) + for i in range(0, len(output)+1): + if i == 0: + csvwriter.writerow(headers) + elif i > 0: + csvwriter.writerow(output[i - 1, :]) + return filename + + # TODO: streamline this from PR #12 + def load_checkpoint(self, load_training_state=True): + """Loads the model from a checkpoint.pt file""" + + if not self.checkpoint_path: + raise ValueError("No checkpoint directory specified in config.") + + # checkpoint_path = glob.glob(os.path.join(self.checkpoint_path, "results", "*")) + # checkpoint_file = os.path.join(checkpoint_path, "checkpoint", "checkpoint.pt") + + # Load params from checkpoint + self.checkpoint_path = self.checkpoint_path.split(",") + checkpoint = [torch.load(i) for i in self.checkpoint_path] + + for n, check in enumerate(checkpoint): + if str(self.rank) not in ("cpu", "cuda"): + self.model[n].module.load_state_dict(checkpoint[n]["state_dict"]) + self.best_model_state[n] = copy.deepcopy(self.model[n].module.state_dict()) + else: + self.model[n].load_state_dict(checkpoint[n]["state_dict"]) + self.best_model_state[n] = copy.deepcopy(self.model[n].state_dict()) + if load_training_state == True: + if checkpoint[n].get("optimizer"): + self.optimizer[n].load_state_dict(checkpoint[n]["optimizer"]) + if checkpoint[n].get("scheduler"): + self.scheduler[n].scheduler.load_state_dict(checkpoint[n]["scheduler"]) + self.scheduler[n].update_lr() + if checkpoint[n].get("epoch"): + self.epoch = checkpoint[n]["epoch"] + if checkpoint[n].get("step"): + self.step = checkpoint[n]["step"] + if checkpoint[n].get("best_metric"): + self.best_metric[n] = checkpoint[n]["best_metric"] + if checkpoint[n].get("seed"): + seed = checkpoint[n]["seed"] + self.set_seed(seed) + if checkpoint[n].get("scaler"): + self.scaler.load_state_dict(checkpoint[n]["scaler"]) + #todo: load dataset to recreate the same split as the prior run + #self._load_dataset(dataset_config, task) + + # Loads portion of model dict into a new model for fine tuning + def load_pre_trained_weights(self, load_training_state=False): + """Loads the pre-trained model from a checkpoint.pt file""" + + if not self.checkpoint_path: + raise ValueError("No checkpoint directory specified in config.") + checkpoints_folder = os.path.join(self.fine_tune_from, 'checkpoint') + + self.checkpoint_path = self.checkpoint_path.split(",") + load_model = [torch.load(i, map_location=self.device) for i in self.checkpoint_path] + load_state = [i["state_dict"] for i in load_model] + model_state = [i.state_dict() for i in self.model] + + for x in range(len(self.model)): + for name, param in load_state[x].items(): + #if name not in model_state or name.split('.')[0] in "post_lin_list": + if name not in model_state[x]: + logging.debug('NOT loaded: %s', name) + continue + else: + logging.debug('loaded: %s', name) + if isinstance(param, torch.nn.parameter.Parameter): + # backwards compatibility for serialized parameters + param = param.data + model_state[x][name].copy_(param) + logging.info("Loaded pre-trained model with success.") + if load_training_state == True: + if checkpoint.get("optimizer"): + self.optimizer[x].load_state_dict(checkpoint["optimizer"]) + if checkpoint.get("scheduler"): + self.scheduler[x].scheduler.load_state_dict(checkpoint["scheduler"]) + self.scheduler[x].update_lr() + #if checkpoint.get("epoch"): + # self.epoch = checkpoint["epoch"] + #if checkpoint.get("step"): + # self.step = checkpoint["step"] + #if checkpoint.get("best_metric"): + # self.best_metric = checkpoint["best_metric"] + if checkpoint.get("seed"): + seed = checkpoint["seed"] + self.set_seed(seed) + if checkpoint.get("scaler"): + self.scaler.load_state_dict(checkpoint["scaler"]) + + @staticmethod + def set_seed(seed): + # https://pytorch.org/docs/stable/notes/randomness.html + if seed is None: + return + + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + #torch.autograd.set_detect_anomaly(True) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/matdeeplearn/trainers/property_trainer.py b/matdeeplearn/trainers/property_trainer.py new file mode 100644 index 00000000..06916021 --- /dev/null +++ b/matdeeplearn/trainers/property_trainer.py @@ -0,0 +1,541 @@ +import logging +import time + +import numpy as np +import torch +from torch import distributed as dist +from torch.cuda.amp import autocast + +from tqdm import tqdm +from matdeeplearn.common.data import get_dataloader +from matdeeplearn.common.registry import registry +from matdeeplearn.modules.evaluator import Evaluator +from matdeeplearn.trainers.base_trainer import BaseTrainer + + +@registry.register_trainer("property") +class PropertyTrainer(BaseTrainer): + def __init__( + self, + model, + dataset, + optimizer, + sampler, + scheduler, + data_loader, + loss, + max_epochs, + clip_grad_norm, + max_checkpoint_epochs, + identifier, + verbosity, + batch_tqdm, + write_output, + output_frequency, + model_save_frequency, + save_dir, + checkpoint_path, + use_amp, + ): + super().__init__( + model, + dataset, + optimizer, + sampler, + scheduler, + data_loader, + loss, + max_epochs, + clip_grad_norm, + max_checkpoint_epochs, + identifier, + verbosity, + batch_tqdm, + write_output, + output_frequency, + model_save_frequency, + save_dir, + checkpoint_path, + use_amp, + ) + + def train(self): + # Start training over epochs loop + # Calculate start_epoch from step instead of loading the epoch number + # to prevent inconsistencies due to different batch size in checkpoint. + # start_epoch = self.step // len(self.train_loader) + start_epoch = int(self.epoch) + + if str(self.rank) not in ("cpu", "cuda"): + dist.barrier() + + end_epoch = ( + self.max_checkpoint_epochs + start_epoch + if self.max_checkpoint_epochs + else self.max_epochs + ) + + if self.train_verbosity: + logging.info("Starting regular training") + if str(self.rank) not in ("cpu", "cuda"): + logging.info( + f"Running for {end_epoch - start_epoch} epochs on {type(self.model[0].module).__name__} model" + ) + else: + logging.info( + f"Running for {end_epoch - start_epoch} epochs on {type(self.model[0]).__name__} model" + ) + + for epoch in range(start_epoch, end_epoch): + epoch_start_time = time.time() + if self.train_sampler: + self.train_sampler.set_epoch(epoch) + # skip_steps = self.step % len(self.train_loader) + train_loader_iter = [] + for i in range(len(self.model)): + train_loader_iter.append(iter(self.data_loader[i]["train_loader"])) + # metrics for every epoch + _metrics = [{} for _ in range(len(self.model))] + + #for i in range(skip_steps, len(self.train_loader)): + pbar = tqdm(range(0, len(self.data_loader[0]["train_loader"])), disable=not self.batch_tqdm) + for i in pbar: + #self.epoch = epoch + (i + 1) / len(self.train_loader) + #self.step = epoch * len(self.train_loader) + i + 1 + #print(i, torch.cuda.memory_allocated() / (1024 * 1024), torch.cuda.memory_cached() / (1024 * 1024)) + batch = [] + for n, mod in enumerate(self.model): + mod.train() + batch.append(next(train_loader_iter[n]).to(self.rank)) + # Get a batch of train data + # batch = next(train_loader_iter).to(self.rank) + # print(epoch, i, torch.cuda.memory_allocated() / (1024 * 1024), torch.cuda.memory_cached() / (1024 * 1024), torch.sum(batch.n_atoms)) + # Compute forward, loss, backward + with autocast(enabled=self.use_amp): + out_list = self._forward(batch) + loss = self._compute_loss(out_list, batch) + #print(i, torch.cuda.memory_allocated() / (1024 * 1024), torch.cuda.memory_cached() / (1024 * 1024)) + grad_norm = [] + for i in range(len(self.model)): + grad_norm.append(self._backward(loss[i], i)) + pbar.set_description("Batch Loss {:.4f}, grad norm {:.4f}".format(torch.mean(torch.stack(loss)).item(), torch.mean(torch.stack(grad_norm)).item())) + # Compute metrics + # TODO: revert _metrics to be empty per batch, so metrics are logged per batch, not per epoch + # keep option to log metrics per epoch + for n in range(len(self.model)): + _metrics[n] = self._compute_metrics(out_list[n], batch[n], _metrics[n]) + self.metrics[n] = self.evaluator.update("loss", loss[n].item(), out_list[n]["output"].shape[0], _metrics[n]) + + self.epoch = epoch + 1 + + if str(self.rank) not in ("cpu", "cuda"): + dist.barrier() + + # TODO: could add param to eval and save on increments instead of every time + + # Save current model + torch.cuda.empty_cache() + if str(self.rank) in ("0", "cpu", "cuda"): + if self.model_save_frequency == 1: + self.save_model(checkpoint_file="checkpoint.pt", training_state=True) + + # Evaluate on validation set if it exists + if self.data_loader[0].get("val_loader"): + metric = self.validate("val") + else: + metric = self.metrics + + # Train loop timings + self.epoch_time = time.time() - epoch_start_time + # Log metrics + if epoch % self.train_verbosity == 0: + if self.data_loader[0].get("val_loader"): + self._log_metrics(metric) + else: + self._log_metrics() + + # Update best val metric and model, and save best model and predicted outputs + for i in range(len(self.model)): + if metric[i][type(self.loss_fn).__name__]["metric"] < self.best_metric[i]: + if self.output_frequency == 0: + if self.model_save_frequency == 1: + self.update_best_model(metric[i], i, write_model=True, write_csv=False) + else: + self.update_best_model(metric[i], i, write_model=False, write_csv=False) + elif self.output_frequency == 1: + if self.model_save_frequency == 1: + self.update_best_model(metric[i], i, write_model=True, write_csv=True) + else: + self.update_best_model(metric[i], i, write_model=False, write_csv=True) + + self._scheduler_step() + + + torch.cuda.empty_cache() + + if self.best_model_state: + for i in range(len(self.model)): + if str(self.rank) in "0": + self.model[i].module.load_state_dict(self.best_model_state[i]) + elif str(self.rank) in ("cpu", "cuda"): + self.model[i].load_state_dict(self.best_model_state[i]) + #if self.data_loader.get("test_loader"): + # metric = self.validate("test") + # test_loss = metric[type(self.loss_fn).__name__]["metric"] + #else: + # test_loss = "N/A" + if self.model_save_frequency != -1: + self.save_model("best_checkpoint.pt", index=None, metric=metric, training_state=True) + logging.info("Final Losses: ") + if "train" in self.write_output: + self.predict(self.data_loader[0]["train_loader"], "train") + if "val" in self.write_output and self.data_loader[0].get("val_loader"): + self.predict(self.data_loader[0]["val_loader"], "val") + if "test" in self.write_output and self.data_loader[0].get("test_loader"): + self.predict(self.data_loader[0]["test_loader"], "test") + + return self.best_model_state + + @torch.no_grad() + def validate(self, split="val"): + for i in range(len(self.model)): + self.model[i].eval() + + evaluator, metrics = Evaluator(), [{} for _ in range(len(self.model))] + + loader_iter = [] + for i in range(len(self.model)): + if split == "val": + loader_iter.append(iter(self.data_loader[i]["val_loader"])) + elif split == "test": + loader_iter.append(iter(self.data_loader[i]["test_loader"])) + elif split == "train": + loader_iter.append(iter(self.data_loader[i]["train_loader"])) + + for i in range(0, len(loader_iter[0])): + #print(i, torch.cuda.memory_allocated() / (1024 * 1024), torch.cuda.memory_cached() / (1024 * 1024)) + batch = [] + for i in range(len(self.model)): + batch.append(next(loader_iter[i]).to(self.rank)) + + out_list = self._forward(batch) + loss = self._compute_loss(out_list, batch) + # Compute metrics + #print(i, torch.cuda.memory_allocated() / (1024 * 1024), torch.cuda.memory_cached() / (1024 * 1024)) + for n in range(len(self.model)): + metrics[n] = self._compute_metrics(out_list[n], batch[n], metrics[n]) + metrics[n] = evaluator.update("loss", loss[n].item(), out_list[n]["output"].shape[0], metrics[n]) + del loss, batch, out_list + + torch.cuda.empty_cache() + + return metrics + + @torch.no_grad() + def predict(self, loader, split, results_dir="train_results", write_output=True, labels=True): + for mod in self.model: + mod.eval() + + # assert isinstance(loader, torch.utils.data.dataloader.DataLoader) + + # TODO: make this compatible with model ensemble + if str(self.rank) not in ("cpu", "cuda"): + loader = get_dataloader( + loader.dataset, batch_size=loader.batch_size, sampler=None + ) + + evaluator, metrics = Evaluator(), {} + predict, target = None, None + ids = [] + ids_pos_grad = [] + target_pos_grad = None + ids_cell_grad = [] + target_cell_grad = None + node_level = False + + loader_iter = iter(loader) + for i in range(0, len(loader_iter)): + batch = next(loader_iter).to(self.rank) + out_list = self._forward([batch]) + + out = {} + out_stack={} + for key in out_list[0].keys(): + temp = [o[key] for o in out_list] + if temp[0] is not None: + out_stack[key] = torch.stack(temp) + out[key] = torch.mean(out_stack[key], dim=0) + out[key+"_std"] = torch.std(out_stack[key], dim=0) + else: + out[key] = None + out[key+"_std"] = None + + + batch_p = [o["output"].data.cpu().numpy() for o in out_list] + batch_p_mean = out["output"].cpu().numpy() + batch_ids = batch.structure_id + batch_stds = out["output_std"].cpu().numpy() + + if labels == True: + loss = self._compute_loss(out, batch) + metrics = self._compute_metrics(out, batch, metrics) + metrics = evaluator.update( + "loss", loss.item(), out["output"].shape[0], metrics + ) + if str(self.rank) not in ("cpu", "cuda"): + batch_t = batch[self.model[0].module.target_attr].cpu().numpy() + else: + batch_t = batch[self.model[0].target_attr].cpu().numpy() + + # Node level prediction + if batch_p[0].shape[0] > loader.batch_size: + node_level = True + node_ids = batch.z.cpu().numpy() + structure_ids = np.repeat( + batch.structure_id, batch.n_atoms.cpu().numpy(), axis=0 + ) + batch_ids = np.column_stack((structure_ids, node_ids)) + + if out.get("pos_grad") != None: + batch_p_pos_grad = out["pos_grad"].data.cpu().numpy() + batch_p_pos_grad_std = out["pos_grad_std"].data.cpu().numpy() + node_ids_pos_grad = batch.z.cpu().numpy() + structure_ids_pos_grad = np.repeat( + batch.structure_id, batch.n_atoms.cpu().numpy(), axis=0 + ) + batch_ids_pos_grad = np.column_stack((structure_ids_pos_grad, node_ids_pos_grad)) + ids_pos_grad = batch_ids_pos_grad if i == 0 else np.row_stack((ids_pos_grad, batch_ids_pos_grad)) + predict_pos_grad = batch_p_pos_grad if i == 0 else np.concatenate((predict_pos_grad, batch_p_pos_grad), axis=0) + predict_pos_grad_std = batch_p_pos_grad_std if i == 0 else np.concatenate((predict_pos_grad_std, batch_p_pos_grad_std), axis=0) + if "forces" in batch: + batch_t_pos_grad = batch["forces"].cpu().numpy() + target_pos_grad = batch_t_pos_grad if i == 0 else np.concatenate((target_pos_grad, batch_t_pos_grad), axis=0) + + if out.get("cell_grad") != None: + batch_p_cell_grad = out["cell_grad"].data.view(out["cell_grad"].data.size(0), -1).cpu().numpy() + batch_p_cell_grad_std = out["cell_grad_std"].data.view(out["cell_grad"].data.size(0), -1).cpu().numpy() + batch_ids_cell_grad = batch.structure_id + ids_cell_grad = batch_ids_cell_grad if i == 0 else np.row_stack((ids_cell_grad, batch_ids_cell_grad)) + predict_cell_grad = batch_p_cell_grad if i == 0 else np.concatenate((predict_cell_grad, batch_p_cell_grad), axis=0) + predict_cell_grad_std = batch_p_cell_grad_std if i == 0 else np.concatenate((predict_cell_grad_std, batch_p_cell_grad_std), axis=0) + if "stress" in batch: + batch_t_cell_grad = batch["stress"].view(out["cell_grad"].data.size(0), -1).cpu().numpy() + target_cell_grad = batch_t_cell_grad if i == 0 else np.concatenate((target_cell_grad, batch_t_cell_grad), axis=0) + + ids = batch_ids if i == 0 else np.row_stack((ids, batch_ids)) + predict_mean = batch_p_mean if i == 0 else np.concatenate((predict_mean, batch_p_mean), axis=0) + stds = batch_stds if i == 0 else np.row_stack((stds, batch_stds)) + if i == 0: + predict = [0 for _ in range(len(self.model))] + for x in range(len(self.model)): + predict[x] = batch_p[x] if i == 0 else np.concatenate((predict[x], batch_p[x]), axis=0) + + if labels == True: + target = batch_t if i == 0 else np.concatenate((target, batch_t), axis=0) + + if labels == True: + del loss, batch, out + else: + del batch, out + + if write_output == True: + if labels == True: + if len(self.model) > 1: + self.save_results( + np.column_stack((ids, target, predict_mean, stds)), results_dir, f"{split}_predictions.csv", node_level, True, std=True, + ) + for x in range(len(self.model)): + mod = str(x) + self.save_results( + np.column_stack((ids, target, predict[x])), results_dir, f"{split}_predictions_{mod}.csv", node_level, True, std=False, + ) + else: + self.save_results( + np.column_stack((ids, target, predict_mean)), results_dir, f"{split}_predictions.csv", node_level, True, std=False, + ) + else: + if len(self.model) > 1: + self.save_results( + np.column_stack((ids, predict_mean, stds)), results_dir, f"{split}_predictions.csv", node_level, False, std=True, + ) + for x in range(len(self.model)): + mod = str(x) + self.save_results( + np.column_stack((ids, predict[x])), results_dir, f"{split}_predictions_{mod}.csv", node_level, False, std=False, + ) + else: + self.save_results( + np.column_stack((ids, predict_mean)), results_dir, f"{split}_predictions.csv", node_level, False, std=False, + ) + #if out.get("pos_grad") != None: + if len(ids_pos_grad) > 0: + if isinstance(target_pos_grad, np.ndarray): + if len(self.model) > 1: + self.save_results( + np.column_stack((ids_pos_grad, target_pos_grad, predict_pos_grad, predict_pos_grad_std)), results_dir, f"{split}_predictions_pos_grad.csv", True, True, std=True + ) + else: + self.save_results( + np.column_stack((ids_pos_grad, target_pos_grad, predict_pos_grad)), results_dir, f"{split}_predictions_pos_grad.csv", True, True, std=False + ) + else: + self.save_results( + np.column_stack((ids_pos_grad, predict_pos_grad)), results_dir, f"{split}_predictions_pos_grad.csv", True, False, std=False + ) + #if out.get("cell_grad") != None: + if len(ids_cell_grad) > 0: + if isinstance(target_cell_grad, np.ndarray): + if len(self.model) > 1: + self.save_results( + np.column_stack((ids_cell_grad, target_cell_grad, predict_cell_grad, predict_cell_grad_std)), results_dir, f"{split}_predictions_cell_grad.csv", False, True, std=True + ) + else: + self.save_results( + np.column_stack((ids_cell_grad, target_cell_grad, predict_cell_grad)), results_dir, f"{split}_predictions_cell_grad.csv", False, True, std=False + ) + else: + self.save_results( + np.column_stack((ids_cell_grad, predict_cell_grad)), results_dir, f"{split}_predictions_cell_grad.csv", False, False, std=False + ) + + if labels == True: + predict_loss = metrics[type(self.loss_fn).__name__]["metric"] + logging.info("Saved {:s} error: {:.5f}".format(split, predict_loss)) + if len(self.model) > 1: + predictions = {"ids":ids, "predict":predict_mean, "target":target, "std": stds} + else: + predictions = {"ids":ids, "predict":predict_mean, "target":target} + else: + if len(self.model) > 1: + predictions = {"ids":ids, "predict":predict_mean, "std": stds} + else: + predictions = {"ids":ids, "predict":predict_mean} + + torch.cuda.empty_cache() + + return predictions + + def predict_by_calculator(self, loader): + for x, mod in self.model: + mod.eval() + + assert isinstance(loader, torch.utils.data.dataloader.DataLoader) + assert len(loader) == 1, f"Predicting by calculator only allows one structure at a time, but got {len(loader)} structures." + + if str(self.rank) not in ("cpu", "cuda"): + loader = get_dataloader( + loader.dataset, batch_size=loader.batch_size, sampler=None + ) + + results = [] + loader_iter = iter(loader) + for i in range(0, len(loader_iter)): + batch = next(loader_iter).to(self.rank) + out_list = self._forward(batch.to(self.rank)) + out = {} + out_stack={} + for key in out_list[0].keys(): + temp = [o[key] for o in out_list] + if temp[0] is not None: + out_stack[key] = torch.stack(temp) + out[key] = torch.mean(out_stack[key], dim=0) + else: + out[key] = None + + energy = None if out.get('output') is None else out.get('output').data.cpu().numpy() + stress = None if out.get('cell_grad') is None else out.get('cell_grad').view(-1, 3).data.cpu().numpy() + forces = None if out.get('pos_grad') is None else out.get('pos_grad').data.cpu().numpy() + + results = {'energy': energy, 'stress': stress, 'forces': forces} + + return results + + def _forward(self, batch_data): + if len(batch_data) > 1: + output = [] + for i in range(len(self.model)): + output.append(self.model[i](batch_data[i])) + else: + output = [] + for i in range(len(self.model)): + output.append(self.model[i](batch_data[0])) + return output + + def _compute_loss(self, out, batch_data): + if isinstance(out, list): + loss = [] + for i in range(len(out)): + loss.append(self.loss_fn(out[i], batch_data[i])) + else: + loss = self.loss_fn(out, batch_data) + return loss + + def _backward(self, loss, index=None): + self.optimizer[index].zero_grad(set_to_none=True) + self.scaler.scale(loss).backward() + if self.clip_grad_norm: + grad_norm = torch.nn.utils.clip_grad_norm_( + self.model[index].parameters(), + max_norm=self.clip_grad_norm, + ) + self.scaler.step(self.optimizer[index]) + self.scaler.update() + + return grad_norm + + + def _compute_metrics(self, out, batch_data, metrics): + # TODO: finish this method + try: + property_target = batch_data.to(self.rank) + except: + property_target = batch_data + + metrics = self.evaluator.eval( + out, property_target, self.loss_fn, prev_metrics=metrics + ) + + return metrics + + def _log_metrics(self, val_metrics=None): + train_loss = [torch.tensor(i[type(self.loss_fn).__name__]["metric"]) for i in self.metrics] + train_loss = torch.mean(torch.stack(train_loss)).item() + lr = self.scheduler[0].lr + if not val_metrics: + val_loss = "N/A" + logging.info( + "Epoch: {:04d}, Learning Rate: {:.6f}, Training Error: {:.5f}, Val Error: {}, Time per epoch (s): {:.5f}".format( + int(self.epoch - 1), + lr, + train_loss, + val_loss, + self.epoch_time, + ) + ) + else: + val_loss = [torch.tensor(i[type(self.loss_fn).__name__]["metric"]) for i in val_metrics] + val_loss = torch.mean(torch.stack(val_loss)).item() + lr = self.scheduler[0].lr + logging.info( + "Epoch: {:04d}, Learning Rate: {:.6f}, Training Error: {:.5f}, Val Error: {:.5f}, Time per epoch (s): {:.5f}".format( + int(self.epoch - 1), + lr, + train_loss, + val_loss, + self.epoch_time, + ) + ) + + + def _load_task(self): + """Initializes task-specific info. Implemented by derived classes.""" + pass + + def _scheduler_step(self): + for i in range(len(self.model)): + if self.scheduler[i].scheduler_type == "ReduceLROnPlateau": + self.scheduler[i].step( + metrics=self.metrics[i][type(self.loss_fn).__name__]["metric"] + ) + else: + self.scheduler[i].step() From d77e132c923e538a8656e25f5865b98b0654f792 Mon Sep 17 00:00:00 2001 From: qzheng75 Date: Mon, 8 Jan 2024 18:07:49 -0800 Subject: [PATCH 4/6] Support ensemble model calculation. --- configs/config_calculator.yml | 1 + matdeeplearn/common/ase_utils.py | 83 +++++++++++++++++++------------- matdeeplearn/models/__init__.py | 7 --- 3 files changed, 50 insertions(+), 41 deletions(-) diff --git a/configs/config_calculator.yml b/configs/config_calculator.yml index d16505f5..0002543b 100644 --- a/configs/config_calculator.yml +++ b/configs/config_calculator.yml @@ -23,6 +23,7 @@ model: otf_edge_attr: True # Compute node attributes on the fly in the model forward otf_node_attr: False + model_ensemble: 1 # compute gradients w.r.t to positions and cell, requires otf_edge_attr=True gradient: True diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py index 70a42a11..cce8849b 100644 --- a/matdeeplearn/common/ase_utils.py +++ b/matdeeplearn/common/ase_utils.py @@ -1,4 +1,5 @@ import torch +import random import numpy as np import yaml from ase import Atoms, units @@ -61,7 +62,7 @@ def __init__(self, config, rank='cuda:0'): assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True." self.device = rank if torch.cuda.is_available() else 'cpu' - self.model = MDLCalculator._load_model(config, self.device) + self.models = MDLCalculator._load_model(config, self.device) self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250) def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None) -> None: @@ -97,12 +98,19 @@ def calculate(self, atoms: Atoms, properties=implemented_properties, system_chan data_list = [data] loader = DataLoader(data_list, batch_size=1) loader_iter = iter(loader) - batch = next(loader_iter).to(self.device) - out = self.model(batch) + batch = next(loader_iter).to(self.device) - self.results['energy'] = out['output'].detach().cpu().numpy() - self.results['forces'] = out['pos_grad'].detach().cpu().numpy() - self.results['stress'] = out['cell_grad'].squeeze().detach().cpu().numpy() + out_list = [] + for model in self.models: + out_list.append(model(batch)) + + energy = torch.stack([entry["output"] for entry in out_list]).mean(dim=0) + forces = torch.stack([entry["pos_grad"] for entry in out_list]).mean(dim=0) + stresses = torch.stack([entry["cell_grad"] for entry in out_list]).mean(dim=0) + + self.results['energy'] = energy.detach().cpu().numpy() + self.results['forces'] = forces.detach().cpu().numpy() + self.results['stress'] = stresses.squeeze().detach().cpu().numpy() @staticmethod def data_to_atoms_list(data: Data) -> List[Atoms]: @@ -133,7 +141,7 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: return atoms_list @staticmethod - def _load_model(config: dict, rank: str) -> BaseModel: + def _load_model(config: dict, rank: str) -> List[BaseModel]: """ This static method loads a machine learning model based on the provided configuration. @@ -142,40 +150,47 @@ def _load_model(config: dict, rank: str) -> BaseModel: - rank: Rank information for distributed training. Returns: - - model: Loaded model for further calculations. - - Raises: - - ValueError: If the 'checkpoint.pt' file is not found, the method issues a warning and uses an untrained model for prediction. + - model_list: A list of loaded models. """ graph_config = config['dataset']['preprocess_params'] model_config = config['model'] - node_dim = graph_config["node_dim"] - edge_dim = graph_config["edge_dim"] - + + model_list = [] model_name = 'matdeeplearn.models.' + model_config["name"] logging.info(f'MDLCalculator: setting up {model_name} for calculation') - model_cls = registry.get_model_class(model_name) - model = model_cls( - node_dim=node_dim, - edge_dim=edge_dim, - output_dim=1, - cutoff_radius=graph_config["cutoff_radius"], - n_neighbors=graph_config["n_neighbors"], - graph_method=graph_config["edge_calc_method"], - num_offsets=graph_config["num_offsets"], - **model_config - ) - model = model.to(rank) - checkpoint = torch.load(config['task']["checkpoint_path"]) + # Obtain node, edge, and output dimensions for model initialization + for _ in range(model_config["model_ensemble"]): + node_dim = graph_config["node_dim"] + edge_dim = graph_config["edge_dim"] + + model_cls = registry.get_model_class(model_name) + model = model_cls( + node_dim=node_dim, + edge_dim=edge_dim, + output_dim=1, + cutoff_radius=graph_config["cutoff_radius"], + n_neighbors=graph_config["n_neighbors"], + graph_method=graph_config["edge_calc_method"], + num_offsets=graph_config["num_offsets"], + **model_config + ) + model = model.to(rank) + model_list.append(model) - try: - model.load_state_dict(checkpoint["state_dict"]) - logging.info(f'MDLCalculator: model weights loaded from {config["task"]["checkpoint_path"]}') - except ValueError: - logging.warning("MDLCalculator: No checkpoint.pt file is found, and an untrained model is used for prediction.") - - return model + checkpoints = config['task']["checkpoint_path"].split(',') + if len(checkpoints) == 0: + logging.warning("MDLCalculator: No checkpoint.pt file is found, and untrained models are used for prediction.") + else: + for i in range(len(checkpoints)): + try: + checkpoint = torch.load(checkpoints[i]) + model_list[i].load_state_dict(checkpoint["state_dict"]) + logging.info(f'MDLCalculator: weights for model No.{i+1} loaded from {checkpoints[i]}') + except ValueError: + logging.warning(f"MDLCalculator: No checkpoint.pt file is found for model No.{i+1}, and an untrained model is used for prediction.") + + return model_list class StructureOptimizer: diff --git a/matdeeplearn/models/__init__.py b/matdeeplearn/models/__init__.py index 6f10a24f..613fbe71 100644 --- a/matdeeplearn/models/__init__.py +++ b/matdeeplearn/models/__init__.py @@ -1,9 +1,3 @@ -<<<<<<< HEAD -__all__ = ["BaseModel", 'CGCNN'] - -from .base_model import BaseModel -from .cgcnn import CGCNN -======= __all__ = ["BaseModel", "CGCNN", "MPNN", "SchNet", "TorchMD_ET"] from .base_model import BaseModel @@ -11,4 +5,3 @@ from .mpnn import MPNN from .schnet import SchNet from .torchmd_etEarly import TorchMD_ET ->>>>>>> 133fc4080f30855462cb023fac24be6355994053 From 1469e2aca48bdcbf477e12944199969e392ac76c Mon Sep 17 00:00:00 2001 From: qzheng75 Date: Mon, 8 Jan 2024 18:16:32 -0800 Subject: [PATCH 5/6] Remove predict_by_calculator --- matdeeplearn/common/ase_utils.py | 212 ++----------------------------- 1 file changed, 10 insertions(+), 202 deletions(-) diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py index cce8849b..ecf2acb0 100644 --- a/matdeeplearn/common/ase_utils.py +++ b/matdeeplearn/common/ase_utils.py @@ -1,27 +1,18 @@ -import torch -import random +from typing import List +import logging + import numpy as np +import torch import yaml -from ase import Atoms, units +from ase import Atoms from ase.geometry import Cell from ase.calculators.calculator import Calculator -from matdeeplearn.preprocessor.helpers import generate_node_features -from matdeeplearn.models.base_model import BaseModel from torch_geometric.data.data import Data from torch_geometric.loader import DataLoader -import logging -from typing import List, Tuple + from matdeeplearn.common.registry import registry -from ase.io.trajectory import Trajectory -from ase.optimize import FIRE -from ase.constraints import ExpCellFilter -from ase.io import read, write -from ase.md.langevin import Langevin -from ase.md.verlet import VelocityVerlet -from ase.md.npt import NPT -from ase.md.velocitydistribution import MaxwellBoltzmannDistribution -import os -from time import time +from matdeeplearn.models.base_model import BaseModel +from matdeeplearn.preprocessor.helpers import generate_node_features logging.basicConfig(level=logging.INFO) @@ -143,7 +134,7 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: @staticmethod def _load_model(config: dict, rank: str) -> List[BaseModel]: """ - This static method loads a machine learning model based on the provided configuration. + This static method loads a model based on the provided configuration. Parameters: - config (dict): Configuration dictionary containing model and dataset parameters. @@ -190,187 +181,4 @@ def _load_model(config: dict, rank: str) -> List[BaseModel]: except ValueError: logging.warning(f"MDLCalculator: No checkpoint.pt file is found for model No.{i+1}, and an untrained model is used for prediction.") - return model_list - - -class StructureOptimizer: - """ - This class provides functionality to optimize the structure of an Atoms object using a specified calculator. - """ - def __init__(self, - calculator, - relax_cell: bool = False, - ): - """ - Initialize the StructureOptimizer. - - Parameters: - - calculator (Calculator): A calculator object for performing energy and force calculations. - - relax_cell (bool): If True, the cell will be relaxed in addition to positions during the optimization process. - """ - self.calculator = calculator - self.relax_cell = relax_cell - - def optimize(self, atoms: Atoms, logfile=None, write_traj_name=None) -> Tuple[Atoms, float]: - """ - This method optimizes the structure of the given Atoms object using the specified calculator. - If `relax_cell` is True, the cell will be relaxed. Trajectory information can be written to a file. - - Parameters: - - atoms: An Atoms object representing the structure to be optimized. - - logfile: File to write optimization log information. - - write_traj_name: File to write trajectory of the optimization. - - Returns: - - atoms: The optimized Atoms object. - - time_per_step: The average time taken per optimization step. - """ - atoms.calc = self.calculator - if self.relax_cell: - atoms = ExpCellFilter(atoms) - - optimizer = FIRE(atoms, logfile=logfile) - - if write_traj_name is not None: - traj = Trajectory(write_traj_name + '.traj', 'w', atoms) - optimizer.attach(traj.write, interval=1) - - start_time = time() - optimizer.run(fmax=0.001, steps=500) - end_time = time() - num_steps = optimizer.get_number_of_steps() - - if isinstance(atoms, ExpCellFilter): - atoms = atoms.atoms - time_per_step = (end_time - start_time) / num_steps if num_steps != 0 else 0 - return atoms, time_per_step - - @staticmethod - def save_results(original, optimized, structure_ids, file_path, header=None) -> None: - """ - This static method saves the original and optimized atomic positions along with structure IDs to a file. - - Parameters: - - original: List of original atomic positions or a single set of positions. - - optimized: List of optimized atomic positions or a single set of positions. - - structure_ids: List of structure IDs corresponding to each set of positions. - - file_path: Path to the file where results will be saved. - - header: Optional header for the saved file. - """ - if header == None: - header = "strucure_id, original_pos, original_pos, original_pos, optimized_pos, optimized_pos, optimized_pos" - for i in range(len(original)): - with open(file_path, 'a') as f: - if i == 0: - f.write(header + '\n') - n = len(original[i]) if isinstance(original, list) else 1 - ids = np.array([structure_ids[i]] * n).reshape(-1, 1) - data = np.hstack((ids, original[i], optimized[i])) - np.savetxt(f, data, fmt='%s', delimiter=', ') - - -class MDSimulator: - """ - This class provides a simple interface for running molecular dynamics simulations. - It currently supports microcanonical ('NVE'), canonical ('NVT'), or isothermal-isobaric ('NPT') simulations. - """ - def __init__(self, - simulation_constant_type: str, - timestep: float, - temperature: float, - calculator: Calculator - ): - """ - Initialize a Molecular Dynamics Simulator. - - Parameters: - - simulation_constant_type (str): Type of molecular dynamics simulation ('NVE', 'NVT', 'NPT'). - - timestep (float): Time step for the simulation in femtoseconds. - - temperature (float): Initial temperature for the simulation in Kelvin. - - calculator (Calculator): A calculator object for energy and force calculations. - """ - self.simulation = simulation_constant_type - self.timestep = timestep - self.temperature = temperature - self.calculator = calculator - self.results = [] - - def run_simulation(self, - atoms: Atoms, - num_steps: int, - log_console: int = 0, - save_energy: bool = False, - **kwargs) -> None: - """ - This method runs a molecular dynamics simulation for the specified number of steps using the chosen ensemble. - It currently supports the 'NVE', 'NVT', and 'NPT' ensembles. Energy information can be logged to the console and saved. - - Parameters: - - atoms (Atoms): An Atoms object representing the molecular system. - - num_steps (int): Number of simulation steps to run. - - log_console (int): Interval for logging energy information to the console (0 for no logging). - - save_energy (bool): If True, save energy information during the simulation. - - **kwargs: Additional parameters specific to the chosen simulation ensemble. - """ - atoms.set_calculator(self.calculator) - MaxwellBoltzmannDistribution(atoms, temperature_K=self.temperature) - - common_valid_params = ['trajectory', 'logfile', 'loginterval', 'append_trajectory', 'temperature_K'] - valid_params = common_valid_params - - if self.simulation == 'NVE': - valid_params.append('dt') - simulation_class = VelocityVerlet - elif self.simulation == 'NVT': - valid_params += ['friction', 'fixcm', 'communicator', 'rng'] - kwargs.setdefault('friction', 1e-3) - kwargs.setdefault('temperature_K', self.temperature) - simulation_class = Langevin - elif self.simulation == 'NPT': - valid_params += ['externalstress', 'ttime', 'pfactor', 'mask'] - kwargs.setdefault('externalstress', 0.01623) - kwargs.setdefault('pfactor', 0.6) - kwargs.setdefault('temperature_K', self.temperature) - simulation_class = NPT - else: - raise NotImplementedError("Currently unimplemented simulation type") - - invalid_params = [key for key in kwargs.keys() if key not in valid_params] - if invalid_params: - raise ValueError(f"Invalid parameter(s): {', '.join(invalid_params)}") - - dyn = simulation_class(atoms, timestep=self.timestep * units.fs, **kwargs) - time_ps = 0 - - if log_console: - def printenergy(a=atoms): - epot = a.get_potential_energy() - ekin = a.get_kinetic_energy() - temperature = 2 * ekin / (3 * len(a) * units.kB) - if save_energy: - self.results.append({'time': time_ps, 'Etot': epot + ekin, - 'Epot': epot, 'Ekin': ekin, 'T': temperature}) - print('Energy of system: Epot = %.3feV Ekin = %.3feV (T=%3.0fK) ' - 'Etot = %.3feV' % (epot, ekin, temperature, epot + ekin)) - time_ps += 0.005 - dyn.attach(printenergy, interval=log_console) - - start = time() - dyn.run(steps=num_steps) - end = time() - print(f"Time: {end-start:.4f}") - - @staticmethod - def traj_to_xdatcar(traj_file) -> None: - """ - This static method reads a trajectory file and saves it in the XDATCAR format. - - Parameters: - - traj_file (str): Path to the trajectory file. - - Returns: - - None - """ - traj = read(traj_file, index=':') - file_name, _ = os.path.splitext(traj_file) - write(file_name + '.XDATCAR', traj) \ No newline at end of file + return model_list \ No newline at end of file From c3d9b2be36286364f91689aea56b5e560f5cf1b8 Mon Sep 17 00:00:00 2001 From: qzheng75 Date: Mon, 8 Jan 2024 18:57:36 -0800 Subject: [PATCH 6/6] Remove extra files --- scripts/optimization.py | 206 ---------------------------------------- 1 file changed, 206 deletions(-) delete mode 100644 scripts/optimization.py diff --git a/scripts/optimization.py b/scripts/optimization.py deleted file mode 100644 index a27d8b72..00000000 --- a/scripts/optimization.py +++ /dev/null @@ -1,206 +0,0 @@ -from ase import Atoms -from matdeeplearn.common.ase_utils import MDLCalculator, StructureOptimizer -import numpy as np -from time import time -import numpy as np -import logging -import json -from ase.geometry import Cell -import copy -import os -from scipy.stats import norm -from pymatgen.optimization.neighbors import find_points_in_spheres - -logging.basicConfig(level=logging.INFO) - - -def get_rdf(structure: Atoms, σ = 0.05, dr = 0.01, max_r = 12.0): - rmax = max_r + 3 * σ + dr - rs = np.arange(0.5, rmax + dr, dr) - nr = len(rs) - 1 - natoms = len(structure) - - normalization = 4 / structure.get_cell().volume * np.pi - normalization *= (natoms * rs[0:-1]) ** 2 - - rdf = np.zeros(nr, dtype = int) - lattice_matrix = np.array(structure.get_cell(), dtype=float) - cart_coords = np.array(structure.get_positions(), dtype=float) - - for i in range(natoms): - rdf += np.histogram(find_points_in_spheres( - all_coords = cart_coords, - center_coords = np.array([cart_coords[i]], dtype=float), - r = rmax, - pbc = np.array([1, 1, 1], dtype=int), - lattice = lattice_matrix, - tol = 1e-8, - )[3], rs)[0] - - return np.convolve(rdf / normalization, - norm.pdf(np.arange(-3 * σ, 3 * σ + dr, dr), 0.0, σ), - mode="same")[0:(nr - int((3 * σ) / dr) - 1)] - -def rdf_mse(rdf1, rdf2): - if isinstance(rdf1, list): - rdf1 = np.array(rdf1) - if isinstance(rdf2, list): - rdf2 = np.array(rdf2) - return np.mean((rdf1 - rdf2) ** 2) - -def wright_factor(rdf, rdf_ref): - return np.sum((rdf - rdf_ref) ** 2) / np.sum(rdf_ref ** 2) - -def cartesian_distance(original, optimized): - original, optimized = np.array(original), np.array(optimized) - dist = np.sqrt(np.sum((original - optimized) ** 2, axis=1)) - return np.mean(dist) - -def build_atoms_list(num=-1): - with open('./data/torchmd_test_data/data.json', 'r') as f: - data = json.load(f) - total = num if num != -1 else len(data) - unrelaxed = [] - dft_unrelaxed = [] - dft_unrelaxed_energy = [] - relaxed = [] - unrelaxed_energy = [] - relaxed_energy = [] - ids = [] - for structure in data[:total]: - if 'type' not in structure.keys(): - continue - - # unrelaxed structures: need its unrelaxed and ground truth relaxed information - if structure['type'] == 'unrelaxed_in_test': - unrelaxed_atoms = Atoms(symbols=structure['atomic_numbers'], - positions=structure['unrelaxed_positions'], - cell=Cell(structure['unrelaxed_cell'])) - dft_atoms = Atoms(symbols=structure['atomic_numbers'], - positions=structure['relaxed_positions'], - cell=Cell(structure['relaxed_cell'])) - - unrelaxed_atoms.structure_id = structure['structure_id'] - dft_atoms.structure_id = structure['structure_id'] - unrelaxed.append(unrelaxed_atoms) - dft_unrelaxed.append(dft_atoms) - unrelaxed_energy.append(structure['unrelaxed_energy']) - dft_unrelaxed_energy.append(structure['relaxed_energy']) - - # relaxed structures: only need its ground truth relaxed information - if structure['type'] == 'relaxed_in_test': - relaxed_atoms = Atoms(symbols=structure['atomic_numbers'], - positions=structure['relaxed_positions'], - cell=Cell(structure['relaxed_cell'])) - relaxed_atoms.structure_id = structure['structure_id'] - relaxed.append(relaxed_atoms) - relaxed_energy.append(structure['relaxed_energy']) - - ids.append(structure['structure_id']) - return ids, unrelaxed, relaxed, dft_unrelaxed,\ - unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy - - -def evaluate(task, original_atoms, optimized_atoms, times, dft_relaxed=None, - save=False, optim=None, folder=None, filename=None): - original_energy = [atoms.get_potential_energy() for atoms in original_atoms] - optimized_energy = [atoms.get_potential_energy() for atoms in optimized_atoms] - n_atoms = [len(atoms.get_atomic_numbers()) for atoms in optimized_atoms] - original_pos = [atoms.get_positions() for atoms in original_atoms] - optimized_pos = [atoms.get_positions() for atoms in optimized_atoms] - original_cell = [atoms.get_cell() for atoms in original_atoms] - optimized_cell = [atoms.get_cell() for atoms in optimized_atoms] - - result = None - - e_drop = np.mean((np.array(original_energy) - np.array(optimized_energy)) / n_atoms) - - if task == 'relaxed': - e_drop = np.mean((np.array(original_energy) - np.array(optimized_energy)) / n_atoms) - pos_delta = np.mean([cartesian_distance(pos1, pos2) for pos1, pos2 in zip(original_pos, optimized_pos)]) - cell_delta = np.mean([cartesian_distance(c1, c2) for c1, c2 in zip(original_cell, optimized_cell)]) - - rdf_optimized = [get_rdf(atoms) for atoms in optimized_atoms] - rdf_original = [get_rdf(atoms) for atoms in original_atoms] - rdf_compared_to_dft = np.mean([wright_factor(x, y) for x, y in zip(rdf_optimized, rdf_original)]) - - result = { - 'Average energy drop per atom': e_drop, - 'Average change in atoms cartesian positions': pos_delta, - 'Average change in cell parameters': cell_delta, - 'MSE for RDF': rdf_mse(rdf_original, rdf_optimized), - 'Wright Factor': rdf_compared_to_dft, - 'Averge time per step': np.mean(times) - } - elif task == 'unrelaxed': - assert dft_relaxed is not None - dft_pos = [atoms.get_positions() for atoms in dft_relaxed] - pos_compared_to_dft = np.mean([cartesian_distance(pos1, pos2) for pos1, pos2 in zip(optimized_pos, dft_pos)]) - - dft_cell = [atoms.get_cell()[:] for atoms in dft_relaxed] - cell_compared_to_dft = np.mean([cartesian_distance(c1, c2) for c1, c2 in zip(optimized_cell, dft_cell)]) - - rdf_optimized = [get_rdf(atoms) for atoms in optimized_atoms] - rdf_dft = [get_rdf(atoms) for atoms in dft_relaxed] - rdf_compared_to_dft = np.mean([wright_factor(x, y) for x, y in zip(rdf_dft, rdf_optimized)]) - - result = { - 'Average energy drop per atom': e_drop, - 'Average cartesian distance for atom positions compared to dft': pos_compared_to_dft, - 'Average cartesian distance for cell positions compared to dft': cell_compared_to_dft, - 'MSE for RDF': rdf_mse(rdf_dft, rdf_optimized), - 'Wright Factor': rdf_compared_to_dft, - 'Averge time per step': np.mean(times) - } - else: - raise NotImplementedError("Not implemented evaluate type.") - - if optim is not None and save: - os.makedirs(folder, exist_ok=True) - optim.save_results(original_energy, optimized_energy, structure_ids, f"{folder}/{filename}_energy.csv", header="strucure_id, original_energy, optimized_energy") - optim.save_results(original_pos, optimized_pos, structure_ids, f"{folder}/{filename}_positions.csv", header="strucure_id, original_pos, original_pos, original_pos, optimized_pos, optimized_pos, optimized_pos") - optim.save_results(original_cell, optimized_cell, structure_ids, f"{folder}/{filename}_cell.csv", header="strucure_id, original_cell, original_cell, original_cell, optimized_cell, optimized_cell, optimized_cell") - return result - - -if __name__ == '__main__': - structure_ids, unrelaxed, relaxed, dft,\ - unrelaxed_energy, relaxed_energy, dft_unrelaxed_energy = build_atoms_list() - - # Configurations below - calc_str = './configs/calculator/config_cgcnn_lj.yml' - - save = True - folder = './train_outs/unrelaxed_data' - optim_type = 'unrelax' - # Configurations above - - calculator = MDLCalculator(config=calc_str) - - logging.info(f"Saving optimizaion results for {optim_type}ed structures to: {folder}") - - original_atoms, optimized_atoms = [], [] - - optim = StructureOptimizer(calculator, relax_cell=True) - start = time() - times = [] - - to_optim = relaxed if optim_type == 'relax' else unrelaxed - - for idx, atoms in enumerate(to_optim): - atoms.set_calculator(calculator) - original_atoms.append(atoms) - to_optim = copy.deepcopy(atoms) - optimized, time_per_step = optim.optimize(to_optim) - times.append(time_per_step) - optimized_atoms.append(optimized) - if (idx + 1) % 20 == 0: - logging.info(f"Completed optimizing {idx + 1} structures.") - end = time() - - print(f"Time elapsed: {end - start:.3f}") - result = evaluate(optim_type, original_atoms, optimized_atoms, times, dft_relaxed=dft, - save=True, optim=optim, folder=folder, filename=optim_type) - - for key in result.keys(): - print(f"{key}: {result[key]:.4f}") \ No newline at end of file