From 5a52ee2314c508e3b4bef0ef627d4953add271b0 Mon Sep 17 00:00:00 2001 From: qzheng75 Date: Wed, 20 Sep 2023 17:14:08 -0400 Subject: [PATCH 1/2] Upload necessary files --- configs/config_calculator.yml | 130 +++++++++++++++++++++++++++++++ configs/config_forces.yml | 40 ++++++---- matdeeplearn/common/ase_utils.py | 122 +++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 13 deletions(-) create mode 100644 configs/config_calculator.yml create mode 100644 matdeeplearn/common/ase_utils.py diff --git a/configs/config_calculator.yml b/configs/config_calculator.yml new file mode 100644 index 00000000..64ba8b7e --- /dev/null +++ b/configs/config_calculator.yml @@ -0,0 +1,130 @@ +trainer: matdeeplearn.trainers.PropertyTrainer + +task: + run_mode: train + identifier: my_train_job + parallel: False + # If seed is not set, then it will be random every time + seed: 12345678 + # Defaults to run directory if not specified + save_dir: + # continue from a previous job + continue_job: False + # spefcify if the training state is loaded: epochs, learning rate, etc + load_training_state: False + # Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file. + checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt + # E.g. 
["train", "val", "test"] + write_output: [train, val, test] + # Specify if labels are provided for the predict task + # labels: True + use_amp: True + +model: + name: CGCNN + # model attributes + dim1: 100 + dim2: 150 + pre_fc_count: 1 + gc_count: 4 + post_fc_count: 3 + pool: global_add_pool + pool_order: early + batch_norm: False + batch_track_stats: True + act: silu + dropout_rate: 0.0 + # Compute edge indices on the fly in the model forward + otf_edge_index: True + # Compute edge attributes on the fly in the model forward + otf_edge_attr: True + # Compute node attributes on the fly in the model forward + otf_node_attr: True + # compute gradients w.r.t to positions and cell, requires otf_edge_attr=True + gradient: True + +optim: + max_epochs: 40 + max_checkpoint_epochs: 0 + lr: 0.002 + # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper + loss: + loss_type: TorchLossWrapper + loss_args: {loss_fn: l1_loss} + # gradient clipping value + clip_grad_norm: 10 + batch_size: 100 + optimizer: + optimizer_type: AdamW + optimizer_args: {} + scheduler: + scheduler_type: ReduceLROnPlateau + scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002} + #Training print out frequency (print per n number of epochs) + verbosity: 5 + # tdqm progress bar per batch in the epoch + batch_tqdm: False + +dataset: + name: test_data + # Whether the data has already been processed and a data.pt file is present from a previous run + processed: False + # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path} + src: data/force_data/data.json + # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file + # Example: 
target_path: "data/raw_graph_scalar/targets.csv" + target_path: + # Path to save processed data.pt file + pt_path: data/force_data/ + # Either "node" or "graph" level + prediction_level: graph + + transforms: + - name: GetY + args: + # index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset + # For example, an index: 0 (default) will use the first entry in the target vector + # if all values are to be predicted simultaneously, then specify index: -1 + index: -1 + otf_transform: True # Optional parameter, default is True + # Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html) + data_format: json + # specify if additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress] + additional_attributes: + # Print out processing info + verbose: True + # Index of target column in targets.csv + # graph specific settings + preprocess_params: + # one of mdl (minimum image convention), ocp (all neighbors included) + edge_calc_method: ocp + # determine if edges are computed, if false, then they need to be computed on the fly + preprocess_edges: False + # determine if edge attributes are computed during processing, if false, then they need to be computed on the fly + preprocess_edge_features: False + # determine if node attributes are computed during processing, if false, then they need to be computed on the fly + preprocess_node_features: False + # distance cutoff to determine if two atoms are connected by an edge + cutoff_radius : 8.0 + # maximum number of neighbors to consider (usually an arbitrarily high number to consider all neighbors) + n_neighbors : 250 + # number of pbc offsets to consider when determining neighbors (usually not changed) + num_offsets: 2 + # dimension of node attributes + node_dim : 100 + # dimension of edge attributes + edge_dim : 50 + # whether or not to add self-loops + 
self_loop: True + # Method of obtaining atom dictionary: available: (onehot) + node_representation: onehot + all_neighbors: True + + # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html + num_workers: 0 + # Where the dataset is loaded; either "cpu" or "cuda" + dataset_device: cpu + # Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data) + train_ratio: 0.9 + val_ratio: 0.05 + test_ratio: 0.05 diff --git a/configs/config_forces.yml b/configs/config_forces.yml index 761a963e..1d8bf283 100644 --- a/configs/config_forces.yml +++ b/configs/config_forces.yml @@ -16,6 +16,10 @@ task: checkpoint_path: # E.g. [train, val, test] write_output: [val, test] + # Frequency of writing to file; 0 denotes writing only at the end, 1 denotes writing every time + output_frequency: 1 + # Frequency of saving model .pt file; 0 denotes saving only at the end, 1 denotes saving every time, -1 denotes never saving; this controls both checkpoint and best_checkpoint + model_save_frequency: 1 # Specify if labels are provided for the predict task # labels: True use_amp: False @@ -34,15 +38,19 @@ model: batch_track_stats: True act: silu dropout_rate: 0.0 - # Compute edge features on the fly - otf_edge: True - # compute gradients w.r.t to positions and cell, requires otf_edge=True + # Compute edge indices on the fly in the model forward + otf_edge_index: True + # Compute edge attributes on the fly in the model forward + otf_edge_attr: True + # Compute node attributes on the fly in the model forward + otf_node_attr: False + # compute gradients w.r.t to positions and cell, requires otf_edge_attr=True gradient: True optim: - max_epochs: 40 + max_epochs: 400 max_checkpoint_epochs: 0 - lr: 0.002 + lr: 0.001 # Either custom or from torch.nn.functional library. 
If from torch, loss_type is TorchLossWrapper loss: #loss_type: "TorchLossWrapper" @@ -69,12 +77,12 @@ dataset: name: test_data processed: False # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path} - src: /global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_forces/raw/data.json + src: data/force_data/data.json # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file # Example: target_path: "data/test_data/raw_graph_scalar/targets.csv" target_path: # Path to save processed data.pt file - pt_path: data/ + pt_path: data/force_data/ # Either "node" or "graph" prediction_level: graph @@ -103,18 +111,24 @@ dataset: # determine if edge attributes are computed during processing, if false, then they need to be computed on the fly preprocess_edge_features: False # determine if node attributes are computed during processing, if false, then they need to be computed on the fly - preprocess_nodes: True + preprocess_node_features: True cutoff_radius : 8.0 n_neighbors : 250 num_offsets: 2 - edge_steps : 50 + # dimension of node attributes + node_dim : 100 + # dimension of edge attributes + edge_dim : 50 self_loop: True # Method of obtaining atom dictionary: available: (onehot) node_representation: onehot all_neighbors: True + + # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html + num_workers: 0 + # Where the dataset is loaded; either "cpu" or "cuda" + dataset_device: cpu # Ratios for train/val/test split out of a total of less than 1 - train_ratio: 0.8 + train_ratio: 0.9 val_ratio: 0.05 - test_ratio: 0.015 - - + test_ratio: 0.05 \ No newline at end of file diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py new 
class MDLCalculator(Calculator):
    """ASE Calculator that delegates energy/forces/stress prediction to a MatDeepLearn model."""

    # Properties this calculator can report back to ASE.
    implemented_properties = ["energy", "forces", "stress"]

    def __init__(self, config):
        """
        Initialize the MDLCalculator instance.

        Args:
            config (str or dict): Path to a YAML config file, or an already-parsed
                configuration dictionary.

        Raises:
            AssertionError: If the model config does not enable on-the-fly edge
                computation and gradients, if the trainer name is not in the
                {module}.{trainer_name} format, or if the trainer class is not found.
        """
        Calculator.__init__(self)
        if isinstance(config, str):
            with open(config, "r") as yaml_file:
                config = yaml.safe_load(yaml_file)

        gradient = config["model"].get("gradient", False)
        otf_edge_index = config["model"].get("otf_edge_index", False)
        otf_edge_attr = config["model"].get("otf_edge_attr", False)
        self.otf_node_attr = config["model"].get("otf_node_attr", False)
        # Forces and stress come from autograd w.r.t. positions/cell, which requires
        # edges and their attributes to be built inside the model forward pass.
        # NOTE: fixed typo in the message ("oft_edge_attr" -> "otf_edge_attr").
        assert otf_edge_index and otf_edge_attr and gradient, (
            "To use this calculator to calculate forces and stress, you should set "
            "otf_edge_index, otf_edge_attr and gradient to True."
        )

        trainer_name = config.get("trainer", "matdeeplearn.trainers.PropertyTrainer")
        assert trainer_name.count(".") >= 1, "Trainer name should be in format {module}.{trainer_name}, like matdeeplearn.trainers.PropertyTrainer"

        trainer_cls = registry.get_trainer_class(trainer_name)
        assert trainer_cls is not None, "Trainer not found"
        # The checkpoint path is read by the trainer from the config itself, so no
        # separate local variable is needed here (removed unused `load_state`).
        self.trainer = trainer_cls.from_config(config)

        try:
            self.trainer.load_checkpoint()
        except ValueError:
            logging.warning("No checkpoint.pt file is found, and an untrained model is used for prediction.")

        self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250)
        self.device = 'cpu'

    def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None):
        """
        Calculate energy, forces, and stress for a given ase.Atoms object.

        Args:
            atoms (ase.Atoms): The atomic structure for which calculations are to be performed.
            properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress'].
            system_changes: Not supported in the current implementation.

        Returns:
            None: The results are stored in the instance variable 'self.results'
                as 'energy', 'forces', and 'stress'.
        """
        Calculator.calculate(self, atoms, properties, system_changes)

        cell = torch.tensor(atoms.cell.array, dtype=torch.float32)
        pos = torch.tensor(atoms.positions, dtype=torch.float32)
        atomic_numbers = torch.LongTensor(atoms.get_atomic_numbers())

        # Batch dimension of 1 is added to the cell so the model sees a (1, 3, 3) tensor.
        data = Data(n_atoms=len(atomic_numbers), pos=pos, cell=cell.unsqueeze(dim=0),
                    z=atomic_numbers, structure_id=atoms.info.get('structure_id', None))

        # Generate node features up front unless the model computes them on the fly.
        if not self.otf_node_attr:
            generate_node_features(data, self.n_neighbors, device=self.device)
            data.x = data.x.to(torch.float32)

        data_list = [data]
        loader = DataLoader(data_list, batch_size=1)

        out = self.trainer.predict_by_calculator(loader)
        self.results['energy'] = out['energy']
        self.results['forces'] = out['forces']
        self.results['stress'] = out['stress']

    @staticmethod
    def data_to_atoms_list(data: Data) -> List[Atoms]:
        """
        Convert a (possibly batched) 'torch_geometric.data.Data' object into a list
        of 'ase.Atoms' objects, one per structure, carrying positions, cell, and
        the structure_id from the data object.

        Args:
            data (Data): A data object containing information about atomic structures.

        Returns:
            List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic
                structure with positions and associated properties.
        """
        cells = data.cell.numpy()

        # n_atoms gives per-structure atom counts; cumsum yields the split points
        # that partition the flat position/species arrays back into structures.
        split_indices = np.cumsum(data.n_atoms)[:-1]
        positions_per_structure = np.split(data.pos.numpy(), split_indices)
        symbols_per_structure = np.split(data.z.numpy(), split_indices)

        # Single pass: build each Atoms object and tag its structure_id together
        # (the original iterated the same range twice).
        atoms_list = []
        for i in range(len(data.structure_id)):
            atoms = Atoms(
                symbols=symbols_per_structure[i],
                positions=positions_per_structure[i],
                cell=Cell(cells[i]))
            atoms.structure_id = data.structure_id[i][0]
            atoms_list.append(atoms)
        return atoms_list
+ + if str(self.rank) not in ("cpu", "cuda"): + loader = get_dataloader( + loader.dataset, batch_size=loader.batch_size, sampler=None + ) + + results = [] + loader_iter = iter(loader) + for i in range(0, len(loader_iter)): + batch = next(loader_iter).to(self.rank) + out = self._forward(batch.to(self.rank)) + + energy = None if out.get('output') is None else out.get('output').data.cpu().numpy() + stress = None if out.get('cell_grad') is None else out.get('cell_grad').view(-1, 3).data.cpu().numpy() + forces = None if out.get('pos_grad') is None else out.get('pos_grad').data.cpu().numpy() + + results = {'energy': energy, 'stress': stress, 'forces': forces} + return results def _forward(self, batch_data): output = self.model(batch_data)