diff --git a/configs/config_calculator.yml b/configs/config_calculator.yml index 64ba8b7e..0002543b 100644 --- a/configs/config_calculator.yml +++ b/configs/config_calculator.yml @@ -1,26 +1,9 @@ -trainer: matdeeplearn.trainers.PropertyTrainer - task: - run_mode: train - identifier: my_train_job - parallel: False - # If seed is not set, then it will be random every time - seed: 12345678 - # Defaults to run directory if not specified - save_dir: - # continue from a previous job - continue_job: False - # spefcify if the training state is loaded: epochs, learning rate, etc - load_training_state: False # Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file. - checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt - # E.g. ["train", "val", "test"] - write_output: [train, val, test] - # Specify if labels are provided for the predict task - # labels: True - use_amp: True + checkpoint_path: ./checkpoints/cgcnn_checkpoint.pt model: + # Model used by the calculator name: CGCNN # model attributes dim1: 100 @@ -39,62 +22,12 @@ model: # Compute edge attributes on the fly in the model forward otf_edge_attr: True # Compute node attributes on the fly in the model forward - otf_node_attr: True + otf_node_attr: False + model_ensemble: 1 # compute gradients w.r.t to positions and cell, requires otf_edge_attr=True gradient: True -optim: - max_epochs: 40 - max_checkpoint_epochs: 0 - lr: 0.002 - # Either custom or from torch.nn.functional library. 
If from torch, loss_type is TorchLossWrapper - loss: - loss_type: TorchLossWrapper - loss_args: {loss_fn: l1_loss} - # gradient clipping value - clip_grad_norm: 10 - batch_size: 100 - optimizer: - optimizer_type: AdamW - optimizer_args: {} - scheduler: - scheduler_type: ReduceLROnPlateau - scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002} - #Training print out frequency (print per n number of epochs) - verbosity: 5 - # tdqm progress bar per batch in the epoch - batch_tqdm: False - -dataset: - name: test_data - # Whether the data has already been processed and a data.pt file is present from a previous run - processed: False - # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path} - src: data/force_data/data.json - # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file - # Example: target_path: "data/raw_graph_scalar/targets.csv" - target_path: - # Path to save processed data.pt file - pt_path: data/force_data/ - # Either "node" or "graph" level - prediction_level: graph - - transforms: - - name: GetY - args: - # index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset - # For example, an index: 0 (default) will use the first entry in the target vector - # if all values are to be predicted simultaneously, then specify index: -1 - index: -1 - otf_transform: True # Optional parameter, default is True - # Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html) - data_format: json - # specify if additional attributes to be loaded into the dataset from the .json file; e.g. 
additional_attributes: [forces, stress] - additional_attributes: - # Print out processing info - verbose: True - # Index of target column in targets.csv - # graph specific settings +dataset: preprocess_params: # one of mdl (minimum image convention), ocp (all neighbors included) edge_calc_method: ocp @@ -118,13 +51,4 @@ dataset: self_loop: True # Method of obtaining atom dictionary: available: (onehot) node_representation: onehot - all_neighbors: True - - # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html - num_workers: 0 - # Where the dataset is loaded; either "cpu" or "cuda" - dataset_device: cpu - # Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data) - train_ratio: 0.9 - val_ratio: 0.05 - test_ratio: 0.05 + all_neighbors: True diff --git a/matdeeplearn/common/ase_utils.py b/matdeeplearn/common/ase_utils.py index c25047c3..ecf2acb0 100644 --- a/matdeeplearn/common/ase_utils.py +++ b/matdeeplearn/common/ase_utils.py @@ -1,37 +1,50 @@ -import torch +from typing import List +import logging + import numpy as np +import torch import yaml from ase import Atoms from ase.geometry import Cell from ase.calculators.calculator import Calculator -from matdeeplearn.preprocessor.helpers import generate_node_features from torch_geometric.data.data import Data from torch_geometric.loader import DataLoader -import logging -from typing import List + from matdeeplearn.common.registry import registry +from matdeeplearn.models.base_model import BaseModel +from matdeeplearn.preprocessor.helpers import generate_node_features logging.basicConfig(level=logging.INFO) class MDLCalculator(Calculator): + """ + A neural-network-based Calculator that calculates the energy, forces and stress of a crystal structure. + """ implemented_properties = ["energy", "forces", "stress"] - def __init__(self, config): + def __init__(self, config, rank='cuda:0'): """ Initialize the MDLCalculator instance. 
Args: - config (str or dict): Configuration settings for the MDLCalculator. + - config (str or dict): Configuration settings for the MDLCalculator. + - rank (str): Device on which the calculator computes properties; 'cpu' is used instead when CUDA is unavailable. Defaults to 'cuda:0'. Raises: - AssertionError: If the trainer name is not in the correct format or if the trainer class is not found. + - AssertionError: If otf_edge_index, otf_edge_attr and gradient are not all set to True in the model config (NotImplementedError is raised when config is neither a str nor a dict). """ Calculator.__init__(self) + if isinstance(config, str): + logging.info(f'MDLCalculator instantiated from config: {config}') with open(config, "r") as yaml_file: config = yaml.safe_load(yaml_file) + elif isinstance(config, dict): + logging.info('MDLCalculator instantiated from a dictionary.') + else: + raise NotImplementedError('Unsupported config type.') gradient = config["model"].get("gradient", False) otf_edge_index = config["model"].get("otf_edge_index", False) @@ -39,36 +52,24 @@ def __init__(self, config): self.otf_node_attr = config["model"].get("otf_node_attr", False) assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True." 
- trainer_name = config.get("trainer", "matdeeplearn.trainers.PropertyTrainer") - assert trainer_name.count(".") >= 1, "Trainer name should be in format {module}.{trainer_name}, like matdeeplearn.trainers.PropertyTrainer" - - trainer_cls = registry.get_trainer_class(trainer_name) - load_state = config['task'].get('checkpoint_path', None) - assert trainer_cls is not None, "Trainer not found" - self.trainer = trainer_cls.from_config(config) - - try: - self.trainer.load_checkpoint() - except ValueError: - logging.warning("No checkpoint.pt file is found, and an untrained model is used for prediction.") - + self.device = rank if torch.cuda.is_available() else 'cpu' + self.models = MDLCalculator._load_model(config, self.device) self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250) - self.device = 'cpu' - def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None): + def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None) -> None: """ Calculate energy, forces, and stress for a given ase.Atoms object. Args: - atoms (ase.Atoms): The atomic structure for which calculations are to be performed. - properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress']. - system_changes: Not supported in the current implementation. + - atoms (ase.Atoms): The atomic structure for which calculations are to be performed. + - properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress']. + - system_changes: Not supported in the current implementation. Returns: - None: The results are stored in the instance variable 'self.results'. + - None: The results are stored in the instance variable 'self.results'. Note: - This method performs energy, forces, and stress calculations using a neural network-based calculator. + - This method performs energy, forces, and stress calculations using a neural network-based calculator. 
The results are stored in the instance variable 'self.results' as 'energy', 'forces', and 'stress'. """ Calculator.calculate(self, atoms, properties, system_changes) @@ -87,11 +88,20 @@ def calculate(self, atoms: Atoms, properties=implemented_properties, system_chan data_list = [data] loader = DataLoader(data_list, batch_size=1) + loader_iter = iter(loader) + batch = next(loader_iter).to(self.device) + + out_list = [] + for model in self.models: + out_list.append(model(batch)) - out = self.trainer.predict_by_calculator(loader) - self.results['energy'] = out['energy'] - self.results['forces'] = out['forces'] - self.results['stress'] = out['stress'] + energy = torch.stack([entry["output"] for entry in out_list]).mean(dim=0) + forces = torch.stack([entry["pos_grad"] for entry in out_list]).mean(dim=0) + stresses = torch.stack([entry["cell_grad"] for entry in out_list]).mean(dim=0) + + self.results['energy'] = energy.detach().cpu().numpy() + self.results['forces'] = forces.detach().cpu().numpy() + self.results['stress'] = stresses.squeeze().detach().cpu().numpy() @staticmethod def data_to_atoms_list(data: Data) -> List[Atoms]: @@ -101,11 +111,11 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: with its associated properties such as positions and cell. Args: - data (Data): A data object containing information about atomic structures. + - data (Data): A data object containing information about atomic structures. Returns: - List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure - with positions and associated properties. + - List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure + with positions and associated properties. 
""" cells = data.cell.numpy() @@ -120,3 +130,55 @@ def data_to_atoms_list(data: Data) -> List[Atoms]: for i in range(len(data.structure_id)): atoms_list[i].structure_id = data.structure_id[i][0] return atoms_list + + @staticmethod + def _load_model(config: dict, rank: str) -> List[BaseModel]: + """ + Build the model ensemble described by the configuration and load checkpoint weights into the models. + + Parameters: + - config (dict): Configuration dictionary containing the task (checkpoint_path), model and dataset (preprocess_params) settings. + - rank (str): Device each model is moved to via model.to(rank). + + Returns: + - model_list: A list of the instantiated models, one per ensemble member. + """ + + graph_config = config['dataset']['preprocess_params'] + model_config = config['model'] + + model_list = [] + model_name = 'matdeeplearn.models.' + model_config["name"] + logging.info(f'MDLCalculator: setting up {model_name} for calculation') + # Obtain node, edge, and output dimensions for model initialization + for _ in range(model_config["model_ensemble"]): + node_dim = graph_config["node_dim"] + edge_dim = graph_config["edge_dim"] + + model_cls = registry.get_model_class(model_name) + model = model_cls( + node_dim=node_dim, + edge_dim=edge_dim, + output_dim=1, + cutoff_radius=graph_config["cutoff_radius"], + n_neighbors=graph_config["n_neighbors"], + graph_method=graph_config["edge_calc_method"], + num_offsets=graph_config["num_offsets"], + **model_config + ) + model = model.to(rank) + model_list.append(model) + + checkpoints = config['task']["checkpoint_path"].split(',') + if len(checkpoints) == 0: + logging.warning("MDLCalculator: No checkpoint.pt file is found, and untrained models are used for prediction.") + else: + for i in range(len(checkpoints)): + try: + checkpoint = torch.load(checkpoints[i]) + model_list[i].load_state_dict(checkpoint["state_dict"]) + logging.info(f'MDLCalculator: weights for model No.{i+1} loaded from {checkpoints[i]}') + except ValueError: + logging.warning(f"MDLCalculator: No checkpoint.pt file is found for model No.{i+1}, and an untrained model is used 
for prediction.") + + return model_list \ No newline at end of file