From 7124294d29938064415fa63db2b0323451b460af Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Mon, 22 Jan 2024 12:46:31 -0500
Subject: [PATCH 01/10] Allow None for dataset and data_loader

---
 matdeeplearn/trainers/base_trainer.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 318d31b1..9f7ea8b0 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -30,11 +30,11 @@ class BaseTrainer(ABC):
     def __init__(
         self,
         model: BaseModel,
-        dataset: Dataset,
+        dataset: Dataset | None,
         optimizer: Optimizer,
         sampler: DistributedSampler,
         scheduler: LRScheduler,
-        data_loader: DataLoader,
+        data_loader: DataLoader | None,
         loss: nn.Module,
         max_epochs: int,
         clip_grad_norm: float = None,
@@ -101,17 +101,18 @@ def __init__(
         logging.info(
             f"GPU is available: {torch.cuda.is_available()}, Quantity: {os.environ.get('LOCAL_WORLD_SIZE', None)}"
         )
-        logging.info("Dataset(s) used:")
-        for key in self.dataset:
-            logging.info(f"Dataset length: {key, len(self.dataset[key])}")
-        if self.dataset.get("train"):
-            logging.debug(self.dataset["train"][0])
-            logging.debug(self.dataset["train"][0].z[0])
-            logging.debug(self.dataset["train"][0].y[0])
-        else:
-            logging.debug(self.dataset[list(self.dataset.keys())[0]][0])
-            logging.debug(self.dataset[list(self.dataset.keys())[0]][0].x[0])
-            logging.debug(self.dataset[list(self.dataset.keys())[0]][0].y[0])
+        if !(self.dataset is None):
+            logging.info("Dataset(s) used:")
+            for key in self.dataset:
+                logging.info(f"Dataset length: {key, len(self.dataset[key])}")
+            if self.dataset.get("train"):
+                logging.debug(self.dataset["train"][0])
+                logging.debug(self.dataset["train"][0].z[0])
+                logging.debug(self.dataset["train"][0].y[0])
+            else:
+                logging.debug(self.dataset[list(self.dataset.keys())[0]][0])
+                logging.debug(self.dataset[list(self.dataset.keys())[0]][0].x[0])
+                logging.debug(self.dataset[list(self.dataset.keys())[0]][0].y[0])
 
         if str(self.rank) not in ("cpu", "cuda"):
             logging.debug(self.model[0].module)
@@ -144,7 +145,7 @@ def from_config(cls, config):
         else:
             rank = torch.device("cuda" if torch.cuda.is_available() else "cpu")
             local_world_size = 1
-        dataset = cls._load_dataset(config["dataset"], config["task"]["run_mode"])
+        dataset = cls._load_dataset(config["dataset"], config["task"]["run_mode"]) if hasattr(config["dataset"], "src") else None
         model = cls._load_model(config["model"], config["dataset"]["preprocess_params"], dataset, local_world_size, rank)
         optimizer = cls._load_optimizer(config["optim"], model, local_world_size)
         sampler = cls._load_sampler(config["optim"], dataset, local_world_size, rank)
         data_loader = cls._load_dataloader(
             config["optim"],
             config["dataset"],
             sampler,
             config["task"]["run_mode"],
             config["model"]
-        )
+        ) if hasattr(config["dataset"], "src") else None
         scheduler = cls._load_scheduler(config["optim"]["scheduler"], optimizer)
         loss = cls._load_loss(config["optim"]["loss"])

From 4266f2c7cd2e7b004347c8c8b903857c229724d0 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Mon, 22 Jan 2024 13:17:18 -0500
Subject: [PATCH 02/10] Allow _load_model to have dataset = None

---
 matdeeplearn/trainers/base_trainer.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 9f7ea8b0..6bbca077 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -271,10 +271,11 @@ def _load_dataset(dataset_config, task):
 
     def _load_model(model_config, graph_config, dataset, world_size, rank):
         """Loads the model if from a config file."""
-        if dataset.get("train"):
-            dataset = dataset["train"]
-        else:
-            dataset = dataset[list(dataset.keys())[0]]
+        if !(dataset is None):
+            if dataset.get("train"):
+                dataset = dataset["train"]
+            else:
+                dataset = dataset[list(dataset.keys())[0]]
 
         if isinstance(dataset, torch.utils.data.Subset):
             dataset = dataset.dataset
@@ -296,13 +297,17 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
         else:
             node_dim = dataset.num_features
         edge_dim = graph_config["edge_dim"]
-        if dataset[0]["y"].ndim == 0:
+        if graph_config["output_dim"]:
+            output_dim = graph_config["output_dim"]
+        elif dataset[0]["y"].ndim == 0:
             output_dim = 1
         else:
             output_dim = dataset[0]["y"].shape[1]
 
         # Determine if this is a node or graph level model
-        if dataset[0]["y"].shape[0] == dataset[0]["z"].shape[0]:
+        if graph_config["prediction_level"]:
+            model_config["prediction_level"] = graph_config["prediction_level"]
+        elif dataset[0]["y"].shape[0] == dataset[0]["z"].shape[0]:
             model_config["prediction_level"] = "node"
         elif dataset[0]["y"].shape[0] == 1:
             model_config["prediction_level"] = "graph"

From 1b8606075620b9fb9336a80e484451b997ecb653 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Mon, 22 Jan 2024 13:21:16 -0500
Subject: [PATCH 03/10] Allow sampler to be None too

---
 matdeeplearn/trainers/base_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 6bbca077..f437ed31 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -32,7 +32,7 @@ def __init__(
         model: BaseModel,
         dataset: Dataset | None,
         optimizer: Optimizer,
-        sampler: DistributedSampler,
+        sampler: DistributedSampler | None,
         scheduler: LRScheduler,
         data_loader: DataLoader | None,
         loss: nn.Module,
@@ -148,7 +148,7 @@ def from_config(cls, config):
         dataset = cls._load_dataset(config["dataset"], config["task"]["run_mode"]) if hasattr(config["dataset"], "src") else None
         model = cls._load_model(config["model"], config["dataset"]["preprocess_params"], dataset, local_world_size, rank)
         optimizer = cls._load_optimizer(config["optim"], model, local_world_size)
-        sampler = cls._load_sampler(config["optim"], dataset, local_world_size, rank)
+        sampler = cls._load_sampler(config["optim"], dataset, local_world_size, rank) if hasattr(config["dataset"], "src") else None
         data_loader = cls._load_dataloader(
             config["optim"],
             config["dataset"],

From 2bd2281eb1845b6876c8052fe5420e2306b80383 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:31:19 -0500
Subject: [PATCH 04/10] Fix conditional

---
 matdeeplearn/trainers/base_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index f437ed31..4f4ce724 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -271,7 +271,7 @@ def _load_dataset(dataset_config, task):
 
     def _load_model(model_config, graph_config, dataset, world_size, rank):
         """Loads the model if from a config file."""
-        if !(dataset is None):
+        if not (dataset is None):
             if dataset.get("train"):
                 dataset = dataset["train"]
             else:

From 988c1c4d60346a3b89948f33ff0d2a42f6e2491f Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:34:35 -0500
Subject: [PATCH 05/10] Fix another conditional

---
 matdeeplearn/trainers/base_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 4f4ce724..3fff7dc7 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -101,7 +101,7 @@ def __init__(
         logging.info(
             f"GPU is available: {torch.cuda.is_available()}, Quantity: {os.environ.get('LOCAL_WORLD_SIZE', None)}"
         )
-        if !(self.dataset is None):
+        if not (self.dataset is None):
             logging.info("Dataset(s) used:")
             for key in self.dataset:
                 logging.info(f"Dataset length: {key, len(self.dataset[key])}")

From 22b3580dd5e19133bde30cad48a08ba4140f2547 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:37:20 -0500
Subject: [PATCH 06/10] '| None' appears to be unnecessary and breaking

---
 matdeeplearn/trainers/base_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 3fff7dc7..880cef6d 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -30,11 +30,11 @@ class BaseTrainer(ABC):
     def __init__(
         self,
         model: BaseModel,
-        dataset: Dataset | None,
+        dataset: Dataset,
         optimizer: Optimizer,
-        sampler: DistributedSampler | None,
+        sampler: DistributedSampler,
         scheduler: LRScheduler,
-        data_loader: DataLoader | None,
+        data_loader: DataLoader,
         loss: nn.Module,
         max_epochs: int,
         clip_grad_norm: float = None,

From 8c5e08146439951fc045fb3cfac19059611ed853 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:44:43 -0500
Subject: [PATCH 07/10] Use dataset unless it is None

---
 matdeeplearn/trainers/base_trainer.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 880cef6d..6d63ce4b 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -292,17 +292,20 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-        if graph_config["node_dim"]:
-            node_dim = graph_config["node_dim"]
+        if not (dataset is None)::
+            node_dim = dataset.num_features
         else:
-            node_dim = dataset.num_features
-        edge_dim = graph_config["edge_dim"]
+            node_dim = graph_config["node_dim"]
+        edge_dim = graph_config["edge_dim"]
         if graph_config["output_dim"]:
             output_dim = graph_config["output_dim"]
-        elif dataset[0]["y"].ndim == 0:
-            output_dim = 1
+        if not (dataset is None):
+            if dataset[0]["y"].ndim == 0:
+                output_dim = 1
+            else:
+                output_dim = dataset[0]["y"].shape[1]
         else:
-            output_dim = dataset[0]["y"].shape[1]
+            output_dim = graph_config["output_dim"]
 
         # Determine if this is a node or graph level model
         if graph_config["prediction_level"]:

From f5faffdf8195f9664dc71ec84e547464c8937ba8 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:46:11 -0500
Subject: [PATCH 08/10] Fix typo

---
 matdeeplearn/trainers/base_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 6d63ce4b..6d1d42c8 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -292,7 +292,7 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-        if not (dataset is None)::
+        if not (dataset is None):
             node_dim = dataset.num_features
         else:
             node_dim = graph_config["node_dim"]

From 24d69703d7762c5ee6eb9c3ccd8bdcdfea61fac8 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 15:49:13 -0500
Subject: [PATCH 09/10] Another place where dataset takes priority over config

---
 matdeeplearn/trainers/base_trainer.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 6d1d42c8..89602f95 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -297,8 +297,6 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
         else:
             node_dim = graph_config["node_dim"]
         edge_dim = graph_config["edge_dim"]
-        if graph_config["output_dim"]:
-            output_dim = graph_config["output_dim"]
         if not (dataset is None):
             if dataset[0]["y"].ndim == 0:
                 output_dim = 1
@@ -308,16 +306,17 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
             output_dim = graph_config["output_dim"]
 
         # Determine if this is a node or graph level model
-        if graph_config["prediction_level"]:
-            model_config["prediction_level"] = graph_config["prediction_level"]
-        elif dataset[0]["y"].shape[0] == dataset[0]["z"].shape[0]:
-            model_config["prediction_level"] = "node"
-        elif dataset[0]["y"].shape[0] == 1:
-            model_config["prediction_level"] = "graph"
+        if not (dataset is None):
+            if dataset[0]["y"].shape[0] == dataset[0]["z"].shape[0]:
+                model_config["prediction_level"] = "node"
+            elif dataset[0]["y"].shape[0] == 1:
+                model_config["prediction_level"] = "graph"
+            else:
+                raise ValueError(
+                    "Target labels do not have the correct dimensions for node or graph-level prediction."
+                )
         else:
-            raise ValueError(
-                "Target labels do not have the correct dimensions for node or graph-level prediction."
-            )
+            model_config["prediction_level"] = graph_config["prediction_level"]
 
         model_cls = registry.get_model_class(model_config["name"])
         model = model_cls(

From e992766c78a4674ca70f323802542c1f0ada4fd7 Mon Sep 17 00:00:00 2001
From: Ian Slagle
Date: Wed, 24 Jan 2024 21:11:53 -0500
Subject: [PATCH 10/10] Keep original node_dim determination

---
 matdeeplearn/trainers/base_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/matdeeplearn/trainers/base_trainer.py b/matdeeplearn/trainers/base_trainer.py
index 89602f95..ba837e46 100644
--- a/matdeeplearn/trainers/base_trainer.py
+++ b/matdeeplearn/trainers/base_trainer.py
@@ -292,10 +292,10 @@ def _load_model(model_config, graph_config, dataset, world_size, rank):
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-        if not (dataset is None):
-            node_dim = dataset.num_features
-        else:
+        if graph_config["node_dim"]:
             node_dim = graph_config["node_dim"]
+        else:
+            node_dim = dataset.num_features
         edge_dim = graph_config["edge_dim"]
         if not (dataset is None):
             if dataset[0]["y"].ndim == 0:
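
Note on the end state of this series: after PATCH 10, _load_model takes node_dim
from graph_config when that entry is set, derives output_dim and the prediction
level from the loaded dataset when one exists, and falls back to the graph_config
entries when dataset is None. The sketch below restates that resolution order as
a standalone function; the resolve_model_dims name and the plain-dict config are
illustrative stand-ins rather than part of the patched module, and the guards use
the idiomatic `dataset is not None`, equivalent to the `not (dataset is None)`
spelling in the patches.

def resolve_model_dims(graph_config, dataset):
    """Resolution order for model dimensions, mirroring base_trainer after PATCH 10."""
    # node_dim: the config entry wins when set (PATCH 10); reading from the
    # dataset is only reachable when a dataset is actually present.
    if graph_config["node_dim"]:
        node_dim = graph_config["node_dim"]
    else:
        node_dim = dataset.num_features
    edge_dim = graph_config["edge_dim"]

    # output_dim: the label shape wins when a dataset is loaded (PATCH 07/09),
    # otherwise fall back to the config entry.
    if dataset is not None:
        y = dataset[0]["y"]
        output_dim = 1 if y.ndim == 0 else y.shape[1]
    else:
        output_dim = graph_config["output_dim"]

    # prediction level: infer node- vs. graph-level from the label shape when
    # possible (PATCH 09), otherwise trust the config.
    if dataset is not None:
        y, z = dataset[0]["y"], dataset[0]["z"]
        if y.shape[0] == z.shape[0]:
            prediction_level = "node"
        elif y.shape[0] == 1:
            prediction_level = "graph"
        else:
            raise ValueError(
                "Target labels do not have the correct dimensions "
                "for node or graph-level prediction."
            )
    else:
        prediction_level = graph_config["prediction_level"]

    return node_dim, edge_dim, output_dim, prediction_level


# Dataset-free path, as exercised when from_config loads no dataset:
print(resolve_model_dims(
    {"node_dim": 100, "edge_dim": 50, "output_dim": 1, "prediction_level": "graph"},
    dataset=None,
))  # -> (100, 50, 1, 'graph')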