From 019a6d8cf42decab584743048d6519a3bb778226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 3 Apr 2024 22:49:02 +0200 Subject: [PATCH 01/63] perf: Add special case to `Table.add_rows` to increase performance --- src/safeds/data/tabular/containers/_table.py | 2 ++ .../safeds/data/tabular/containers/_table/test_add_rows.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 7fdb3a6b4..6e991655e 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1069,6 +1069,8 @@ def add_rows(self, rows: list[Row] | Table) -> Table: key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, ), ) + if self.number_of_rows == 0: + return rows new_df = pd.concat([self._data, rows._data]).infer_objects() new_df.columns = self.column_names diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index 8cbd001c5..9ed6fe116 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -63,8 +63,13 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None: Table({"col1": [5, "7"], "col2": [6, None]}), Table({"col1": [1, 2, 1, 5, "7"], "col2": [1, 2, 4, 6, None]}), ), + ( + Table({"col1": [], "yikes": []}), + Table({"col1": [2], "yikes": [5]}), + Table({"col1": [2], "yikes": [5]}), + ), ], - ids=["Rows from table", "add empty to table", "add on empty table", "rowless", "different schema"], + ids=["Rows from table", "add empty to table", "add on empty table", "rowless", "different schema", "same schema no rows"], ) def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Table) -> None: table1 = table1.add_rows(table2) From 5471a03df163874f665bee3555bfa5614d1f1787 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 3 Apr 2024 20:51:33 +0000 Subject: [PATCH 02/63] style: apply automated linter fixes --- .../data/tabular/containers/_table/test_add_rows.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index 9ed6fe116..d6291569d 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -25,7 +25,7 @@ Table({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}), [], Table({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}), - ) + ), ], ids=["Rows with string and integer values", "different schema", "empty", "add empty"], ) @@ -69,7 +69,14 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None: Table({"col1": [2], "yikes": [5]}), ), ], - ids=["Rows from table", "add empty to table", "add on empty table", "rowless", "different schema", "same schema no rows"], + ids=[ + "Rows from table", + "add empty to table", + "add on empty table", + "rowless", + "different schema", + "same schema no rows", + ], ) def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Table) -> None: table1 = table1.add_rows(table2) From 0802e0e80a594018ae9c8d7e1ffc239c2837e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 3 Apr 2024 22:54:47 +0200 Subject: [PATCH 03/63] perf: change number_of_rows to number_of_columns in `add_rows` as 0 columns => 0 rows --- src/safeds/data/tabular/containers/_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 6e991655e..a0b924fc7 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1057,7 +1057,7 @@ def add_rows(self, rows: list[Row] | Table) -> Table: 2 5 6 """ if isinstance(rows, Table): - if rows.number_of_rows == 0: + if rows.number_of_columns == 0: return self if self.number_of_columns == 0: return rows From a177d9718f0a96b6a2010e51074513cf38c2dbdd Mon Sep 17 00:00:00 2001 From: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> Date: Wed, 3 Apr 2024 23:42:04 +0200 Subject: [PATCH 04/63] perf: special case if rows has columns but no rows Co-authored-by: WinPlay02 --- src/safeds/data/tabular/containers/_table.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index a0b924fc7..53281af15 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1069,6 +1069,8 @@ def add_rows(self, rows: list[Row] | Table) -> Table: key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, ), ) + if rows.number_of_rows == 0: + return self if self.number_of_rows == 0: return rows From 8450dc6cc7ee2b124ce5655b16a52f25012119ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 3 Apr 2024 23:45:05 +0200 Subject: [PATCH 05/63] test: Added test for `Table.add_rows` for "same schema add no rows" --- .../data/tabular/containers/_table/test_add_rows.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index d6291569d..de6cdd71b 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -68,6 +68,11 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None: Table({"col1": [2], "yikes": [5]}), Table({"col1": [2], "yikes": [5]}), ), + ( + Table({"col1": [2], "yikes": [5]}), + Table({"col1": [], "yikes": []}), + Table({"col1": [2], "yikes": [5]}), + ), ], ids=[ "Rows from table", @@ -75,7 +80,8 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None: "add on empty table", "rowless", "different schema", - "same schema no rows", + "same schema add to no rows", + "same schema add no rows", ], ) def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Table) -> None: From bae8e4d6a25121bd3a7fa0c43d3d28116a38581c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 4 Apr 2024 14:03:52 +0200 Subject: [PATCH 06/63] perf: suggested performance upgrades for nn._fnn_layer and nn._model --- src/safeds/ml/nn/_fnn_layer.py | 5 +- src/safeds/ml/nn/_model.py | 85 ++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 41 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index a74df8ff3..9ba1a887b 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -1,4 +1,4 @@ -from torch import nn +from torch import nn, Tensor from safeds.exceptions import ClosedBound, OutOfBoundsError @@ -17,7 +17,8 @@ def __init__(self, input_size: int, output_size: int, activation_function: str): case _: raise ValueError("Unknown Activation Function: " + activation_function) - def forward(self, x: float) -> float: + # def forward(self, x: float) -> float: + def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2eaece27f..a805ec87e 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -67,7 +67,7 @@ def fit( loss_sum = 0.0 number_of_batches_done = 0 for epoch in range(epoch_size): - for x, y in dataloader: + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) @@ -116,8 +116,9 @@ def predict(self, test_data: Table) -> TaggedTable: with torch.no_grad(): for x in dataloader: elem = self._model(x) - for item in range(len(elem)): - predictions.append(elem[item].item()) + predictions += elem.squeeze(dim=1).tolist() + # for item in range(len(elem)): + # predictions.append(elem[item].item()) return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property @@ -194,27 +195,28 @@ def fit( loss_sum = 0.0 number_of_batches_done = 0 for epoch in range(epoch_size): - for x, y in dataloader: + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) - if self._is_multi_class: - pred_size = Tensor.size(pred, dim=1) - predictions_for_all_items_of_batch = [] - for value in range(len(y)): - list_of_probabilities_for_each_category = [] - class_index = y[value].item() - for index in range(pred_size): - if index is int(class_index): - list_of_probabilities_for_each_category.append(1.0) - else: - list_of_probabilities_for_each_category.append(0.0) - predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) - - y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) - - loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) - else: - loss = loss_fn(pred, y) + # if self._is_multi_class: + # pred_size = Tensor.size(pred, dim=1) + # predictions_for_all_items_of_batch = [] + # for value in range(len(y)): + # list_of_probabilities_for_each_category = [] + # class_index = y[value].item() + # for index in range(pred_size): + # if index is int(class_index): + # list_of_probabilities_for_each_category.append(1.0) + # else: + # list_of_probabilities_for_each_category.append(0.0) + # predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) + # + # y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) + # + # loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) + # else: + # loss = loss_fn(pred, y) + loss = loss_fn(pred, y) loss_sum += loss.item() loss.backward() optimizer.step() @@ -258,22 +260,26 @@ def predict(self, test_data: Table) -> TaggedTable: with torch.no_grad(): for x in dataloader: elem = self._model(x) - for item in range(len(elem)): - if not self._is_multi_class: - if elem[item].item() < 0.5: - predicted_class = 0 # pragma: no cover - else: # pragma: no cover - predicted_class = 1 # pragma: no cover - predictions.append(predicted_class) - else: - values = elem[item].tolist() - highest_value = 0 - category_of_highest_value = 0 - for index in range(len(values)): - if values[index] > highest_value: - highest_value = values[index] - category_of_highest_value = index - predictions.append(category_of_highest_value) + if self._is_multi_class: + predictions += torch.argmax(elem, dim=1).tolist() + else: + predictions += elem.round().tolist() + # for item in range(len(elem)): + # if not self._is_multi_class: + # if elem[item].item() < 0.5: + # predicted_class = 0 # pragma: no cover + # else: # pragma: no cover + # predicted_class = 1 # pragma: no cover + # predictions.append(predicted_class) + # else: + # values = elem[item].tolist() + # highest_value = 0 + # category_of_highest_value = 0 + # for index in range(len(values)): + # if values[index] > highest_value: + # highest_value = values[index] + # category_of_highest_value = index + # predictions.append(category_of_highest_value) return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property @@ -308,7 +314,8 @@ def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> N internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="softmax")) else: internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="sigmoid")) - self._pytorch_layers = nn.ModuleList(internal_layers) + self._pytorch_layers = nn.Sequential(*internal_layers) + # self._pytorch_layers = nn.ModuleList(internal_layers) def forward(self, x: float) -> float: for layer in self._pytorch_layers: From 4ae795e255cf92ff6ade508032a904eab4e514d9 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 16:19:29 +0200 Subject: [PATCH 07/63] make dataloader shuffle data each epoch --- src/safeds/data/tabular/containers/_table.py | 2 +- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 7fdb3a6b4..d1e2adbe4 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2413,7 +2413,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): # def _into_dataloader(self, batch_size: int) -> DataLoader: """ - Return a Dataloader for the data stored in this table, used for training neural networks. + Return a Dataloader for the data stored in this table, used for predicting with neural networks. The original table is not modified. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index c5b72e591..e7159815a 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -901,7 +901,7 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: for column_name in row: new_item.append(row.get_value(column_name)) all_rows.append(new_item.copy()) - return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size) + return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size, shuffle=True) class _CustomDataset(Dataset): From 350b771083195fe17f50ec177e120a61f36d0625 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 17:01:48 +0200 Subject: [PATCH 08/63] add learning_rate parameter to fit() function --- src/safeds/ml/nn/_model.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2eaece27f..64c31af87 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -21,6 +21,7 @@ def fit( train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, + learning_rate=0.05, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -63,11 +64,11 @@ def fit( loss_fn = nn.MSELoss() - optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05) + optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) loss_sum = 0.0 number_of_batches_done = 0 for epoch in range(epoch_size): - for x, y in dataloader: + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) @@ -145,6 +146,7 @@ def fit( train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, + learning_rate = 0.05, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -190,11 +192,11 @@ def fit( else: loss_fn = nn.BCELoss() - optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05) + optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) loss_sum = 0.0 number_of_batches_done = 0 for epoch in range(epoch_size): - for x, y in dataloader: + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) if self._is_multi_class: From 905f103ee3bd217c53ca4faba71cb8174d4c60b8 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 19:09:37 +0200 Subject: [PATCH 09/63] raise an Error if test data doesnt match format of train data --- src/safeds/exceptions/__init__.py | 2 ++ src/safeds/exceptions/_ml.py | 11 +++++++++++ src/safeds/ml/nn/_model.py | 20 +++++++++++++++++--- tests/safeds/ml/nn/test_model.py | 23 ++++++++++++++++++++++- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index ca193270b..583de04d6 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -30,6 +30,7 @@ ModelNotFittedError, PredictionError, UntaggedTableError, + TestTrainDataMismatchError, ) __all__ = [ @@ -57,6 +58,7 @@ "LearningError", "ModelNotFittedError", "PredictionError", + "TestTrainDataMismatchError", "UntaggedTableError", # Other "Bound", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 1cc83ebc0..96be6647e 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -68,6 +68,17 @@ def __init__(self, reason: str): super().__init__(f"Error occurred while predicting: {reason}") +class TestTrainDataMismatchError(Exception): + """Raised when the columns of the table passed to the predict method do not match with the feature columns of the training data.""" + + def __init__(self) -> None: + super().__init__( + ( + "The column names in the test table do not match with the feature columns names of the training data." + ), + ) + + class UntaggedTableError(Exception): """Raised when an untagged table is used instead of a TaggedTable in a regression or classification.""" diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 64c31af87..e2cf77f81 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -6,7 +6,7 @@ from torch import Tensor, nn from safeds.data.tabular.containers import Column, Table, TaggedTable -from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError +from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError from safeds.ml.nn._fnn_layer import FNNLayer @@ -15,13 +15,14 @@ def __init__(self, layers: list): self._model = _PytorchModel(layers, is_for_classification=False) self._batch_size = 1 self._is_fitted = False + self._feature_names = None def fit( self, train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, - learning_rate=0.05, + learning_rate: float = 0.001, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -38,6 +39,8 @@ def fit( The number of times the training cycle should be done. batch_size The size of data batches that should be loaded at one time. + learning_rate + The learning rate of the neural network. callback_on_batch_completion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. callback_on_epoch_completion @@ -58,7 +61,9 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + self._feature_names = train_data.features.column_names copied_model = copy.deepcopy(self) + copied_model._batch_size = batch_size dataloader = train_data._into_dataloader(copied_model._batch_size) @@ -112,6 +117,8 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError + if not sorted(test_data.column_names).__eq__(sorted(self._feature_names)): + raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] with torch.no_grad(): @@ -140,13 +147,14 @@ def __init__(self, layers: list[FNNLayer]): self._batch_size = 1 self._is_fitted = False self._is_multi_class = layers[-1].output_size > 1 + self._feature_names = None def fit( self, train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, - learning_rate = 0.05, + learning_rate: float = 0.001, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -163,6 +171,8 @@ def fit( The number of times the training cycle should be done. batch_size The size of data batches that should be loaded at one time. + learning_rate + The learning rate of the neural network. callback_on_batch_completion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. callback_on_epoch_completion @@ -183,7 +193,9 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + self._feature_names = train_data.features.column_names copied_model = copy.deepcopy(self) + copied_model._batch_size = batch_size dataloader = train_data._into_dataloader(copied_model._batch_size) @@ -255,6 +267,8 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError + if not sorted(test_data.column_names).__eq__(sorted(self._feature_names)): + raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] with torch.no_grad(): diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 939978aee..a96b6a659 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ModelNotFittedError, OutOfBoundsError +from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError from safeds.ml.nn import FNNLayer, NeuralNetworkClassifier, NeuralNetworkRegressor @@ -87,6 +87,17 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classificatio ) assert model.is_fitted + def test_should_raise_if__test_and_train_data_mismatch(self) -> None: + model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + ) + with pytest.raises(TestTrainDataMismatchError, match="The column names in the test table do not match with the feature columns names of the training data."): + model.predict( + Table.from_dict({"a": [1], "c": [2]}), + ) + + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) @@ -186,6 +197,16 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: ) assert model.is_fitted + def test_should_raise_if__test_and_train_data_mismatch(self) -> None: + model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + ) + with pytest.raises(TestTrainDataMismatchError, match="The column names in the test table do not match with the feature columns names of the training data."): + model.predict( + Table.from_dict({"a": [1], "c": [2]}), + ) + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) From 75932045783b78dddccd812996fe8b7df8b98690 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 19:30:35 +0200 Subject: [PATCH 10/63] add abstract layer class --- src/safeds/ml/nn/_fnn_layer.py | 14 +++++++++++++- src/safeds/ml/nn/_model.py | 18 +++++++++--------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index a74df8ff3..6601af2a6 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -1,3 +1,5 @@ +from abc import ABC, abstractmethod + from torch import nn from safeds.exceptions import ClosedBound, OutOfBoundsError @@ -21,7 +23,17 @@ def forward(self, x: float) -> float: return self._fn(self._layer(x)) -class FNNLayer: +class Layer(ABC): + @abstractmethod + def __init__(self): + pass + + @abstractmethod + def output_size(self) -> int: + pass + + +class FNNLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): """ Create a FNN Layer. diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index e2cf77f81..d8bd01c80 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -7,11 +7,11 @@ from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError -from safeds.ml.nn._fnn_layer import FNNLayer +from safeds.ml.nn._fnn_layer import FNNLayer, Layer class NeuralNetworkRegressor: - def __init__(self, layers: list): + def __init__(self, layers: list[Layer | FNNLayer]): self._model = _PytorchModel(layers, is_for_classification=False) self._batch_size = 1 self._is_fitted = False @@ -142,7 +142,7 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier: - def __init__(self, layers: list[FNNLayer]): + def __init__(self, layers: list[Layer | FNNLayer]): self._model = _PytorchModel(layers, is_for_classification=True) self._batch_size = 1 self._is_fitted = False @@ -306,13 +306,13 @@ def is_fitted(self) -> bool: class _PytorchModel(nn.Module): - def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> None: + def __init__(self, layers: list[Layer | FNNLayer], is_for_classification: bool) -> None: super().__init__() - self._layer_list = fnn_layers + self._layer_list = layers internal_layers = [] previous_output_size = None - for layer in fnn_layers: + for layer in layers: if previous_output_size is not None: layer._set_input_size(previous_output_size) internal_layers.append(layer._get_internal_layer(activation_function="relu")) @@ -320,10 +320,10 @@ def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> N if is_for_classification: internal_layers.pop() - if fnn_layers[-1].output_size > 2: - internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="softmax")) + if layers[-1].output_size > 2: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) else: - internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="sigmoid")) + internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) self._pytorch_layers = nn.ModuleList(internal_layers) def forward(self, x: float) -> float: From f5f291e44a4d72c3c6e562c9dcf96442fa4133ba Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 19:38:24 +0200 Subject: [PATCH 11/63] make forward return tensor instead of float and change method to build the model --- src/safeds/ml/nn/_fnn_layer.py | 4 ++-- src/safeds/ml/nn/_model.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index 6601af2a6..e931f6108 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from torch import nn +from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError @@ -19,7 +19,7 @@ def __init__(self, input_size: int, output_size: int, activation_function: str): case _: raise ValueError("Unknown Activation Function: " + activation_function) - def forward(self, x: float) -> float: + def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index d8bd01c80..c192aecc6 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -324,9 +324,9 @@ def __init__(self, layers: list[Layer | FNNLayer], is_for_classification: bool) internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) else: internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) - self._pytorch_layers = nn.ModuleList(internal_layers) + self._pytorch_layers = nn.Sequential(*internal_layers) - def forward(self, x: float) -> float: + def forward(self, x: Tensor) -> Tensor: for layer in self._pytorch_layers: x = layer(x) return x From 7284c89f4d94f446b2b1cf4f41abace11e017415 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 20:00:08 +0200 Subject: [PATCH 12/63] remove uncoverable lines from codecov --- src/safeds/ml/nn/_fnn_layer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index e931f6108..0604466ee 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -25,13 +25,12 @@ def forward(self, x: Tensor) -> Tensor: class Layer(ABC): @abstractmethod - def __init__(self): - pass + def __init__(self) -> None: + pass # pragma: no cover @abstractmethod def output_size(self) -> int: - pass - + pass # pragma: no cover class FNNLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): From 0572cb4c6d439f12e635e3fd23cfc5a9ed724d3a Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 20:07:29 +0200 Subject: [PATCH 13/63] small change --- src/safeds/ml/nn/_fnn_layer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index 0604466ee..cc0a36c45 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -28,10 +28,6 @@ class Layer(ABC): def __init__(self) -> None: pass # pragma: no cover - @abstractmethod - def output_size(self) -> int: - pass # pragma: no cover - class FNNLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): """ From 0eefee7b56b424f4179b0f4158cb9046294b986c Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 9 Apr 2024 20:11:00 +0200 Subject: [PATCH 14/63] small change --- src/safeds/ml/nn/_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index c192aecc6..afea1f52c 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -15,7 +15,7 @@ def __init__(self, layers: list[Layer | FNNLayer]): self._model = _PytorchModel(layers, is_for_classification=False) self._batch_size = 1 self._is_fitted = False - self._feature_names = None + self._feature_names: None | list[str] = None def fit( self, @@ -117,7 +117,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not sorted(test_data.column_names).__eq__(sorted(self._feature_names)): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] From f5bdf22cf1d7c8dbb812709523d457c4a6e58fd9 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:10:19 +0200 Subject: [PATCH 15/63] add abstract functions --- src/safeds/ml/nn/_fnn_layer.py | 15 ++++++++++++++- src/safeds/ml/nn/_model.py | 10 +++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index cc0a36c45..13ef556f6 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -26,7 +26,20 @@ def forward(self, x: Tensor) -> Tensor: class Layer(ABC): @abstractmethod def __init__(self) -> None: - pass # pragma: no cover + pass # pragma: no cover + + @abstractmethod + def _get_internal_layer(self, activation_function: str) -> _InternalLayer: + pass # pragma: no cover + + @abstractmethod + @property + def output_size(self) -> int: + pass # pragma: no cover + + @abstractmethod + def _set_input_size(self, input_size: int) -> None: + pass # pragma: no cover class FNNLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index afea1f52c..3fcb36c01 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -7,11 +7,11 @@ from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError -from safeds.ml.nn._fnn_layer import FNNLayer, Layer +from safeds.ml.nn._fnn_layer import Layer class NeuralNetworkRegressor: - def __init__(self, layers: list[Layer | FNNLayer]): + def __init__(self, layers: list[Layer]): self._model = _PytorchModel(layers, is_for_classification=False) self._batch_size = 1 self._is_fitted = False @@ -142,7 +142,7 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier: - def __init__(self, layers: list[Layer | FNNLayer]): + def __init__(self, layers: list[Layer]): self._model = _PytorchModel(layers, is_for_classification=True) self._batch_size = 1 self._is_fitted = False @@ -267,7 +267,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not sorted(test_data.column_names).__eq__(sorted(self._feature_names)): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -306,7 +306,7 @@ def is_fitted(self) -> bool: class _PytorchModel(nn.Module): - def __init__(self, layers: list[Layer | FNNLayer], is_for_classification: bool) -> None: + def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: super().__init__() self._layer_list = layers internal_layers = [] From e3543fba1d0fc54ccb77ca87f6b13dc12afb11bd Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:17:22 +0200 Subject: [PATCH 16/63] change for linter --- src/safeds/ml/nn/_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 3fcb36c01..2645f2b0a 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -117,7 +117,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] From 41bdd6a4b2e75e231631710b0f66e3a1b6a78479 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:21:28 +0200 Subject: [PATCH 17/63] change for linter --- src/safeds/ml/nn/_fnn_layer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index 13ef556f6..9dfd21aaf 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -33,7 +33,6 @@ def _get_internal_layer(self, activation_function: str) -> _InternalLayer: pass # pragma: no cover @abstractmethod - @property def output_size(self) -> int: pass # pragma: no cover From 1487ea28cf3755edf18fdbaaecec5ccee1f2cd95 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:27:31 +0200 Subject: [PATCH 18/63] change for linter --- src/safeds/ml/nn/_fnn_layer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index 9dfd21aaf..17935de50 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -32,6 +32,7 @@ def __init__(self) -> None: def _get_internal_layer(self, activation_function: str) -> _InternalLayer: pass # pragma: no cover + @property @abstractmethod def output_size(self) -> int: pass # pragma: no cover From 8df1f26d82259d02df505069c203e80ba7d04264 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:31:10 +0200 Subject: [PATCH 19/63] change for linter --- src/safeds/ml/nn/_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2645f2b0a..808504ffd 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -147,7 +147,7 @@ def __init__(self, layers: list[Layer]): self._batch_size = 1 self._is_fitted = False self._is_multi_class = layers[-1].output_size > 1 - self._feature_names = None + self._feature_names: None | list[str] = None def fit( self, @@ -267,7 +267,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] From 03907fdd5bc783cf2b14fa678745c6b252f80c3a Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:36:48 +0000 Subject: [PATCH 20/63] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 4 +++- src/safeds/exceptions/__init__.py | 2 +- src/safeds/exceptions/_ml.py | 4 +--- src/safeds/ml/nn/_fnn_layer.py | 1 + src/safeds/ml/nn/_model.py | 8 ++++++-- tests/safeds/ml/nn/test_model.py | 11 ++++++++--- 6 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 3ba65ab64..98b5fc8c4 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -899,7 +899,9 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: for column_name in row: new_item.append(row.get_value(column_name)) all_rows.append(new_item.copy()) - return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size, shuffle=True) + return DataLoader( + dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size, shuffle=True, + ) class _CustomDataset(Dataset): diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 2d9fdba68..21fb3bccf 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -30,8 +30,8 @@ ModelNotFittedError, NonTimeSeriesError, PredictionError, - UntaggedTableError, TestTrainDataMismatchError, + UntaggedTableError, ) __all__ = [ diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 153a1e326..e63d451b4 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -73,9 +73,7 @@ class TestTrainDataMismatchError(Exception): def __init__(self) -> None: super().__init__( - ( - "The column names in the test table do not match with the feature columns names of the training data." - ), + ("The column names in the test table do not match with the feature columns names of the training data."), ) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index 17935de50..aa76b0618 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -41,6 +41,7 @@ def output_size(self) -> int: def _set_input_size(self, input_size: int) -> None: pass # pragma: no cover + class FNNLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): """ diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 808504ffd..928df632d 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -117,7 +117,9 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): + if not (test_data.column_names.sort()).__eq__( + self._feature_names.sort() if self._feature_names is not None else None, + ): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -267,7 +269,9 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): + if not (test_data.column_names.sort()).__eq__( + self._feature_names.sort() if self._feature_names is not None else None, + ): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index a96b6a659..d4adda974 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -92,12 +92,14 @@ def test_should_raise_if__test_and_train_data_mismatch(self) -> None: model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) - with pytest.raises(TestTrainDataMismatchError, match="The column names in the test table do not match with the feature columns names of the training data."): + with pytest.raises( + TestTrainDataMismatchError, + match="The column names in the test table do not match with the feature columns names of the training data.", + ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) - def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) @@ -202,7 +204,10 @@ def test_should_raise_if__test_and_train_data_mismatch(self) -> None: model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) - with pytest.raises(TestTrainDataMismatchError, match="The column names in the test table do not match with the feature columns names of the training data."): + with pytest.raises( + TestTrainDataMismatchError, + match="The column names in the test table do not match with the feature columns names of the training data.", + ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) From 7f56aa268fbd8d477e3c5c8072dd5099112f89bb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:39:31 +0000 Subject: [PATCH 21/63] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 98b5fc8c4..05a30774f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -900,7 +900,9 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: new_item.append(row.get_value(column_name)) all_rows.append(new_item.copy()) return DataLoader( - dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size, shuffle=True, + dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), + batch_size=batch_size, + shuffle=True, ) From 0923989de1c3aff81426fd5ba27ddbe3f918c811 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:44:08 +0200 Subject: [PATCH 22/63] change for linter --- src/safeds/ml/nn/_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 808504ffd..04302fd73 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -117,7 +117,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -267,7 +267,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort() if self._feature_names is not None else None): + if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] From 625c0b6c16f7cb9735bd9afb77b742d5d10bfa25 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 16:58:35 +0200 Subject: [PATCH 23/63] change for linter --- src/safeds/ml/nn/_model.py | 4 ++-- tests/safeds/ml/nn/test_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 04302fd73..e3bd33134 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -117,7 +117,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): + if not (sorted(test_data.column_names)).__eq__(sorted(self._feature_names) if self._feature_names is not None else None): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -267,7 +267,7 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (test_data.column_names.sort()).__eq__(self._feature_names.sort()): + if not (sorted(test_data.column_names)).__eq__(sorted(self._feature_names) if self._feature_names is not None else None): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index d4adda974..8a16cbdc9 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -87,7 +87,7 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classificatio ) assert model.is_fitted - def test_should_raise_if__test_and_train_data_mismatch(self) -> None: + def test_should_raise_if_test_and_train_data_mismatch(self) -> None: model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), From 5feeff576f4489128855ecb3eaac4255b36e1fdf Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:00:11 +0000 Subject: [PATCH 24/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index e3bd33134..20c4cff83 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -117,7 +117,9 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__(sorted(self._feature_names) if self._feature_names is not None else None): + if not (sorted(test_data.column_names)).__eq__( + sorted(self._feature_names) if self._feature_names is not None else None, + ): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -267,7 +269,9 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__(sorted(self._feature_names) if self._feature_names is not None else None): + if not (sorted(test_data.column_names)).__eq__( + sorted(self._feature_names) if self._feature_names is not None else None, + ): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] From 3c98c23f21b628bcab9e10ebe4c4903bb2dce353 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 20:02:41 +0200 Subject: [PATCH 25/63] accumulate epoch and batch counters and loss over all fit-calls --- src/safeds/ml/nn/_model.py | 39 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index e3bd33134..6f41705ef 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -16,6 +16,9 @@ def __init__(self, layers: list[Layer]): self._batch_size = 1 self._is_fitted = False self._feature_names: None | list[str] = None + self._total_number_of_batches_done = 0 + self._total_number_of_epochs_done = 0 + self._loss_sum = 0.0 def fit( self, @@ -62,34 +65,33 @@ def fit( if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) self._feature_names = train_data.features.column_names + self._batch_size = batch_size copied_model = copy.deepcopy(self) - copied_model._batch_size = batch_size dataloader = train_data._into_dataloader(copied_model._batch_size) loss_fn = nn.MSELoss() optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) - loss_sum = 0.0 - number_of_batches_done = 0 - for epoch in range(epoch_size): + for _ in range(epoch_size): for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) loss = loss_fn(pred, y) - loss_sum += loss.item() + self._loss_sum += loss.item() loss.backward() optimizer.step() - number_of_batches_done += 1 + self._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - number_of_batches_done, - loss_sum / (number_of_batches_done * batch_size), + self._total_number_of_batches_done, + self._loss_sum / (self._total_number_of_batches_done * batch_size), ) + self._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: - callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size)) + callback_on_epoch_completion(self._total_number_of_epochs_done, self._loss_sum / (self._total_number_of_batches_done * batch_size)) copied_model._is_fitted = True copied_model._model.eval() return copied_model @@ -148,6 +150,9 @@ def __init__(self, layers: list[Layer]): self._is_fitted = False self._is_multi_class = layers[-1].output_size > 1 self._feature_names: None | list[str] = None + self._total_number_of_batches_done = 0 + self._total_number_of_epochs_done = 0 + self._loss_sum = 0.0 def fit( self, @@ -194,9 +199,9 @@ def fit( if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) self._feature_names = train_data.features.column_names + self._batch_size = batch_size copied_model = copy.deepcopy(self) - copied_model._batch_size = batch_size dataloader = train_data._into_dataloader(copied_model._batch_size) if self._is_multi_class: @@ -205,9 +210,7 @@ def fit( loss_fn = nn.BCELoss() optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) - loss_sum = 0.0 - number_of_batches_done = 0 - for epoch in range(epoch_size): + for _ in range(epoch_size): for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) @@ -229,17 +232,17 @@ def fit( loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) else: loss = loss_fn(pred, y) - loss_sum += loss.item() + self._loss_sum += loss.item() loss.backward() optimizer.step() - number_of_batches_done += 1 + self._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - number_of_batches_done, - loss_sum / (number_of_batches_done * batch_size), + self._total_number_of_batches_done, + self._loss_sum / (self._total_number_of_batches_done * batch_size), ) if callback_on_epoch_completion is not None: - callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size)) + callback_on_epoch_completion(self._total_number_of_epochs_done + 1, self._loss_sum / (self._total_number_of_batches_done * batch_size)) copied_model._is_fitted = True copied_model._model.eval() return copied_model From 8191fc4cd7407b31b6b2b75bd37c81f63af360f6 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:04:21 +0000 Subject: [PATCH 26/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 65f99b486..083abf965 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -91,7 +91,10 @@ def fit( ) self._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: - callback_on_epoch_completion(self._total_number_of_epochs_done, self._loss_sum / (self._total_number_of_batches_done * batch_size)) + callback_on_epoch_completion( + self._total_number_of_epochs_done, + self._loss_sum / (self._total_number_of_batches_done * batch_size), + ) copied_model._is_fitted = True copied_model._model.eval() return copied_model @@ -244,7 +247,10 @@ def fit( self._loss_sum / (self._total_number_of_batches_done * batch_size), ) if callback_on_epoch_completion is not None: - callback_on_epoch_completion(self._total_number_of_epochs_done + 1, self._loss_sum / (self._total_number_of_batches_done * batch_size)) + callback_on_epoch_completion( + self._total_number_of_epochs_done + 1, + self._loss_sum / (self._total_number_of_batches_done * batch_size), + ) copied_model._is_fitted = True copied_model._model.eval() return copied_model From 908409a3d0ee637760f6d53df767ee392767df11 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 20:36:45 +0200 Subject: [PATCH 27/63] add input_size property to Layer --- src/safeds/ml/nn/_fnn_layer.py | 17 +++++++++++++++++ src/safeds/ml/nn/_model.py | 12 +++++++++--- tests/safeds/ml/nn/test_fnn_layer.py | 12 ++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py index aa76b0618..e18ea15d4 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_fnn_layer.py @@ -32,6 +32,11 @@ def __init__(self) -> None: def _get_internal_layer(self, activation_function: str) -> _InternalLayer: pass # pragma: no cover + @property + @abstractmethod + def input_size(self) -> int: + pass # pragma: no cover + @property @abstractmethod def output_size(self) -> int: @@ -70,6 +75,18 @@ def __init__(self, output_size: int, input_size: int | None = None): def _get_internal_layer(self, activation_function: str) -> _InternalLayer: return _InternalLayer(self._input_size, self._output_size, activation_function) + @property + def input_size(self) -> int: + """ + Get the input_size of this layer. + + Returns + ------- + result : + The amount of values being passed into this layer. + """ + return self._input_size + @property def output_size(self) -> int: """ diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 65f99b486..2f1ed3a45 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -12,7 +12,8 @@ class NeuralNetworkRegressor: def __init__(self, layers: list[Layer]): - self._model = _PytorchModel(layers, is_for_classification=False) + self._model = _InternalModel(layers, is_for_classification=False) + self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False self._feature_names: None | list[str] = None @@ -147,7 +148,8 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier: def __init__(self, layers: list[Layer]): - self._model = _PytorchModel(layers, is_for_classification=True) + self._model = _InternalModel(layers, is_for_classification=True) + self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False self._is_multi_class = layers[-1].output_size > 1 @@ -312,7 +314,7 @@ def is_fitted(self) -> bool: return self._is_fitted -class _PytorchModel(nn.Module): +class _InternalModel(nn.Module): def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: super().__init__() self._layer_list = layers @@ -333,6 +335,10 @@ def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) self._pytorch_layers = nn.Sequential(*internal_layers) + @property + def input_size(self) -> int: + return self._layer_list[0].input_size + def forward(self, x: Tensor) -> Tensor: for layer in self._pytorch_layers: x = layer(x) diff --git a/tests/safeds/ml/nn/test_fnn_layer.py b/tests/safeds/ml/nn/test_fnn_layer.py index e75488bc8..0dfc4b32e 100644 --- a/tests/safeds/ml/nn/test_fnn_layer.py +++ b/tests/safeds/ml/nn/test_fnn_layer.py @@ -18,6 +18,18 @@ def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None: FNNLayer(output_size=1, input_size=input_size) +@pytest.mark.parametrize( + "input_size", + [ + 1, + 20, + ], + ids=["one", "twenty"], +) +def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: + assert FNNLayer(output_size=1, input_size=input_size).input_size == input_size + + @pytest.mark.parametrize( "activation_function", [ From 846a36d4214493997b7c6dd9c13207be5588141c Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Apr 2024 21:09:41 +0200 Subject: [PATCH 28/63] raise InputSizeError if input size and table size mismatch --- src/safeds/exceptions/__init__.py | 2 ++ src/safeds/exceptions/_ml.py | 11 ++++++++++- src/safeds/ml/nn/_model.py | 12 +++++++++++- tests/safeds/ml/nn/test_model.py | 20 +++++++++++++++++++- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 21fb3bccf..44f45fd40 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -26,6 +26,7 @@ DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, + InputSizeError, LearningError, ModelNotFittedError, NonTimeSeriesError, @@ -56,6 +57,7 @@ "DatasetContainsTargetError", "DatasetMissesDataError", "DatasetMissesFeaturesError", + "InputSizeError", "LearningError", "ModelNotFittedError", "NonTimeSeriesError", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index e63d451b4..58bd78402 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -73,7 +73,16 @@ class TestTrainDataMismatchError(Exception): def __init__(self) -> None: super().__init__( - ("The column names in the test table do not match with the feature columns names of the training data."), + "The column names in the test table do not match with the feature columns names of the training data.", + ) + + +class InputSizeError(Exception): + """Raised when the amount of features being passed to a network does not match with its input size.""" + + def __init__(self, table_size: int, input_layer_size: int) -> None: + super().__init__( + f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.", ) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index f83e3987b..a265c34f7 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -6,7 +6,13 @@ from torch import Tensor, nn from safeds.data.tabular.containers import Column, Table, TaggedTable -from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError +from safeds.exceptions import ( + ClosedBound, + InputSizeError, + ModelNotFittedError, + OutOfBoundsError, + TestTrainDataMismatchError, +) from safeds.ml.nn._fnn_layer import Layer @@ -65,6 +71,8 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + if train_data.features.number_of_columns is not self._input_size: + raise InputSizeError(train_data.features.number_of_columns, self._input_size) self._feature_names = train_data.features.column_names self._batch_size = batch_size copied_model = copy.deepcopy(self) @@ -205,6 +213,8 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + if train_data.features.number_of_columns is not self._input_size: + raise InputSizeError(train_data.features.number_of_columns, self._input_size) self._feature_names = train_data.features.column_names self._batch_size = batch_size copied_model = copy.deepcopy(self) diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 8a16cbdc9..4c4f7c7b6 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError +from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError, InputSizeError from safeds.ml.nn import FNNLayer, NeuralNetworkClassifier, NeuralNetworkRegressor @@ -100,6 +100,15 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: + model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + with pytest.raises( + InputSizeError, + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + ) + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) @@ -212,6 +221,15 @@ def test_should_raise_if__test_and_train_data_mismatch(self) -> None: Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: + model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + with pytest.raises( + InputSizeError, + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + ) + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) From c4c0965399ffc89c7bdbc5eeb37509cab0d194f0 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:11:17 +0000 Subject: [PATCH 29/63] style: apply automated linter fixes --- src/safeds/exceptions/_ml.py | 2 +- tests/safeds/ml/nn/test_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 58bd78402..4512bd34f 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -82,7 +82,7 @@ class InputSizeError(Exception): def __init__(self, table_size: int, input_layer_size: int) -> None: super().__init__( - f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.", + f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.", ) diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 4c4f7c7b6..d3526d18b 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError, InputSizeError +from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError from safeds.ml.nn import FNNLayer, NeuralNetworkClassifier, NeuralNetworkRegressor From fe842e8a74bafec8a30efb95c49dadc3443b6d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 12 Apr 2024 17:12:34 +0200 Subject: [PATCH 30/63] perf: suggested performance upgrades for dataloader in TaggedTable and model fit and predict --- .../data/tabular/containers/_tagged_table.py | 40 ++++++--- src/safeds/ml/nn/_model.py | 86 +++++++++++-------- 2 files changed, 74 insertions(+), 52 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 035a4373d..0460e8b5f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -5,6 +5,7 @@ import numpy as np import torch +from torch import Tensor from torch.utils.data import DataLoader, Dataset from safeds._utils import _structural_hash @@ -876,7 +877,8 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg feature_names=self.features.column_names, ) - def _into_dataloader(self, batch_size: int) -> DataLoader: + # def _into_dataloader(self, batch_size: int) -> DataLoader: + def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> DataLoader: """ Return a Dataloader for the data stored in this table, used for training neural networks. @@ -893,24 +895,34 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: The DataLoader. """ - feature_rows = self.features.to_rows() - all_rows = [] - for row in feature_rows: - new_item = [] - for column_name in row: - new_item.append(row.get_value(column_name)) - all_rows.append(new_item.copy()) - return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size) + # feature_rows = self.features.to_rows() + # all_rows = [] + # for row in feature_rows: + # new_item = [] + # for column_name in row: + # new_item.append(row.get_value(column_name)) + # all_rows.append(new_item.copy()) + # return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size) + if num_of_classes <= 2: + return DataLoader(dataset=_CustomDataset(torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1)), batch_size=batch_size) + else: + return DataLoader(dataset=_CustomDataset(torch.Tensor(self.features._data.values), torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes)), batch_size=batch_size) class _CustomDataset(Dataset): - def __init__(self, features: np.array, target: np.array): - self.X = torch.from_numpy(features.astype(np.float32)) - self.Y = torch.from_numpy(target.astype(np.float32)) - self.len = self.X.shape[0] + + # def __init__(self, features: np.array, target: np.array): + def __init__(self, features: Tensor, target: Tensor): + # self.X = torch.from_numpy(features.astype(np.float32)) + # self.Y = torch.from_numpy(target.astype(np.float32)) + self.X = features.to(torch.float32) + self.Y = target.to(torch.float32) + # self.len = self.X.shape[0] + self.len = self.X.size(dim=0) def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]: - return self.X[item], self.Y[item].unsqueeze(-1) + # return self.X[item], self.Y[item].unsqueeze(-1) + return self.X[item], self.Y[item] def __len__(self) -> int: return self.len diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2eaece27f..0fb8db855 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -59,7 +59,8 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) copied_model = copy.deepcopy(self) copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader(copied_model._batch_size) + # dataloader = train_data._into_dataloader(copied_model._batch_size) + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, 1) loss_fn = nn.MSELoss() @@ -138,7 +139,8 @@ def __init__(self, layers: list[FNNLayer]): self._model = _PytorchModel(layers, is_for_classification=True) self._batch_size = 1 self._is_fitted = False - self._is_multi_class = layers[-1].output_size > 1 + # self._is_multi_class = layers[-1].output_size > 1 + self._num_of_classes = layers[-1].output_size def fit( self, @@ -183,9 +185,11 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) copied_model = copy.deepcopy(self) copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader(copied_model._batch_size) + # dataloader = train_data._into_dataloader(copied_model._batch_size) + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) - if self._is_multi_class: + # if self._is_multi_class: + if self._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() else: loss_fn = nn.BCELoss() @@ -197,24 +201,26 @@ def fit( for x, y in dataloader: optimizer.zero_grad() pred = copied_model._model(x) - if self._is_multi_class: - pred_size = Tensor.size(pred, dim=1) - predictions_for_all_items_of_batch = [] - for value in range(len(y)): - list_of_probabilities_for_each_category = [] - class_index = y[value].item() - for index in range(pred_size): - if index is int(class_index): - list_of_probabilities_for_each_category.append(1.0) - else: - list_of_probabilities_for_each_category.append(0.0) - predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) - - y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) - - loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) - else: - loss = loss_fn(pred, y) + # if self._is_multi_class: + # pred_size = Tensor.size(pred, dim=1) + # predictions_for_all_items_of_batch = [] + # for value in range(len(y)): + # list_of_probabilities_for_each_category = [] + # class_index = y[value].item() + # for index in range(pred_size): + # if index is int(class_index): + # list_of_probabilities_for_each_category.append(1.0) + # else: + # list_of_probabilities_for_each_category.append(0.0) + # predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) + # + # y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) + # + # loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) + # else: + # loss = loss_fn(pred, y) + loss = loss_fn(pred, y) + loss_sum += loss.item() loss.backward() optimizer.step() @@ -258,22 +264,26 @@ def predict(self, test_data: Table) -> TaggedTable: with torch.no_grad(): for x in dataloader: elem = self._model(x) - for item in range(len(elem)): - if not self._is_multi_class: - if elem[item].item() < 0.5: - predicted_class = 0 # pragma: no cover - else: # pragma: no cover - predicted_class = 1 # pragma: no cover - predictions.append(predicted_class) - else: - values = elem[item].tolist() - highest_value = 0 - category_of_highest_value = 0 - for index in range(len(values)): - if values[index] > highest_value: - highest_value = values[index] - category_of_highest_value = index - predictions.append(category_of_highest_value) + if self._num_of_classes > 1: + predictions += torch.argmax(elem, dim=1).tolist() + else: + predictions += elem.round().squeeze().tolist() + # for item in range(len(elem)): + # if not self._is_multi_class: + # if elem[item].item() < 0.5: + # predicted_class = 0 # pragma: no cover + # else: # pragma: no cover + # predicted_class = 1 # pragma: no cover + # predictions.append(predicted_class) + # else: + # values = elem[item].tolist() + # highest_value = 0 + # category_of_highest_value = 0 + # for index in range(len(values)): + # if values[index] > highest_value: + # highest_value = values[index] + # category_of_highest_value = index + # predictions.append(category_of_highest_value) return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property From 4892d5dddac9d4f287ce7d17a032cd2646a487b0 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 13 Apr 2024 15:39:53 +0200 Subject: [PATCH 31/63] rename FNNLayer to Forward Layer and put it in separate File --- src/safeds/ml/nn/__init__.py | 4 +- .../nn/{_fnn_layer.py => _forward_layer.py} | 29 +----------- src/safeds/ml/nn/_layer.py | 29 ++++++++++++ src/safeds/ml/nn/_model.py | 2 +- ...est_fnn_layer.py => test_forward_layer.py} | 12 ++--- tests/safeds/ml/nn/test_layer.py | 0 tests/safeds/ml/nn/test_model.py | 46 +++++++++---------- 7 files changed, 63 insertions(+), 59 deletions(-) rename src/safeds/ml/nn/{_fnn_layer.py => _forward_layer.py} (79%) create mode 100644 src/safeds/ml/nn/_layer.py rename tests/safeds/ml/nn/{test_fnn_layer.py => test_forward_layer.py} (76%) create mode 100644 tests/safeds/ml/nn/test_layer.py diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 53b1f98d4..9481e591e 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -1,10 +1,10 @@ """Classes for classification tasks.""" -from ._fnn_layer import FNNLayer +from ._forward_layer import ForwardLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor __all__ = [ - "FNNLayer", + "ForwardLayer", "NeuralNetworkClassifier", "NeuralNetworkRegressor", ] diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_forward_layer.py similarity index 79% rename from src/safeds/ml/nn/_fnn_layer.py rename to src/safeds/ml/nn/_forward_layer.py index e18ea15d4..8164f9e6c 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,8 +1,7 @@ -from abc import ABC, abstractmethod - from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError +from safeds.ml.nn._layer import Layer class _InternalLayer(nn.Module): @@ -23,31 +22,7 @@ def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) -class Layer(ABC): - @abstractmethod - def __init__(self) -> None: - pass # pragma: no cover - - @abstractmethod - def _get_internal_layer(self, activation_function: str) -> _InternalLayer: - pass # pragma: no cover - - @property - @abstractmethod - def input_size(self) -> int: - pass # pragma: no cover - - @property - @abstractmethod - def output_size(self) -> int: - pass # pragma: no cover - - @abstractmethod - def _set_input_size(self, input_size: int) -> None: - pass # pragma: no cover - - -class FNNLayer(Layer): +class ForwardLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): """ Create a FNN Layer. diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py new file mode 100644 index 000000000..fdbd7bf63 --- /dev/null +++ b/src/safeds/ml/nn/_layer.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod + +from torch import nn + + +class Layer(ABC): + @abstractmethod + def __init__(self) -> None: + pass # pragma: no cover + + @abstractmethod + def _get_internal_layer(self, activation_function: str) -> nn.Module: + pass # pragma: no cover + + @property + @abstractmethod + def input_size(self) -> int: + pass # pragma: no cover + + @property + @abstractmethod + def output_size(self) -> int: + pass # pragma: no cover + + @abstractmethod + def _set_input_size(self, input_size: int) -> None: + pass # pragma: no cover + + diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index a265c34f7..b8fce6ac4 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -13,7 +13,7 @@ OutOfBoundsError, TestTrainDataMismatchError, ) -from safeds.ml.nn._fnn_layer import Layer +from safeds.ml.nn._layer import Layer class NeuralNetworkRegressor: diff --git a/tests/safeds/ml/nn/test_fnn_layer.py b/tests/safeds/ml/nn/test_forward_layer.py similarity index 76% rename from tests/safeds/ml/nn/test_fnn_layer.py rename to tests/safeds/ml/nn/test_forward_layer.py index 0dfc4b32e..5d29022d4 100644 --- a/tests/safeds/ml/nn/test_fnn_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -1,6 +1,6 @@ import pytest from safeds.exceptions import OutOfBoundsError -from safeds.ml.nn import FNNLayer +from safeds.ml.nn import ForwardLayer @pytest.mark.parametrize( @@ -15,7 +15,7 @@ def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None: OutOfBoundsError, match=rf"input_size \(={input_size}\) is not inside \[1, \u221e\)\.", ): - FNNLayer(output_size=1, input_size=input_size) + ForwardLayer(output_size=1, input_size=input_size) @pytest.mark.parametrize( @@ -27,7 +27,7 @@ def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None: ids=["one", "twenty"], ) def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: - assert FNNLayer(output_size=1, input_size=input_size).input_size == input_size + assert ForwardLayer(output_size=1, input_size=input_size).input_size == input_size @pytest.mark.parametrize( @@ -42,7 +42,7 @@ def test_should_raise_if_unknown_activation_function_is_passed(activation_functi ValueError, match=rf"Unknown Activation Function: {activation_function}", ): - FNNLayer(output_size=1, input_size=1)._get_internal_layer(activation_function) + ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function) @pytest.mark.parametrize( @@ -57,7 +57,7 @@ def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None: OutOfBoundsError, match=rf"output_size \(={output_size}\) is not inside \[1, \u221e\)\.", ): - FNNLayer(output_size=output_size, input_size=1) + ForwardLayer(output_size=output_size, input_size=1) @pytest.mark.parametrize( @@ -69,4 +69,4 @@ def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None: ids=["one", "twenty"], ) def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None: - assert FNNLayer(output_size=output_size, input_size=1).output_size == output_size + assert ForwardLayer(output_size=output_size, input_size=1).output_size == output_size diff --git a/tests/safeds/ml/nn/test_layer.py b/tests/safeds/ml/nn/test_layer.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 4c4f7c7b6..cf57e2a2f 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError, InputSizeError -from safeds.ml.nn import FNNLayer, NeuralNetworkClassifier, NeuralNetworkRegressor +from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor class TestClassificationModel: @@ -17,7 +17,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([FNNLayer(1, 1)]).fit( + NeuralNetworkClassifier([ForwardLayer(1, 1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -34,14 +34,14 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], ).fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) @@ -49,7 +49,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], ).fit( Table.from_dict({"a": [1, 0], "b": [0, 1]}).tag_columns("a"), ) @@ -58,7 +58,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=3)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), ) @@ -67,12 +67,12 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), @@ -80,7 +80,7 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se assert model.is_fitted def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), @@ -88,7 +88,7 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classificatio assert model.is_fitted def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) @@ -101,7 +101,7 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) with pytest.raises( InputSizeError, ): @@ -110,7 +110,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -128,7 +128,7 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -159,7 +159,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -176,19 +176,19 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: - fitted_model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), ) assert isinstance(fitted_model, NeuralNetworkRegressor) def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: - fitted_model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), ) predictions = fitted_model.predict(Table.from_dict({"b": [1]})) @@ -196,12 +196,12 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), @@ -209,7 +209,7 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: assert model.is_fitted def test_should_raise_if__test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) @@ -222,7 +222,7 @@ def test_should_raise_if__test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) with pytest.raises( InputSizeError, ): @@ -231,7 +231,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -249,7 +249,7 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False From 0ca1ec84a9291c0c0b306e53f994576fff1319b9 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sat, 13 Apr 2024 13:42:36 +0000 Subject: [PATCH 32/63] style: apply automated linter fixes --- src/safeds/ml/nn/_layer.py | 2 -- tests/safeds/ml/nn/test_model.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index fdbd7bf63..a2ac00d87 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -25,5 +25,3 @@ def output_size(self) -> int: @abstractmethod def _set_input_size(self, input_size: int) -> None: pass # pragma: no cover - - diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index cf57e2a2f..2c6fae170 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError, InputSizeError +from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor From d39ada71194ab29da73f90655e3e934db6f29495 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 14 Apr 2024 15:46:19 +0200 Subject: [PATCH 33/63] remove unnecessary test file --- tests/safeds/ml/nn/test_layer.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/safeds/ml/nn/test_layer.py diff --git a/tests/safeds/ml/nn/test_layer.py b/tests/safeds/ml/nn/test_layer.py deleted file mode 100644 index e69de29bb..000000000 From bf74b67b37b87d3afcbe4c8f4d8cf4b36c7c5e7b Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Apr 2024 13:24:54 +0200 Subject: [PATCH 34/63] merge suggested changes --- .../data/tabular/containers/_tagged_table.py | 14 ----- src/safeds/ml/nn/_model.py | 45 +++------------ tests/safeds/ml/nn/test_model.py | 56 +++++++++++++++---- 3 files changed, 53 insertions(+), 62 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 62fcfade6..7b04abd46 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -877,7 +877,6 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg feature_names=self.features.column_names, ) - # def _into_dataloader(self, batch_size: int) -> DataLoader: def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> DataLoader: """ Return a Dataloader for the data stored in this table, used for training neural networks. @@ -895,14 +894,6 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> The DataLoader. """ - # feature_rows = self.features.to_rows() - # all_rows = [] - # for row in feature_rows: - # new_item = [] - # for column_name in row: - # new_item.append(row.get_value(column_name)) - # all_rows.append(new_item.copy()) - # return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size) if num_of_classes <= 2: return DataLoader(dataset=_CustomDataset(torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1)), batch_size=batch_size, shuffle=True) else: @@ -911,17 +902,12 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> class _CustomDataset(Dataset): - # def __init__(self, features: np.array, target: np.array): def __init__(self, features: Tensor, target: Tensor): - # self.X = torch.from_numpy(features.astype(np.float32)) - # self.Y = torch.from_numpy(target.astype(np.float32)) self.X = features.to(torch.float32) self.Y = target.to(torch.float32) - # self.len = self.X.shape[0] self.len = self.X.size(dim=0) def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]: - # return self.X[item], self.Y[item].unsqueeze(-1) return self.X[item], self.Y[item] def __len__(self) -> int: diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index dfb39e06d..ab7b12f8d 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -141,8 +141,6 @@ def predict(self, test_data: Table) -> TaggedTable: for x in dataloader: elem = self._model(x) predictions += elem.squeeze(dim=1).tolist() - # for item in range(len(elem)): - # predictions.append(elem[item].item()) return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property @@ -222,7 +220,6 @@ def fit( dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) - # if self._is_multi_class: if self._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() else: @@ -233,28 +230,12 @@ def fit( for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) - #if self._is_multi_class: - # pred_size = Tensor.size(pred, dim=1) - # predictions_for_all_items_of_batch = [] - # for value in range(len(y)): - # list_of_probabilities_for_each_category = [] - # class_index = y[value].item() - # for index in range(pred_size): - # if index is int(class_index): - # list_of_probabilities_for_each_category.append(1.0) - # else: - # list_of_probabilities_for_each_category.append(0.0) - # predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) - # - # y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) - # - # loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) - #else: - # loss = loss_fn(pred, y) + loss = loss_fn(pred, y) self._loss_sum += loss.item() loss.backward() optimizer.step() + self._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( @@ -305,23 +286,11 @@ def predict(self, test_data: Table) -> TaggedTable: if self._num_of_classes > 1: predictions += torch.argmax(elem, dim=1).tolist() else: - predictions += elem.round().squeeze().tolist() - # for item in range(len(elem)): - # if not self._is_multi_class: - # if elem[item].item() < 0.5: - # predicted_class = 0 # pragma: no cover - # else: # pragma: no cover - # predicted_class = 1 # pragma: no cover - # predictions.append(predicted_class) - # else: - # values = elem[item].tolist() - # highest_value = 0 - # category_of_highest_value = 0 - # for index in range(len(values)): - # if values[index] > highest_value: - # highest_value = values[index] - # category_of_highest_value = index - # predictions.append(category_of_highest_value) + p = elem.squeeze().round().tolist() + if isinstance(p, float): + predictions.append(p) + else: + predictions += p return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 2c6fae170..48022d105 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -47,22 +47,40 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: ) assert isinstance(fitted_model, NeuralNetworkClassifier) - def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkClassifier( [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], ).fit( - Table.from_dict({"a": [1, 0], "b": [0, 1]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}).tag_columns("a"), + batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [1, 0]})) assert isinstance(predictions, TaggedTable) - def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self, batch_size: int) -> None: fitted_model = NeuralNetworkClassifier( [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), + batch_size=batch_size, ) - predictions = fitted_model.predict(Table.from_dict({"b": [1]})) + predictions = fitted_model.predict(Table.from_dict({"b": [1, 4, 124]})) assert isinstance(predictions, TaggedTable) def test_should_raise_if_model_has_not_been_fitted(self) -> None: @@ -181,17 +199,35 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None batch_size=batch_size, ) - def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + batch_size=batch_size, ) assert isinstance(fitted_model, NeuralNetworkRegressor) - def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + batch_size=batch_size, ) - predictions = fitted_model.predict(Table.from_dict({"b": [1]})) + predictions = fitted_model.predict(Table.from_dict({"b": [5, 6, 7]})) assert isinstance(predictions, TaggedTable) def test_should_raise_if_model_has_not_been_fitted(self) -> None: @@ -208,7 +244,7 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: ) assert model.is_fitted - def test_should_raise_if__test_and_train_data_mismatch(self) -> None: + def test_should_raise_if_test_and_train_data_mismatch(self) -> None: model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), From 1d0bfa1b030641906fdb9c80a013b8b229f70b32 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:29:14 +0000 Subject: [PATCH 35/63] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 18 +++++++++++++++--- tests/safeds/ml/nn/test_model.py | 4 +++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 7b04abd46..c73962f8c 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,6 @@ import sys from typing import TYPE_CHECKING -import numpy as np import torch from torch import Tensor from torch.utils.data import DataLoader, Dataset @@ -895,9 +894,22 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> """ if num_of_classes <= 2: - return DataLoader(dataset=_CustomDataset(torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1)), batch_size=batch_size, shuffle=True) + return DataLoader( + dataset=_CustomDataset( + torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1), + ), + batch_size=batch_size, + shuffle=True, + ) else: - return DataLoader(dataset=_CustomDataset(torch.Tensor(self.features._data.values), torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes)), batch_size=batch_size, shuffle=True) + return DataLoader( + dataset=_CustomDataset( + torch.Tensor(self.features._data.values), + torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes), + ), + batch_size=batch_size, + shuffle=True, + ) class _CustomDataset(Dataset): diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 48022d105..b2fa9cdd0 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -73,7 +73,9 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz ], ids=["one", "two"], ) - def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self, batch_size: int) -> None: + def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification( + self, batch_size: int, + ) -> None: fitted_model = NeuralNetworkClassifier( [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], ).fit( From f61c68756fc3e69dde3f262e16db57d786172acc Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:30:44 +0000 Subject: [PATCH 36/63] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 3 ++- tests/safeds/ml/nn/test_model.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index c73962f8c..1e55f6961 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -896,7 +896,8 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> if num_of_classes <= 2: return DataLoader( dataset=_CustomDataset( - torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1), + torch.Tensor(self.features._data.values), + torch.Tensor(self.target._data).unsqueeze(dim=-1), ), batch_size=batch_size, shuffle=True, diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index b2fa9cdd0..435af1912 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -74,7 +74,8 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz ids=["one", "two"], ) def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification( - self, batch_size: int, + self, + batch_size: int, ) -> None: fitted_model = NeuralNetworkClassifier( [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], From 9a6d70691a635d557e5c1e1d8e3e97dc984d7aeb Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Apr 2024 13:31:34 +0200 Subject: [PATCH 37/63] update test to cover into_dataloader_with_classes --- .../containers/_table/_tagged_table/test_into_dataloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py index bcef1bd1d..fd9584cba 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py @@ -11,7 +11,7 @@ "A": [1, 4], "B": [2, 5], "C": [3, 6], - "T": [0, 0], + "T": [0, 1], }, "T", ["A", "B", "C"], @@ -27,5 +27,5 @@ def test_should_create_dataloader( feature_names: list[str] | None, ) -> None: tagged_table = Table.from_dict(data).tag_columns(target_name, feature_names) - data_loader = tagged_table._into_dataloader(1) + data_loader = tagged_table._into_dataloader_with_classes(1, 2) assert isinstance(data_loader, DataLoader) From 4100e6d0742e42a689d078cc561086f5e75d3fb6 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 17 Apr 2024 13:03:14 +0200 Subject: [PATCH 38/63] remove inplace modifications of model and reset loss after every epoch --- src/safeds/ml/nn/_model.py | 41 +++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index ab7b12f8d..b944f3ff5 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -73,10 +73,12 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) if train_data.features.number_of_columns is not self._input_size: raise InputSizeError(train_data.features.number_of_columns, self._input_size) - self._feature_names = train_data.features.column_names - self._batch_size = batch_size + copied_model = copy.deepcopy(self) + copied_model._feature_names = train_data.features.column_names + copied_model._batch_size = batch_size + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, 1) loss_fn = nn.MSELoss() @@ -89,20 +91,20 @@ def fit( pred = copied_model._model(x) loss = loss_fn(pred, y) - self._loss_sum += loss.item() + copied_model._loss_sum += loss.item() loss.backward() optimizer.step() - self._total_number_of_batches_done += 1 + copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - self._total_number_of_batches_done, - self._loss_sum / (self._total_number_of_batches_done * batch_size), + copied_model._total_number_of_batches_done, + copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), ) - self._total_number_of_epochs_done += 1 + copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - self._total_number_of_epochs_done, - self._loss_sum / (self._total_number_of_batches_done * batch_size), + copied_model._total_number_of_epochs_done, + copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), ) copied_model._is_fitted = True copied_model._model.eval() @@ -214,13 +216,15 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) if train_data.features.number_of_columns is not self._input_size: raise InputSizeError(train_data.features.number_of_columns, self._input_size) - self._feature_names = train_data.features.column_names - self._batch_size = batch_size + copied_model = copy.deepcopy(self) + copied_model._feature_names = train_data.features.column_names + copied_model._batch_size = batch_size + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) - if self._num_of_classes > 1: + if copied_model._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() else: loss_fn = nn.BCELoss() @@ -232,21 +236,22 @@ def fit( pred = copied_model._model(x) loss = loss_fn(pred, y) - self._loss_sum += loss.item() + copied_model._loss_sum += loss.item() loss.backward() optimizer.step() - self._total_number_of_batches_done += 1 + copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - self._total_number_of_batches_done, - self._loss_sum / (self._total_number_of_batches_done * batch_size), + copied_model._total_number_of_batches_done, + copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), ) if callback_on_epoch_completion is not None: callback_on_epoch_completion( - self._total_number_of_epochs_done + 1, - self._loss_sum / (self._total_number_of_batches_done * batch_size), + copied_model._total_number_of_epochs_done + 1, + copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), ) + copied_model._loss_sum = 0.0 copied_model._is_fitted = True copied_model._model.eval() return copied_model From 7ecec5b3c0b661c962dd0ad347bb6394e6feac39 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 17 Apr 2024 13:32:12 +0200 Subject: [PATCH 39/63] adjust loss calculation --- src/safeds/ml/nn/_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index b944f3ff5..0d1bc3c61 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -244,12 +244,12 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), + copied_model._loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), ) if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done + 1, - copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), + copied_model._loss_sum, ) copied_model._loss_sum = 0.0 copied_model._is_fitted = True From 7833e05f5d19afce8f27c8f6f9d023fa75ea9f38 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:33:53 +0000 Subject: [PATCH 40/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 0d1bc3c61..976735c3d 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -244,7 +244,11 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - copied_model._loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + copied_model._loss_sum + / ( + copied_model._total_number_of_batches_done + - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + ), ) if callback_on_epoch_completion is not None: callback_on_epoch_completion( From b7da6df8e89d177809b4b8d8d8442765a32d77cc Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 17 Apr 2024 13:51:33 +0200 Subject: [PATCH 41/63] loss_sum --- src/safeds/ml/nn/_model.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 0d1bc3c61..c195b0e9a 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -25,7 +25,6 @@ def __init__(self, layers: list[Layer]): self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 - self._loss_sum = 0.0 def fit( self, @@ -85,26 +84,27 @@ def fit( optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) for _ in range(epoch_size): + loss_sum = 0.0 for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) loss = loss_fn(pred, y) - copied_model._loss_sum += loss.item() + loss_sum += loss.item() loss.backward() optimizer.step() copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), + loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done, - copied_model._loss_sum / (copied_model._total_number_of_batches_done * batch_size), + loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), ) copied_model._is_fitted = True copied_model._model.eval() @@ -168,7 +168,6 @@ def __init__(self, layers: list[Layer]): self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 - self._loss_sum = 0.0 def fit( self, @@ -231,12 +230,13 @@ def fit( optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) for _ in range(epoch_size): + loss_sum = 0.0 for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) loss = loss_fn(pred, y) - copied_model._loss_sum += loss.item() + loss_sum += loss.item() loss.backward() optimizer.step() @@ -244,14 +244,13 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - copied_model._loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), ) if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done + 1, - copied_model._loss_sum, + loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), ) - copied_model._loss_sum = 0.0 copied_model._is_fitted = True copied_model._model.eval() return copied_model From 2501c4cdde5306760683210a1da3e5a15b873c22 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:54:54 +0000 Subject: [PATCH 42/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index c195b0e9a..82dc1cb44 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -98,13 +98,21 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + loss_sum + / ( + copied_model._total_number_of_batches_done + - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + ), ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done, - loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + loss_sum + / ( + copied_model._total_number_of_batches_done + - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + ), ) copied_model._is_fitted = True copied_model._model.eval() @@ -244,12 +252,20 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + loss_sum + / ( + copied_model._total_number_of_batches_done + - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + ), ) if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done + 1, - loss_sum / (copied_model._total_number_of_batches_done - (copied_model._total_number_of_epochs_done * copied_model._batch_size)), + loss_sum + / ( + copied_model._total_number_of_batches_done + - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + ), ) copied_model._is_fitted = True copied_model._model.eval() From 8ecb9fd563ae2313455dc94a27193a7071056732 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 17 Apr 2024 14:03:16 +0200 Subject: [PATCH 43/63] fix bug --- src/safeds/ml/nn/_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 82dc1cb44..60881b01f 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -111,7 +111,7 @@ def fit( loss_sum / ( copied_model._total_number_of_batches_done - - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) ), ) copied_model._is_fitted = True @@ -258,13 +258,14 @@ def fit( - (copied_model._total_number_of_epochs_done * copied_model._batch_size) ), ) + copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - copied_model._total_number_of_epochs_done + 1, + copied_model._total_number_of_epochs_done, loss_sum / ( copied_model._total_number_of_batches_done - - (copied_model._total_number_of_epochs_done * copied_model._batch_size) + - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) ), ) copied_model._is_fitted = True From cbd69f040a0b988916dd114341070b4baa56cfce Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 12:04:56 +0000 Subject: [PATCH 44/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 60881b01f..024ed92c2 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -111,7 +111,7 @@ def fit( loss_sum / ( copied_model._total_number_of_batches_done - - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) + - ((copied_model._total_number_of_epochs_done - 1) * copied_model._batch_size) ), ) copied_model._is_fitted = True @@ -265,7 +265,7 @@ def fit( loss_sum / ( copied_model._total_number_of_batches_done - - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) + - ((copied_model._total_number_of_epochs_done - 1) * copied_model._batch_size) ), ) copied_model._is_fitted = True From 8222b5f237df3b64b87da419462fbce8e7749c9a Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 17 Apr 2024 14:07:35 +0200 Subject: [PATCH 45/63] fix bug --- src/safeds/ml/nn/_model.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 60881b01f..d712d86b7 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -85,6 +85,7 @@ def fit( optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) for _ in range(epoch_size): loss_sum = 0.0 + amount_of_loss_values_calculated = 0 for x, y in iter(dataloader): optimizer.zero_grad() @@ -92,27 +93,20 @@ def fit( loss = loss_fn(pred, y) loss_sum += loss.item() + amount_of_loss_values_calculated += 1 loss.backward() optimizer.step() copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - loss_sum - / ( - copied_model._total_number_of_batches_done - - (copied_model._total_number_of_epochs_done * copied_model._batch_size) - ), + loss_sum / amount_of_loss_values_calculated, ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done, - loss_sum - / ( - copied_model._total_number_of_batches_done - - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) - ), + loss_sum / amount_of_loss_values_calculated ) copied_model._is_fitted = True copied_model._model.eval() @@ -239,12 +233,14 @@ def fit( optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) for _ in range(epoch_size): loss_sum = 0.0 + amount_of_loss_values_calculated = 0 for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) loss = loss_fn(pred, y) loss_sum += loss.item() + amount_of_loss_values_calculated += 1 loss.backward() optimizer.step() @@ -252,21 +248,13 @@ def fit( if callback_on_batch_completion is not None: callback_on_batch_completion( copied_model._total_number_of_batches_done, - loss_sum - / ( - copied_model._total_number_of_batches_done - - (copied_model._total_number_of_epochs_done * copied_model._batch_size) - ), + loss_sum / amount_of_loss_values_calculated ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( copied_model._total_number_of_epochs_done, - loss_sum - / ( - copied_model._total_number_of_batches_done - - ((copied_model._total_number_of_epochs_done-1) * copied_model._batch_size) - ), + loss_sum / amount_of_loss_values_calculated ) copied_model._is_fitted = True copied_model._model.eval() From 99ec26c83eed8ac02512150c8e84b38cba5c03b0 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 12:12:06 +0000 Subject: [PATCH 46/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index d712d86b7..a8dfd669c 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -105,8 +105,7 @@ def fit( copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - copied_model._total_number_of_epochs_done, - loss_sum / amount_of_loss_values_calculated + copied_model._total_number_of_epochs_done, loss_sum / amount_of_loss_values_calculated, ) copied_model._is_fitted = True copied_model._model.eval() @@ -247,14 +246,12 @@ def fit( copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - copied_model._total_number_of_batches_done, - loss_sum / amount_of_loss_values_calculated + copied_model._total_number_of_batches_done, loss_sum / amount_of_loss_values_calculated, ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - copied_model._total_number_of_epochs_done, - loss_sum / amount_of_loss_values_calculated + copied_model._total_number_of_epochs_done, loss_sum / amount_of_loss_values_calculated, ) copied_model._is_fitted = True copied_model._model.eval() From 02e2996812cf527e2a0ee17f577599311e3c6fd1 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 12:13:36 +0000 Subject: [PATCH 47/63] style: apply automated linter fixes --- src/safeds/ml/nn/_model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index a8dfd669c..aa1690ab5 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -105,7 +105,8 @@ def fit( copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - copied_model._total_number_of_epochs_done, loss_sum / amount_of_loss_values_calculated, + copied_model._total_number_of_epochs_done, + loss_sum / amount_of_loss_values_calculated, ) copied_model._is_fitted = True copied_model._model.eval() @@ -246,12 +247,14 @@ def fit( copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - copied_model._total_number_of_batches_done, loss_sum / amount_of_loss_values_calculated, + copied_model._total_number_of_batches_done, + loss_sum / amount_of_loss_values_calculated, ) copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: callback_on_epoch_completion( - copied_model._total_number_of_epochs_done, loss_sum / amount_of_loss_values_calculated, + copied_model._total_number_of_epochs_done, + loss_sum / amount_of_loss_values_calculated, ) copied_model._is_fitted = True copied_model._model.eval() From 265e55c12be5c66990a46d7382e5330a21c96496 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 Date: Wed, 17 Apr 2024 17:37:15 +0200 Subject: [PATCH 48/63] added input and output layer interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Alexander Gréus Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- src/safeds/ml/nn/__init__.py | 4 +++ src/safeds/ml/nn/_input_conversion.py | 31 ++++++++++++++++++++ src/safeds/ml/nn/_input_conversion_table.py | 29 ++++++++++++++++++ src/safeds/ml/nn/_model.py | 13 ++++---- src/safeds/ml/nn/_output_conversion.py | 19 ++++++++++++ src/safeds/ml/nn/_output_conversion_table.py | 16 ++++++++++ tests/safeds/ml/nn/test_model.py | 18 +++++++----- 7 files changed, 115 insertions(+), 15 deletions(-) create mode 100644 src/safeds/ml/nn/_input_conversion.py create mode 100644 src/safeds/ml/nn/_input_conversion_table.py create mode 100644 src/safeds/ml/nn/_output_conversion.py create mode 100644 src/safeds/ml/nn/_output_conversion_table.py diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 9481e591e..ddd9494fd 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -1,10 +1,14 @@ """Classes for classification tasks.""" from ._forward_layer import ForwardLayer +from ._input_conversion_table import InputConversionTable +from ._output_conversion_table import OutputConversionTable from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor __all__ = [ "ForwardLayer", + "InputConversionTable", + "OutputConversionTable", "NeuralNetworkClassifier", "NeuralNetworkRegressor", ] diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py new file mode 100644 index 000000000..198f76901 --- /dev/null +++ b/src/safeds/ml/nn/_input_conversion.py @@ -0,0 +1,31 @@ +from abc import ABC, abstractmethod +from typing import TypeVar, Generic + +from torch.utils.data import DataLoader + +from safeds.data.tabular.containers import Table, TimeSeries + +T = TypeVar('T', Table, TimeSeries) + + +class _InputConversion(Generic[T], ABC): + """ + The input conversion for a neural network, defines the input parameters for the neural network. + """ + + @property + @abstractmethod + def _data_size(self) -> int: + pass + + @abstractmethod + def _data_conversion_fit(self, input_data: T, batch_size: int, num_of_classes: int = 1) -> DataLoader: + pass + + @abstractmethod + def _data_conversion_predict(self, input_data: T, batch_size: int) -> DataLoader: + pass + + @abstractmethod + def _is_data_valid(self, input_data: T) -> bool: + pass diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py new file mode 100644 index 000000000..90981e0b7 --- /dev/null +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -0,0 +1,29 @@ +from torch.utils.data import DataLoader + +from safeds.exceptions import InputSizeError +from safeds.ml.nn._input_conversion import _InputConversion, T + +from safeds.data.tabular.containers import Table + + +class InputConversionTable(_InputConversion[Table]): + """ + The input conversion for a neural network, defines the input parameters for the neural network. + """ + + def __init__(self, feature_names: list[str], target_name: str) -> None: + self._feature_names = feature_names + self._target_name = target_name + + @property + def _data_size(self) -> int: + return len(self._feature_names) + + def _data_conversion_fit(self, input_data: Table, batch_size: int, num_of_classes: int = 1) -> DataLoader: + return input_data.tag_columns(self._target_name, self._feature_names)._into_dataloader_with_classes(batch_size, num_of_classes) + + def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader: + return input_data._into_dataloader(batch_size) + + def _is_data_valid(self, input_data: Table) -> bool: + return (sorted(input_data.column_names)).__eq__(sorted(self._feature_names)) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index aa1690ab5..15e33df75 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -28,7 +28,7 @@ def __init__(self, layers: list[Layer]): def fit( self, - train_data: TaggedTable, + train_data: I, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -260,7 +260,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: Table) -> TaggedTable: + def predict(self, test_data: I) -> O: """ Make a prediction for the given test data. @@ -295,12 +295,9 @@ def predict(self, test_data: Table) -> TaggedTable: if self._num_of_classes > 1: predictions += torch.argmax(elem, dim=1).tolist() else: - p = elem.squeeze().round().tolist() - if isinstance(p, float): - predictions.append(p) - else: - predictions += p - return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + predictions.append(elem.squeeze(dim=1).round()) + #return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property def is_fitted(self) -> bool: diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py new file mode 100644 index 000000000..1a4869105 --- /dev/null +++ b/src/safeds/ml/nn/_output_conversion.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod +from typing import TypeVar, Generic + +from torch import Tensor + +from safeds.data.tabular.containers import TaggedTable, TimeSeries, Table + +I = TypeVar('I', Table, TimeSeries) +O = TypeVar('O', TaggedTable, TimeSeries) + + +class _OutputConversion(Generic[I, O], ABC): + """ + The output conversion for a neural network, defines the output parameters for the neural network. + """ + + @abstractmethod + def _data_conversion(self, input_data: I, output_data: Tensor) -> O: + pass diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py new file mode 100644 index 000000000..998cd190b --- /dev/null +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -0,0 +1,16 @@ +from torch import Tensor + +from safeds.data.tabular.containers import TaggedTable, Table, Column +from safeds.ml.nn._output_conversion import _OutputConversion + + +class OutputConversionTable(_OutputConversion[Table, TaggedTable]): + """ + The output conversion for a neural network, defines the output parameters for the neural network. + """ + + def __init__(self, prediction_name: str = "prediction") -> None: + self._prediction_name = prediction_name + + def _data_conversion(self, input_data: Table, output_data: Tensor) -> TaggedTable: + return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns(self._prediction_name) diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 435af1912..1ce8ebf91 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -93,7 +93,8 @@ def test_should_raise_if_model_has_not_been_fitted(self) -> None: ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), @@ -122,7 +123,9 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkClassifier(InputConversionTable(["b", "c"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable()) with pytest.raises( InputSizeError, ): @@ -197,8 +200,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()).fit( + Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, ) @@ -235,15 +239,15 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"),[ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), + Table.from_dict({"a": [1], "b": [0]}), ) assert model.is_fitted From 0ef68dc47fc1ec66e95c2316be80069e6630242c Mon Sep 17 00:00:00 2001 From: Gerhardsa0 Date: Wed, 17 Apr 2024 17:39:16 +0200 Subject: [PATCH 49/63] Changes by alexg --- src/safeds/ml/nn/_forward_layer.py | 6 +-- src/safeds/ml/nn/_layer.py | 2 +- src/safeds/ml/nn/_model.py | 53 +++++++++++++------------ tests/safeds/ml/nn/test_model.py | 63 +++++++++++++++++++----------- 4 files changed, 72 insertions(+), 52 deletions(-) diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 8164f9e6c..c9b989cd0 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,7 +1,7 @@ from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError -from safeds.ml.nn._layer import Layer +from safeds.ml.nn._layer import _Layer class _InternalLayer(nn.Module): @@ -22,10 +22,10 @@ def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) -class ForwardLayer(Layer): +class ForwardLayer(_Layer): def __init__(self, output_size: int, input_size: int | None = None): """ - Create a FNN Layer. + Create a FNN _Layer. Parameters ---------- diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index a2ac00d87..9b1e28184 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -3,7 +3,7 @@ from torch import nn -class Layer(ABC): +class _Layer(ABC): @abstractmethod def __init__(self) -> None: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 15e33df75..b5a1dfe51 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -1,11 +1,11 @@ import copy from collections.abc import Callable -from typing import Self +from typing import Self, Generic, TypeVar import torch from torch import Tensor, nn -from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.data.tabular.containers import TaggedTable from safeds.exceptions import ( ClosedBound, InputSizeError, @@ -13,16 +13,22 @@ OutOfBoundsError, TestTrainDataMismatchError, ) -from safeds.ml.nn._layer import Layer +from safeds.ml.nn._input_conversion import _InputConversion +from safeds.ml.nn._layer import _Layer +from safeds.ml.nn._output_conversion import _OutputConversion +I = TypeVar("I") +O = TypeVar("O") -class NeuralNetworkRegressor: - def __init__(self, layers: list[Layer]): + +class NeuralNetworkRegressor(Generic[I, O]): + def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], output_conversion: _OutputConversion[I, O]): + self._input_conversion = input_conversion self._model = _InternalModel(layers, is_for_classification=False) + self._output_conversion = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 @@ -75,10 +81,9 @@ def fit( copied_model = copy.deepcopy(self) - copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, 1) + dataloader = copied_model._input_conversion._data_conversion_fit(train_data, copied_model._batch_size) loss_fn = nn.MSELoss() @@ -112,7 +117,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: Table) -> TaggedTable: + def predict(self, test_data: I) -> O: """ Make a prediction for the given test data. @@ -135,17 +140,19 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__( - sorted(self._feature_names) if self._feature_names is not None else None, - ): + if not self._input_conversion._is_data_valid(test_data): raise TestTrainDataMismatchError - dataloader = test_data._into_dataloader(self._batch_size) + # dataloader = test_data._into_dataloader(self._batch_size) + dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] + # predictions: Tensor with torch.no_grad(): for x in dataloader: elem = self._model(x) - predictions += elem.squeeze(dim=1).tolist() - return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + # predictions += elem.squeeze(dim=1).tolist() + predictions.append(elem.squeeze(dim=1)) + # return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property def is_fitted(self) -> bool: @@ -167,13 +174,12 @@ def __init__(self, layers: list[Layer]): self._batch_size = 1 self._is_fitted = False self._num_of_classes = layers[-1].output_size - self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 def fit( self, - train_data: TaggedTable, + train_data: I, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -215,12 +221,11 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) - if train_data.features.number_of_columns is not self._input_size: - raise InputSizeError(train_data.features.number_of_columns, self._input_size) + if self._input_conversion._data_size is not self._input_size: + raise InputSizeError(self._input_conversion._data_size, self._input_size) copied_model = copy.deepcopy(self) - copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) @@ -283,9 +288,7 @@ def predict(self, test_data: I) -> O: """ if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__( - sorted(self._feature_names) if self._feature_names is not None else None, - ): + if not self._input_conversion._is_data_valid(test_data): raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] @@ -293,7 +296,7 @@ def predict(self, test_data: I) -> O: for x in dataloader: elem = self._model(x) if self._num_of_classes > 1: - predictions += torch.argmax(elem, dim=1).tolist() + predictions.append(torch.argmax(elem, dim=1)) else: predictions.append(elem.squeeze(dim=1).round()) #return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @@ -313,7 +316,7 @@ def is_fitted(self) -> bool: class _InternalModel(nn.Module): - def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: + def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None: super().__init__() self._layer_list = layers internal_layers = [] diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 1ce8ebf91..c0c3ff1bc 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,7 +1,8 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError -from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor +from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor, InputConversionTable, \ + OutputConversionTable class TestClassificationModel: @@ -17,8 +18,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([ForwardLayer(1, 1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(1, 1)], + OutputConversionTable()).fit( + Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, ) @@ -34,16 +36,19 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()).fit( + Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, ) def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], + OutputConversionTable() ).fit( - Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), + Table.from_dict({"a": [1], "b": [0]}), ) assert isinstance(fitted_model, NeuralNetworkClassifier) @@ -57,9 +62,11 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], + OutputConversionTable() ).fit( - Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}), batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [1, 0]})) @@ -78,7 +85,9 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ batch_size: int, ) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], + OutputConversionTable() ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), batch_size=batch_size, @@ -88,7 +97,8 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()).predict( Table.from_dict({"a": [1]}), ) @@ -97,22 +107,26 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se OutputConversionTable()) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), + Table.from_dict({"a": [1], "b": [0]}), ) assert model.is_fitted def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable()) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), ) assert model.is_fitted def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable()) model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), ) with pytest.raises( TestTrainDataMismatchError, @@ -130,11 +144,12 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: InputSizeError, ): model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}), ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()) class Test: self.was_called = False @@ -147,7 +162,7 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_batch_completion=obj.cb) assert obj.callback_was_called() is True @@ -165,7 +180,7 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_epoch_completion=obj.cb) assert obj.callback_was_called() is True @@ -183,8 +198,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()).fit( + Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, ) @@ -215,8 +231,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None ids=["one", "two"], ) def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + fitted_model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).fit( + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, ) assert isinstance(fitted_model, NeuralNetworkRegressor) @@ -230,8 +247,8 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i ids=["one", "two"], ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + fitted_model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).fit( + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [5, 6, 7]})) From c4252dc5ea7e4778dcd012148c97e9c0f1b73e20 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 Date: Wed, 17 Apr 2024 17:39:38 +0200 Subject: [PATCH 50/63] Changes by Simon --- src/safeds/ml/nn/_model.py | 16 ++++++++++------ tests/safeds/ml/nn/test_model.py | 20 +++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index b5a1dfe51..9fcaea650 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -76,8 +76,8 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) - if train_data.features.number_of_columns is not self._input_size: - raise InputSizeError(train_data.features.number_of_columns, self._input_size) + if self._input_conversion._data_size is not self._input_size: + raise InputSizeError(self._input_conversion._data_size, self._input_size) copied_model = copy.deepcopy(self) @@ -167,9 +167,11 @@ def is_fitted(self) -> bool: return self._is_fitted -class NeuralNetworkClassifier: - def __init__(self, layers: list[Layer]): +class NeuralNetworkClassifier(Generic[I, O]): + def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], output_conversion: _OutputConversion[I, O]): + self._input_conversion = input_conversion self._model = _InternalModel(layers, is_for_classification=True) + self._output_conversion = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -228,7 +230,8 @@ def fit( copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) + #dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) + dataloader = copied_model._input_conversion._data_conversion_fit(train_data, copied_model._batch_size, copied_model._num_of_classes) if copied_model._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() @@ -290,7 +293,8 @@ def predict(self, test_data: I) -> O: raise ModelNotFittedError if not self._input_conversion._is_data_valid(test_data): raise TestTrainDataMismatchError - dataloader = test_data._into_dataloader(self._batch_size) + #dataloader = test_data._into_dataloader(self._batch_size) + dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] with torch.no_grad(): for x in dataloader: diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index c0c3ff1bc..db94ec61c 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -167,7 +167,8 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()) class Test: self.was_called = False @@ -269,9 +270,9 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: assert model.is_fitted def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), ) with pytest.raises( TestTrainDataMismatchError, @@ -282,16 +283,16 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkRegressor(InputConversionTable(["b", "c"], "a"), [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], OutputConversionTable()) with pytest.raises( InputSizeError, ): model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}), ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) class Test: self.was_called = False @@ -304,12 +305,13 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_batch_completion=obj.cb) assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable()) class Test: self.was_called = False @@ -322,6 +324,6 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_epoch_completion=obj.cb) assert obj.callback_was_called() is True From d5015e1e5e6493384c52343bff1431c2571b979e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 17 Apr 2024 19:23:25 +0200 Subject: [PATCH 51/63] refactor: linter --- src/safeds/ml/nn/_input_conversion.py | 4 +--- src/safeds/ml/nn/_input_conversion_table.py | 4 +--- src/safeds/ml/nn/_model.py | 22 +++++++++++--------- src/safeds/ml/nn/_output_conversion.py | 12 +++++------ src/safeds/ml/nn/_output_conversion_table.py | 4 +--- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 198f76901..3b8da4670 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -9,9 +9,7 @@ class _InputConversion(Generic[T], ABC): - """ - The input conversion for a neural network, defines the input parameters for the neural network. - """ + """The input conversion for a neural network, defines the input parameters for the neural network.""" @property @abstractmethod diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index 546253c01..d88ad8b9b 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -6,9 +6,7 @@ class InputConversionTable(_InputConversion[Table]): - """ - The input conversion for a neural network, defines the input parameters for the neural network. - """ + """The input conversion for a neural network, defines the input parameters for the neural network.""" def __init__(self, feature_names: list[str], target_name: str) -> None: self._feature_names = feature_names diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 48d66427b..250ef02a9 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -16,16 +16,18 @@ from torch import Tensor, nn + from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable + from safeds.ml.nn._input_conversion import _InputConversion from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion -I = TypeVar("I") -O = TypeVar("O") +IT = TypeVar("IT", Table, TimeSeries) +OT = TypeVar("OT", TaggedTable, TimeSeries) -class NeuralNetworkRegressor(Generic[I, O]): - def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], output_conversion: _OutputConversion[I, O]): +class NeuralNetworkRegressor(Generic[IT, OT]): + def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): self._input_conversion = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) self._output_conversion = output_conversion @@ -37,7 +39,7 @@ def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], def fit( self, - train_data: I, + train_data: IT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -123,7 +125,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: I) -> O: + def predict(self, test_data: IT) -> OT: """ Make a prediction for the given test data. @@ -171,8 +173,8 @@ def is_fitted(self) -> bool: return self._is_fitted -class NeuralNetworkClassifier(Generic[I, O]): - def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], output_conversion: _OutputConversion[I, O]): +class NeuralNetworkClassifier(Generic[IT, OT]): + def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): self._input_conversion = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) self._output_conversion = output_conversion @@ -185,7 +187,7 @@ def __init__(self, input_conversion: _InputConversion[I], layers: list[_Layer], def fit( self, - train_data: I, + train_data: IT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -274,7 +276,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: I) -> O: + def predict(self, test_data: IT) -> OT: """ Make a prediction for the given test data. diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 1a4869105..b74bebe6b 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -5,15 +5,13 @@ from safeds.data.tabular.containers import TaggedTable, TimeSeries, Table -I = TypeVar('I', Table, TimeSeries) -O = TypeVar('O', TaggedTable, TimeSeries) +IT = TypeVar("IT", Table, TimeSeries) +OT = TypeVar("OT", TaggedTable, TimeSeries) -class _OutputConversion(Generic[I, O], ABC): - """ - The output conversion for a neural network, defines the output parameters for the neural network. - """ +class _OutputConversion(Generic[IT, OT], ABC): + """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, input_data: I, output_data: Tensor) -> O: + def _data_conversion(self, input_data: IT, output_data: Tensor) -> OT: pass diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index 998cd190b..d99f925fe 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -5,9 +5,7 @@ class OutputConversionTable(_OutputConversion[Table, TaggedTable]): - """ - The output conversion for a neural network, defines the output parameters for the neural network. - """ + """The output conversion for a neural network, defines the output parameters for the neural network.""" def __init__(self, prediction_name: str = "prediction") -> None: self._prediction_name = prediction_name From d7d41b2808cb14bb334a7d117ac6f8f57c1108ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 17 Apr 2024 19:24:32 +0200 Subject: [PATCH 52/63] refactor: codecov --- src/safeds/ml/nn/_input_conversion.py | 8 ++++---- src/safeds/ml/nn/_output_conversion.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 3b8da4670..17dc0bec2 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -14,16 +14,16 @@ class _InputConversion(Generic[T], ABC): @property @abstractmethod def _data_size(self) -> int: - pass + pass # pragma: no cover @abstractmethod def _data_conversion_fit(self, input_data: T, batch_size: int, num_of_classes: int = 1) -> DataLoader: - pass + pass # pragma: no cover @abstractmethod def _data_conversion_predict(self, input_data: T, batch_size: int) -> DataLoader: - pass + pass # pragma: no cover @abstractmethod def _is_data_valid(self, input_data: T) -> bool: - pass + pass # pragma: no cover diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index b74bebe6b..7db3dfa6e 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -14,4 +14,4 @@ class _OutputConversion(Generic[IT, OT], ABC): @abstractmethod def _data_conversion(self, input_data: IT, output_data: Tensor) -> OT: - pass + pass # pragma: no cover From d8f2551c1de3a06a4f737c49aafe9c9bcb0d12b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 17 Apr 2024 19:33:35 +0200 Subject: [PATCH 53/63] refactor: linter --- src/safeds/ml/nn/_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 250ef02a9..f90fb0f8d 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -11,13 +11,13 @@ TestTrainDataMismatchError, ) +from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable + if TYPE_CHECKING: from collections.abc import Callable from torch import Tensor, nn - from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable - from safeds.ml.nn._input_conversion import _InputConversion from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion @@ -28,9 +28,9 @@ class NeuralNetworkRegressor(Generic[IT, OT]): def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): - self._input_conversion = input_conversion + self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) - self._output_conversion = output_conversion + self._output_conversion: _OutputConversion[IT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -175,9 +175,9 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier(Generic[IT, OT]): def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): - self._input_conversion = input_conversion + self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) - self._output_conversion = output_conversion + self._output_conversion: _OutputConversion[IT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False From 2e07146fedca1679aa6ed4e1b90f7bd847027569 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:35:13 +0000 Subject: [PATCH 54/63] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 1 - src/safeds/exceptions/__init__.py | 1 - src/safeds/ml/nn/__init__.py | 2 +- src/safeds/ml/nn/_input_conversion.py | 4 +- src/safeds/ml/nn/_input_conversion_table.py | 7 +- src/safeds/ml/nn/_model.py | 17 ++- src/safeds/ml/nn/_output_conversion.py | 4 +- src/safeds/ml/nn/_output_conversion_table.py | 6 +- tests/safeds/ml/nn/test_model.py | 115 ++++++++++++------ 9 files changed, 99 insertions(+), 58 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6312296f7..7c0271c7e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,6 @@ import sys from typing import TYPE_CHECKING - from safeds._utils import _structural_hash from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 7f1be0d17..96ee06c40 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -4,7 +4,6 @@ import apipkg - if TYPE_CHECKING: from safeds.exceptions._data import ( ColumnIsTargetError, diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index da1163dcd..6158e640d 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -7,8 +7,8 @@ if TYPE_CHECKING: from ._forward_layer import ForwardLayer from ._input_conversion_table import InputConversionTable - from ._output_conversion_table import OutputConversionTable from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor + from ._output_conversion_table import OutputConversionTable apipkg.initpkg( __name__, diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 17dc0bec2..58aa2646e 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,11 +1,11 @@ from abc import ABC, abstractmethod -from typing import TypeVar, Generic +from typing import Generic, TypeVar from torch.utils.data import DataLoader from safeds.data.tabular.containers import Table, TimeSeries -T = TypeVar('T', Table, TimeSeries) +T = TypeVar("T", Table, TimeSeries) class _InputConversion(Generic[T], ABC): diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index d88ad8b9b..b9251325e 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -1,8 +1,7 @@ from torch.utils.data import DataLoader -from safeds.ml.nn._input_conversion import _InputConversion - from safeds.data.tabular.containers import Table +from safeds.ml.nn._input_conversion import _InputConversion class InputConversionTable(_InputConversion[Table]): @@ -17,7 +16,9 @@ def _data_size(self) -> int: return len(self._feature_names) def _data_conversion_fit(self, input_data: Table, batch_size: int, num_of_classes: int = 1) -> DataLoader: - return input_data.tag_columns(self._target_name, self._feature_names)._into_dataloader_with_classes(batch_size, num_of_classes) + return input_data.tag_columns(self._target_name, self._feature_names)._into_dataloader_with_classes( + batch_size, num_of_classes, + ) def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader: return input_data._into_dataloader(batch_size) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index f90fb0f8d..1cf128015 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -1,8 +1,9 @@ from __future__ import annotations import copy -from typing import TYPE_CHECKING, Self, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, Self, TypeVar +from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.exceptions import ( ClosedBound, InputSizeError, @@ -11,8 +12,6 @@ TestTrainDataMismatchError, ) -from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable - if TYPE_CHECKING: from collections.abc import Callable @@ -27,7 +26,9 @@ class NeuralNetworkRegressor(Generic[IT, OT]): - def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): + def __init__( + self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT], + ): self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) self._output_conversion: _OutputConversion[IT, OT] = output_conversion @@ -174,7 +175,9 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier(Generic[IT, OT]): - def __init__(self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT]): + def __init__( + self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT], + ): self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) self._output_conversion: _OutputConversion[IT, OT] = output_conversion @@ -239,7 +242,9 @@ def fit( copied_model._batch_size = batch_size - dataloader = copied_model._input_conversion._data_conversion_fit(train_data, copied_model._batch_size, copied_model._num_of_classes) + dataloader = copied_model._input_conversion._data_conversion_fit( + train_data, copied_model._batch_size, copied_model._num_of_classes, + ) if copied_model._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 7db3dfa6e..6b22a5edb 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,9 +1,9 @@ from abc import ABC, abstractmethod -from typing import TypeVar, Generic +from typing import Generic, TypeVar from torch import Tensor -from safeds.data.tabular.containers import TaggedTable, TimeSeries, Table +from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries IT = TypeVar("IT", Table, TimeSeries) OT = TypeVar("OT", TaggedTable, TimeSeries) diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index d99f925fe..4acb84ce1 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,6 +1,6 @@ from torch import Tensor -from safeds.data.tabular.containers import TaggedTable, Table, Column +from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.ml.nn._output_conversion import _OutputConversion @@ -11,4 +11,6 @@ def __init__(self, prediction_name: str = "prediction") -> None: self._prediction_name = prediction_name def _data_conversion(self, input_data: Table, output_data: Tensor) -> TaggedTable: - return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns(self._prediction_name) + return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns( + self._prediction_name, + ) diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index db94ec61c..fd1dacc94 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,8 +1,13 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError -from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor, InputConversionTable, \ - OutputConversionTable +from safeds.ml.nn import ( + ForwardLayer, + InputConversionTable, + NeuralNetworkClassifier, + NeuralNetworkRegressor, + OutputConversionTable, +) class TestClassificationModel: @@ -18,8 +23,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(1, 1)], - OutputConversionTable()).fit( + NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(1, 1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, ) @@ -36,8 +42,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()).fit( + NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, ) @@ -46,7 +53,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], - OutputConversionTable() + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [0]}), ) @@ -64,7 +71,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz fitted_model = NeuralNetworkClassifier( InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], - OutputConversionTable() + OutputConversionTable(), ).fit( Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}), batch_size=batch_size, @@ -87,7 +94,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ fitted_model = NeuralNetworkClassifier( InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], - OutputConversionTable() + OutputConversionTable(), ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), batch_size=batch_size, @@ -97,14 +104,16 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()).predict( + NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}), @@ -112,9 +121,11 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se assert model.is_fitted def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), - [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), @@ -122,9 +133,11 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classificatio assert model.is_fitted def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), - [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), ) @@ -137,9 +150,11 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b", "c"], "a"), - [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b", "c"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) with pytest.raises( InputSizeError, ): @@ -148,8 +163,9 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) class Test: self.was_called = False @@ -167,8 +183,9 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkClassifier(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) class Test: self.was_called = False @@ -199,8 +216,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()).fit( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, ) @@ -217,8 +235,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()).fit( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, ) @@ -232,8 +251,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None ids=["one", "two"], ) def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), - [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).fit( + fitted_model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, ) @@ -248,7 +268,9 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i ids=["one", "two"], ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).fit( + fitted_model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, ) @@ -257,12 +279,16 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()).predict( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: - model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"),[ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}), @@ -270,7 +296,9 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: assert model.is_fitted def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), ) @@ -283,7 +311,11 @@ def test_should_raise_if_test_and_train_data_mismatch(self) -> None: ) def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkRegressor(InputConversionTable(["b", "c"], "a"), [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], OutputConversionTable()) + model = NeuralNetworkRegressor( + InputConversionTable(["b", "c"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) with pytest.raises( InputSizeError, ): @@ -292,7 +324,9 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable()) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) class Test: self.was_called = False @@ -310,8 +344,9 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkRegressor(InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], - OutputConversionTable()) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + ) class Test: self.was_called = False From 05e0703343a95b1363d05f1cef2cf8d7578d8ca3 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:36:51 +0000 Subject: [PATCH 55/63] style: apply automated linter fixes --- src/safeds/ml/nn/_input_conversion_table.py | 3 +- src/safeds/ml/nn/_model.py | 14 +++-- tests/safeds/ml/nn/test_model.py | 60 +++++++++++++++------ 3 files changed, 58 insertions(+), 19 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index b9251325e..03d67e9b9 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -17,7 +17,8 @@ def _data_size(self) -> int: def _data_conversion_fit(self, input_data: Table, batch_size: int, num_of_classes: int = 1) -> DataLoader: return input_data.tag_columns(self._target_name, self._feature_names)._into_dataloader_with_classes( - batch_size, num_of_classes, + batch_size, + num_of_classes, ) def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader: diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 1cf128015..73e3d9ecc 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -27,7 +27,10 @@ class NeuralNetworkRegressor(Generic[IT, OT]): def __init__( - self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT], + self, + input_conversion: _InputConversion[IT], + layers: list[_Layer], + output_conversion: _OutputConversion[IT, OT], ): self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) @@ -176,7 +179,10 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier(Generic[IT, OT]): def __init__( - self, input_conversion: _InputConversion[IT], layers: list[_Layer], output_conversion: _OutputConversion[IT, OT], + self, + input_conversion: _InputConversion[IT], + layers: list[_Layer], + output_conversion: _OutputConversion[IT, OT], ): self._input_conversion: _InputConversion[IT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) @@ -243,7 +249,9 @@ def fit( copied_model._batch_size = batch_size dataloader = copied_model._input_conversion._data_conversion_fit( - train_data, copied_model._batch_size, copied_model._num_of_classes, + train_data, + copied_model._batch_size, + copied_model._num_of_classes, ) if copied_model._num_of_classes > 1: diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index fd1dacc94..8a323587f 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -24,7 +24,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(1, 1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(1, 1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, @@ -43,7 +45,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, @@ -105,14 +109,18 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: model = NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) assert not model.is_fitted model = model.fit( @@ -164,7 +172,9 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) class Test: @@ -184,7 +194,9 @@ def callback_was_called(self) -> bool: def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: model = NeuralNetworkClassifier( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) class Test: @@ -217,7 +229,9 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [2]}), epoch_size=epoch_size, @@ -236,7 +250,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [2]}), batch_size=batch_size, @@ -252,7 +268,9 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None ) def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, @@ -269,7 +287,9 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), batch_size=batch_size, @@ -280,14 +300,18 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) assert not model.is_fitted model = model.fit( @@ -297,7 +321,9 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: def test_should_raise_if_test_and_train_data_mismatch(self) -> None: model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), @@ -325,7 +351,9 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) class Test: @@ -345,7 +373,9 @@ def callback_was_called(self) -> bool: def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: model = NeuralNetworkRegressor( - InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), ) class Test: From f4a69de336ca5cb61a37194f3384c9e97e67b2c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 17 Apr 2024 19:50:23 +0200 Subject: [PATCH 56/63] refactor: lazy imports --- src/safeds/ml/nn/_forward_layer.py | 13 +++++++------ src/safeds/ml/nn/_input_conversion.py | 6 ++++-- src/safeds/ml/nn/_input_conversion_table.py | 6 +++++- src/safeds/ml/nn/_layer.py | 6 +++++- src/safeds/ml/nn/_output_conversion.py | 7 ++++++- src/safeds/ml/nn/_output_conversion_table.py | 6 +++++- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 6118348e0..4299f341a 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,20 +1,21 @@ -from torch import Tensor, nn +from torch import Tensor +from torch.nn import Module, Linear, Sigmoid, ReLU, Softmax from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.nn._layer import _Layer -class _InternalLayer(nn.Module): +class _InternalLayer(Module): def __init__(self, input_size: int, output_size: int, activation_function: str): super().__init__() - self._layer = nn.Linear(input_size, output_size) + self._layer = Linear(input_size, output_size) match activation_function: case "sigmoid": - self._fn = nn.Sigmoid() + self._fn = Sigmoid() case "relu": - self._fn = nn.ReLU() + self._fn = ReLU() case "softmax": - self._fn = nn.Softmax() + self._fn = Softmax() case _: raise ValueError("Unknown Activation Function: " + activation_function) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 58aa2646e..eac0cb0d6 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,7 +1,9 @@ +from __future__ import annotations from abc import ABC, abstractmethod -from typing import Generic, TypeVar +from typing import Generic, TypeVar, TYPE_CHECKING -from torch.utils.data import DataLoader +if TYPE_CHECKING: + from torch.utils.data import DataLoader from safeds.data.tabular.containers import Table, TimeSeries diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index 03d67e9b9..577890ad9 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -1,4 +1,8 @@ -from torch.utils.data import DataLoader +from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch.utils.data import DataLoader from safeds.data.tabular.containers import Table from safeds.ml.nn._input_conversion import _InputConversion diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 9b1e28184..36f653a50 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -1,6 +1,10 @@ +from __future__ import annotations + from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -from torch import nn +if TYPE_CHECKING: + from torch import nn class _Layer(ABC): diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 6b22a5edb..d92c66726 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from abc import ABC, abstractmethod from typing import Generic, TypeVar -from torch import Tensor +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch import Tensor from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index 4acb84ce1..fe1bc6e41 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,4 +1,8 @@ -from torch import Tensor +from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch import Tensor from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.ml.nn._output_conversion import _OutputConversion From 8e0228c2c076bbc1062d1f23498619298694e8e6 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:52:02 +0000 Subject: [PATCH 57/63] style: apply automated linter fixes --- src/safeds/ml/nn/_forward_layer.py | 2 +- src/safeds/ml/nn/_input_conversion.py | 3 ++- src/safeds/ml/nn/_input_conversion_table.py | 1 + src/safeds/ml/nn/_output_conversion.py | 4 +--- src/safeds/ml/nn/_output_conversion_table.py | 1 + 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 4299f341a..d05dcad45 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,5 +1,5 @@ from torch import Tensor -from torch.nn import Module, Linear, Sigmoid, ReLU, Softmax +from torch.nn import Linear, Module, ReLU, Sigmoid, Softmax from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.nn._layer import _Layer diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index eac0cb0d6..c59d47187 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,6 +1,7 @@ from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Generic, TypeVar, TYPE_CHECKING +from typing import TYPE_CHECKING, Generic, TypeVar if TYPE_CHECKING: from torch.utils.data import DataLoader diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index 577890ad9..8afcb0481 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -1,4 +1,5 @@ from __future__ import annotations + from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index d92c66726..17034b652 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,9 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Generic, TypeVar - -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Generic, TypeVar if TYPE_CHECKING: from torch import Tensor diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index fe1bc6e41..c4a50b337 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,4 +1,5 @@ from __future__ import annotations + from typing import TYPE_CHECKING if TYPE_CHECKING: From d02f03bfa6613f5d8d98588142083e983fbbf789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 17 Apr 2024 20:09:47 +0200 Subject: [PATCH 58/63] refactor: non global internal model creation --- src/safeds/ml/nn/_forward_layer.py | 47 ++++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 4299f341a..e5f745e91 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,26 +1,35 @@ -from torch import Tensor -from torch.nn import Module, Linear, Sigmoid, ReLU, Softmax +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.nn._layer import _Layer -class _InternalLayer(Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = Linear(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = Sigmoid() - case "relu": - self._fn = ReLU() - case "softmax": - self._fn = Softmax() - case _: - raise ValueError("Unknown Activation Function: " + activation_function) +def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: + from torch import nn + + class _InternalLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + self._layer = nn.Linear(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) + return _InternalLayer(input_size, output_size, activation_function) class ForwardLayer(_Layer): @@ -48,8 +57,8 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, activation_function: str) -> _InternalLayer: - return _InternalLayer(self._input_size, self._output_size, activation_function) + def _get_internal_layer(self, activation_function: str) -> nn.Module: + return _create_internal_model(self._input_size, self._output_size, activation_function) @property def input_size(self) -> int: From ac631d8bc7de1a2971026d883f1c998255d69f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 18 Apr 2024 11:51:22 +0200 Subject: [PATCH 59/63] feat: Added predict type to `InputConversion` feat: Raise error if nn is fitted on mismatching data refactor: renamed `TestTrainDataMismatchError` to `FeatureDataMismatchError` --- src/safeds/exceptions/__init__.py | 6 +- src/safeds/exceptions/_ml.py | 6 +- src/safeds/ml/nn/_input_conversion.py | 15 ++-- src/safeds/ml/nn/_input_conversion_table.py | 13 ++-- src/safeds/ml/nn/_model.py | 47 ++++++------ tests/safeds/ml/nn/test_model.py | 80 ++++++++++++++------- 6 files changed, 104 insertions(+), 63 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 96ee06c40..af4fba4c9 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -32,12 +32,12 @@ DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, + FeatureDataMismatchError, InputSizeError, LearningError, ModelNotFittedError, NonTimeSeriesError, PredictionError, - TestTrainDataMismatchError, UntaggedTableError, ) @@ -66,12 +66,12 @@ "DatasetContainsTargetError": "._ml:DatasetContainsTargetError", "DatasetMissesDataError": "._ml:DatasetMissesDataError", "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError", + "FeatureDataMismatchError": "._ml:FeatureDataMismatchError", "InputSizeError": "._ml:InputSizeError", "LearningError": "._ml:LearningError", "ModelNotFittedError": "._ml:ModelNotFittedError", "NonTimeSeriesError": "._ml:NonTimeSeriesError", "PredictionError": "._ml:PredictionError", - "TestTrainDataMismatchError": "._ml:TestTrainDataMismatchError", "UntaggedTableError": "._ml:UntaggedTableError", # Other "Bound": "._generic:Bound", @@ -103,12 +103,12 @@ "DatasetContainsTargetError", "DatasetMissesDataError", "DatasetMissesFeaturesError", + "FeatureDataMismatchError", "InputSizeError", "LearningError", "ModelNotFittedError", "NonTimeSeriesError", "PredictionError", - "TestTrainDataMismatchError", "UntaggedTableError", # Other "Bound", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 4512bd34f..68063cff0 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -68,12 +68,12 @@ def __init__(self, reason: str): super().__init__(f"Error occurred while predicting: {reason}") -class TestTrainDataMismatchError(Exception): - """Raised when the columns of the table passed to the predict method do not match with the feature columns of the training data.""" +class FeatureDataMismatchError(Exception): + """Raised when the columns of the table passed to the predict or fit method do not match with the specified features of the neural network.""" def __init__(self) -> None: super().__init__( - "The column names in the test table do not match with the feature columns names of the training data.", + "The features in the given table do not match with the specified feature columns names of the neural network.", ) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index c59d47187..bc4274b6b 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -8,10 +8,11 @@ from safeds.data.tabular.containers import Table, TimeSeries -T = TypeVar("T", Table, TimeSeries) +FT = TypeVar("FT", Table, TimeSeries) +PT = TypeVar("PT", Table, TimeSeries) -class _InputConversion(Generic[T], ABC): +class _InputConversion(Generic[FT, PT], ABC): """The input conversion for a neural network, defines the input parameters for the neural network.""" @property @@ -20,13 +21,17 @@ def _data_size(self) -> int: pass # pragma: no cover @abstractmethod - def _data_conversion_fit(self, input_data: T, batch_size: int, num_of_classes: int = 1) -> DataLoader: + def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader: pass # pragma: no cover @abstractmethod - def _data_conversion_predict(self, input_data: T, batch_size: int) -> DataLoader: + def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader: pass # pragma: no cover @abstractmethod - def _is_data_valid(self, input_data: T) -> bool: + def _is_fit_data_valid(self, input_data: FT) -> bool: + pass # pragma: no cover + + @abstractmethod + def _is_predict_data_valid(self, input_data: PT) -> bool: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index 8afcb0481..553a5edbf 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -5,11 +5,11 @@ if TYPE_CHECKING: from torch.utils.data import DataLoader -from safeds.data.tabular.containers import Table +from safeds.data.tabular.containers import Table, TaggedTable from safeds.ml.nn._input_conversion import _InputConversion -class InputConversionTable(_InputConversion[Table]): +class InputConversionTable(_InputConversion[TaggedTable, Table]): """The input conversion for a neural network, defines the input parameters for the neural network.""" def __init__(self, feature_names: list[str], target_name: str) -> None: @@ -20,8 +20,8 @@ def __init__(self, feature_names: list[str], target_name: str) -> None: def _data_size(self) -> int: return len(self._feature_names) - def _data_conversion_fit(self, input_data: Table, batch_size: int, num_of_classes: int = 1) -> DataLoader: - return input_data.tag_columns(self._target_name, self._feature_names)._into_dataloader_with_classes( + def _data_conversion_fit(self, input_data: TaggedTable, batch_size: int, num_of_classes: int = 1) -> DataLoader: + return input_data._into_dataloader_with_classes( batch_size, num_of_classes, ) @@ -29,5 +29,8 @@ def _data_conversion_fit(self, input_data: Table, batch_size: int, num_of_classe def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader: return input_data._into_dataloader(batch_size) - def _is_data_valid(self, input_data: Table) -> bool: + def _is_fit_data_valid(self, input_data: TaggedTable) -> bool: + return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names)) + + def _is_predict_data_valid(self, input_data: Table) -> bool: return (sorted(input_data.column_names)).__eq__(sorted(self._feature_names)) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 73e3d9ecc..7b98b446a 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -9,7 +9,7 @@ InputSizeError, ModelNotFittedError, OutOfBoundsError, - TestTrainDataMismatchError, + FeatureDataMismatchError, ) if TYPE_CHECKING: @@ -21,20 +21,21 @@ from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion -IT = TypeVar("IT", Table, TimeSeries) -OT = TypeVar("OT", TaggedTable, TimeSeries) +IFT = TypeVar("IFT", Table, TimeSeries) # InputFitType +IPT = TypeVar("IPT", Table, TimeSeries) # InputPredictType +OT = TypeVar("OT", TaggedTable, TimeSeries) # OutputType -class NeuralNetworkRegressor(Generic[IT, OT]): +class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): def __init__( self, - input_conversion: _InputConversion[IT], + input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], - output_conversion: _OutputConversion[IT, OT], + output_conversion: _OutputConversion[IPT, OT], ): - self._input_conversion: _InputConversion[IT] = input_conversion + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) - self._output_conversion: _OutputConversion[IT, OT] = output_conversion + self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -43,7 +44,7 @@ def __init__( def fit( self, - train_data: IT, + train_data: IFT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -90,6 +91,8 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) if self._input_conversion._data_size is not self._input_size: raise InputSizeError(self._input_conversion._data_size, self._input_size) + if not self._input_conversion._is_fit_data_valid(train_data): + raise FeatureDataMismatchError copied_model = copy.deepcopy(self) @@ -129,7 +132,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: IT) -> OT: + def predict(self, test_data: IPT) -> OT: """ Make a prediction for the given test data. @@ -154,8 +157,8 @@ def predict(self, test_data: IT) -> OT: if not self._is_fitted: raise ModelNotFittedError - if not self._input_conversion._is_data_valid(test_data): - raise TestTrainDataMismatchError + if not self._input_conversion._is_predict_data_valid(test_data): + raise FeatureDataMismatchError dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] with torch.no_grad(): @@ -177,16 +180,16 @@ def is_fitted(self) -> bool: return self._is_fitted -class NeuralNetworkClassifier(Generic[IT, OT]): +class NeuralNetworkClassifier(Generic[IFT, IPT, OT]): def __init__( self, - input_conversion: _InputConversion[IT], + input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], - output_conversion: _OutputConversion[IT, OT], + output_conversion: _OutputConversion[IPT, OT], ): - self._input_conversion: _InputConversion[IT] = input_conversion + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) - self._output_conversion: _OutputConversion[IT, OT] = output_conversion + self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -196,7 +199,7 @@ def __init__( def fit( self, - train_data: IT, + train_data: IFT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -243,6 +246,8 @@ def fit( raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) if self._input_conversion._data_size is not self._input_size: raise InputSizeError(self._input_conversion._data_size, self._input_size) + if not self._input_conversion._is_fit_data_valid(train_data): + raise FeatureDataMismatchError copied_model = copy.deepcopy(self) @@ -289,7 +294,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: IT) -> OT: + def predict(self, test_data: IPT) -> OT: """ Make a prediction for the given test data. @@ -314,8 +319,8 @@ def predict(self, test_data: IT) -> OT: if not self._is_fitted: raise ModelNotFittedError - if not self._input_conversion._is_data_valid(test_data): - raise TestTrainDataMismatchError + if not self._input_conversion._is_predict_data_valid(test_data): + raise FeatureDataMismatchError dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] with torch.no_grad(): diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 8a323587f..2cd317f50 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError +from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, FeatureDataMismatchError from safeds.ml.nn import ( ForwardLayer, InputConversionTable, @@ -28,7 +28,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None [ForwardLayer(1, 1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1], "b": [2]}), + Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -49,7 +49,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1], "b": [2]}), + Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) @@ -59,7 +59,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1], "b": [0]}), + Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert isinstance(fitted_model, NeuralNetworkClassifier) @@ -77,7 +77,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}), + Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}).tag_columns("a"), batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [1, 0]})) @@ -124,7 +124,7 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se ) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1], "b": [0]}), + Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert model.is_fitted @@ -136,27 +136,41 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classificatio ) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) assert model.is_fitted - def test_should_raise_if_test_and_train_data_mismatch(self) -> None: + def test_should_raise_if_test_features_mismatch(self) -> None: model = NeuralNetworkClassifier( InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], OutputConversionTable(), ) model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) with pytest.raises( - TestTrainDataMismatchError, - match="The column names in the test table do not match with the feature columns names of the training data.", + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_train_features_mismatch(self) -> None: + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) + with pytest.raises( + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("b"), + ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: model = NeuralNetworkClassifier( InputConversionTable(["b", "c"], "a"), @@ -167,7 +181,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: InputSizeError, ): model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: @@ -188,7 +202,7 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_batch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb) assert obj.callback_was_called() is True @@ -210,7 +224,7 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_epoch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb) assert obj.callback_was_called() is True @@ -233,7 +247,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1], "b": [2]}), + Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -254,7 +268,7 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1], "b": [2]}), + Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) @@ -272,7 +286,7 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), batch_size=batch_size, ) assert isinstance(fitted_model, NeuralNetworkRegressor) @@ -291,7 +305,7 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ).fit( - Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}), + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [5, 6, 7]})) @@ -315,27 +329,41 @@ def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: ) assert not model.is_fitted model = model.fit( - Table.from_dict({"a": [1], "b": [0]}), + Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert model.is_fitted - def test_should_raise_if_test_and_train_data_mismatch(self) -> None: + def test_should_raise_if_test_features_mismatch(self) -> None: model = NeuralNetworkRegressor( InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=1)], OutputConversionTable(), ) model = model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) with pytest.raises( - TestTrainDataMismatchError, - match="The column names in the test table do not match with the feature columns names of the training data.", + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_train_features_mismatch(self) -> None: + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) + with pytest.raises( + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("b"), + ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: model = NeuralNetworkRegressor( InputConversionTable(["b", "c"], "a"), @@ -346,7 +374,7 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: InputSizeError, ): model.fit( - Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}), + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: @@ -367,7 +395,7 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_batch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_batch_completion=obj.cb) assert obj.callback_was_called() is True @@ -389,6 +417,6 @@ def callback_was_called(self) -> bool: return self.was_called obj = Test() - model.fit(Table.from_dict({"a": [1], "b": [0]}), callback_on_epoch_completion=obj.cb) + model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb) assert obj.callback_was_called() is True From 53b2b82657473de1aad7e3849c3fe2e04b8aa50e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 18 Apr 2024 11:58:14 +0200 Subject: [PATCH 60/63] refactor: changed `TypeVar` to match correct classes --- src/safeds/ml/nn/_input_conversion.py | 4 ++-- src/safeds/ml/nn/_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index bc4274b6b..67484363c 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -6,9 +6,9 @@ if TYPE_CHECKING: from torch.utils.data import DataLoader -from safeds.data.tabular.containers import Table, TimeSeries +from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable -FT = TypeVar("FT", Table, TimeSeries) +FT = TypeVar("FT", TaggedTable, TimeSeries) PT = TypeVar("PT", Table, TimeSeries) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 7b98b446a..2450a7c95 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -21,7 +21,7 @@ from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion -IFT = TypeVar("IFT", Table, TimeSeries) # InputFitType +IFT = TypeVar("IFT", TaggedTable, TimeSeries) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries) # InputPredictType OT = TypeVar("OT", TaggedTable, TimeSeries) # OutputType From bea4d1ae85e2adfda90956a062977a76ec08d310 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:59:48 +0000 Subject: [PATCH 61/63] style: apply automated linter fixes --- src/safeds/ml/nn/_input_conversion.py | 2 +- src/safeds/ml/nn/_model.py | 2 +- tests/safeds/ml/nn/test_model.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 67484363c..8e60e8bdb 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from torch.utils.data import DataLoader -from safeds.data.tabular.containers import Table, TimeSeries, TaggedTable +from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries FT = TypeVar("FT", TaggedTable, TimeSeries) PT = TypeVar("PT", Table, TimeSeries) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2450a7c95..0f862e4b6 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -6,10 +6,10 @@ from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.exceptions import ( ClosedBound, + FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError, - FeatureDataMismatchError, ) if TYPE_CHECKING: diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 2cd317f50..2c43739a8 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, FeatureDataMismatchError +from safeds.exceptions import FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError from safeds.ml.nn import ( ForwardLayer, InputConversionTable, From 9719467d3e2ce905fd0ae12f20394b7674293b58 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 Date: Thu, 18 Apr 2024 12:29:36 +0200 Subject: [PATCH 62/63] added documentation --- src/safeds/ml/nn/_input_conversion_table.py | 10 ++++++++++ src/safeds/ml/nn/_output_conversion_table.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index 553a5edbf..df9672100 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -13,6 +13,16 @@ class InputConversionTable(_InputConversion[TaggedTable, Table]): """The input conversion for a neural network, defines the input parameters for the neural network.""" def __init__(self, feature_names: list[str], target_name: str) -> None: + """ + The input conversion for a neural network, defines the input parameters for the neural network. + + Parameters + ---------- + feature_names + The names of the features for the input table, used as features for the training. + target_name + The name of the target for the input table, used as target for the training. + """ self._feature_names = feature_names self._target_name = target_name diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index c4a50b337..9776e0f85 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -13,6 +13,14 @@ class OutputConversionTable(_OutputConversion[Table, TaggedTable]): """The output conversion for a neural network, defines the output parameters for the neural network.""" def __init__(self, prediction_name: str = "prediction") -> None: + """ + The output conversion for a neural network, defines the output parameters for the neural network. + + Parameters + ---------- + prediction_name + The name of the new column where the prediction will be stored. + """ self._prediction_name = prediction_name def _data_conversion(self, input_data: Table, output_data: Tensor) -> TaggedTable: From 3ecc3c7377623a5a29437b02efbb3661775c2879 Mon Sep 17 00:00:00 2001 From: Gerhardsa0 Date: Thu, 18 Apr 2024 12:35:05 +0200 Subject: [PATCH 63/63] Linter changes --- src/safeds/ml/nn/_input_conversion_table.py | 2 +- src/safeds/ml/nn/_output_conversion_table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index df9672100..9b57a397a 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -14,7 +14,7 @@ class InputConversionTable(_InputConversion[TaggedTable, Table]): def __init__(self, feature_names: list[str], target_name: str) -> None: """ - The input conversion for a neural network, defines the input parameters for the neural network. + Define the input parameters for the neural network in the input conversion. Parameters ---------- diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index 9776e0f85..1b56988e1 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -14,7 +14,7 @@ class OutputConversionTable(_OutputConversion[Table, TaggedTable]): def __init__(self, prediction_name: str = "prediction") -> None: """ - The output conversion for a neural network, defines the output parameters for the neural network. + Define the output parameters for the neural network in the output conversion. Parameters ----------