diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6ca27ab73..7c0271c7e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -15,7 +15,7 @@ from collections.abc import Callable, Mapping, Sequence from typing import Any - import numpy as np + import torch from torch import Tensor from torch.utils.data import DataLoader, Dataset @@ -916,7 +916,7 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> ) -def _create_dataset(features: np.array, target: np.array) -> Dataset: +def _create_dataset(features: Tensor, target: Tensor) -> Dataset: import torch from torch.utils.data import Dataset diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 96ee06c40..af4fba4c9 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -32,12 +32,12 @@ DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, + FeatureDataMismatchError, InputSizeError, LearningError, ModelNotFittedError, NonTimeSeriesError, PredictionError, - TestTrainDataMismatchError, UntaggedTableError, ) @@ -66,12 +66,12 @@ "DatasetContainsTargetError": "._ml:DatasetContainsTargetError", "DatasetMissesDataError": "._ml:DatasetMissesDataError", "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError", + "FeatureDataMismatchError": "._ml:FeatureDataMismatchError", "InputSizeError": "._ml:InputSizeError", "LearningError": "._ml:LearningError", "ModelNotFittedError": "._ml:ModelNotFittedError", "NonTimeSeriesError": "._ml:NonTimeSeriesError", "PredictionError": "._ml:PredictionError", - "TestTrainDataMismatchError": "._ml:TestTrainDataMismatchError", "UntaggedTableError": "._ml:UntaggedTableError", # Other "Bound": "._generic:Bound", @@ -103,12 +103,12 @@ "DatasetContainsTargetError", "DatasetMissesDataError", "DatasetMissesFeaturesError", + 
class FeatureDataMismatchError(Exception):
    """Raised when the columns of the table passed to the predict or fit method do not match with the specified features of the neural network."""

    def __init__(self) -> None:
        # NOTE: the test suite matches this text verbatim — keep it unchanged.
        message = (
            "The features in the given table do not match with the specified feature columns names of the neural network."
        )
        super().__init__(message)
"OutputConversionTable", "NeuralNetworkClassifier", "NeuralNetworkRegressor", ] diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 8164f9e6c..e5f745e91 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,28 +1,38 @@ -from torch import Tensor, nn +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError -from safeds.ml.nn._layer import Layer +from safeds.ml.nn._layer import _Layer + + +def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: + from torch import nn + class _InternalLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + self._layer = nn.Linear(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case _: + raise ValueError("Unknown Activation Function: " + activation_function) -class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = nn.Linear(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - case _: - raise ValueError("Unknown Activation Function: " + activation_function) + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) + return _InternalLayer(input_size, output_size, activation_function) -class ForwardLayer(Layer): +class ForwardLayer(_Layer): def __init__(self, output_size: int, input_size: int | None = None): """ Create a FNN Layer. 
class _InputConversion(Generic[FT, PT], ABC):
    """The input conversion for a neural network, defines the input parameters for the neural network."""

    @property
    @abstractmethod
    def _data_size(self) -> int:
        """Number of input features the network expects (the width of one sample)."""
        pass  # pragma: no cover

    @abstractmethod
    def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader:
        """Convert tagged training data into a torch ``DataLoader`` for fitting."""
        pass  # pragma: no cover

    @abstractmethod
    def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader:
        """Convert unlabeled data into a torch ``DataLoader`` for prediction."""
        pass  # pragma: no cover

    @abstractmethod
    def _is_fit_data_valid(self, input_data: FT) -> bool:
        """Return True if ``input_data`` provides exactly the configured feature columns for fitting."""
        pass  # pragma: no cover

    @abstractmethod
    def _is_predict_data_valid(self, input_data: PT) -> bool:
        """Return True if ``input_data`` provides exactly the configured feature columns for prediction."""
        pass  # pragma: no cover
class InputConversionTable(_InputConversion[TaggedTable, Table]):
    """The input conversion for a neural network, defines the input parameters for the neural network."""

    def __init__(self, feature_names: list[str], target_name: str) -> None:
        """
        Define the input parameters for the neural network in the input conversion.

        Parameters
        ----------
        feature_names
            The names of the features for the input table, used as features for the training.
        target_name
            The name of the target for the input table, used as target for the training.
        """
        self._feature_names = feature_names
        self._target_name = target_name

    @property
    def _data_size(self) -> int:
        # One input neuron per feature column.
        return len(self._feature_names)

    def _data_conversion_fit(self, input_data: TaggedTable, batch_size: int, num_of_classes: int = 1) -> DataLoader:
        return input_data._into_dataloader_with_classes(
            batch_size,
            num_of_classes,
        )

    def _data_conversion_predict(self, input_data: Table, batch_size: int) -> DataLoader:
        return input_data._into_dataloader(batch_size)

    def _is_fit_data_valid(self, input_data: TaggedTable) -> bool:
        # Compare as sorted lists so column order does not matter.
        # Use `==` rather than calling `__eq__` directly: the operator is
        # idiomatic and correctly falls back to the reflected comparison
        # instead of ever surfacing NotImplemented.
        return sorted(input_data.features.column_names) == sorted(self._feature_names)

    def _is_predict_data_valid(self, input_data: Table) -> bool:
        return sorted(input_data.column_names) == sorted(self._feature_names)
import TYPE_CHECKING -from torch import nn +if TYPE_CHECKING: + from torch import nn -class Layer(ABC): +class _Layer(ABC): @abstractmethod def __init__(self) -> None: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index b94669096..0f862e4b6 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -1,15 +1,15 @@ from __future__ import annotations import copy -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING, Generic, Self, TypeVar -from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.exceptions import ( ClosedBound, + FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError, - TestTrainDataMismatchError, ) if TYPE_CHECKING: @@ -17,22 +17,34 @@ from torch import Tensor, nn - from safeds.ml.nn._layer import Layer + from safeds.ml.nn._input_conversion import _InputConversion + from safeds.ml.nn._layer import _Layer + from safeds.ml.nn._output_conversion import _OutputConversion +IFT = TypeVar("IFT", TaggedTable, TimeSeries) # InputFitType +IPT = TypeVar("IPT", Table, TimeSeries) # InputPredictType +OT = TypeVar("OT", TaggedTable, TimeSeries) # OutputType -class NeuralNetworkRegressor: - def __init__(self, layers: list[Layer]): + +class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): + def __init__( + self, + input_conversion: _InputConversion[IFT, IPT], + layers: list[_Layer], + output_conversion: _OutputConversion[IPT, OT], + ): + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=False) + self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 def 
fit( self, - train_data: TaggedTable, + train_data: IFT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -77,15 +89,16 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) - if train_data.features.number_of_columns is not self._input_size: - raise InputSizeError(train_data.features.number_of_columns, self._input_size) + if self._input_conversion._data_size is not self._input_size: + raise InputSizeError(self._input_conversion._data_size, self._input_size) + if not self._input_conversion._is_fit_data_valid(train_data): + raise FeatureDataMismatchError copied_model = copy.deepcopy(self) - copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, 1) + dataloader = copied_model._input_conversion._data_conversion_fit(train_data, copied_model._batch_size) loss_fn = nn.MSELoss() @@ -119,7 +132,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: Table) -> TaggedTable: + def predict(self, test_data: IPT) -> OT: """ Make a prediction for the given test data. 
@@ -144,17 +157,15 @@ def predict(self, test_data: Table) -> TaggedTable: if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__( - sorted(self._feature_names) if self._feature_names is not None else None, - ): - raise TestTrainDataMismatchError - dataloader = test_data._into_dataloader(self._batch_size) + if not self._input_conversion._is_predict_data_valid(test_data): + raise FeatureDataMismatchError + dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] with torch.no_grad(): for x in dataloader: elem = self._model(x) - predictions += elem.squeeze(dim=1).tolist() - return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + predictions.append(elem.squeeze(dim=1)) + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property def is_fitted(self) -> bool: @@ -169,20 +180,26 @@ def is_fitted(self) -> bool: return self._is_fitted -class NeuralNetworkClassifier: - def __init__(self, layers: list[Layer]): +class NeuralNetworkClassifier(Generic[IFT, IPT, OT]): + def __init__( + self, + input_conversion: _InputConversion[IFT, IPT], + layers: list[_Layer], + output_conversion: _OutputConversion[IPT, OT], + ): + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(layers, is_for_classification=True) + self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False self._num_of_classes = layers[-1].output_size - self._feature_names: None | list[str] = None self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 def fit( self, - train_data: TaggedTable, + train_data: IFT, epoch_size: int = 25, batch_size: int = 1, learning_rate: float = 0.001, @@ -227,15 +244,20 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", 
lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) - if train_data.features.number_of_columns is not self._input_size: - raise InputSizeError(train_data.features.number_of_columns, self._input_size) + if self._input_conversion._data_size is not self._input_size: + raise InputSizeError(self._input_conversion._data_size, self._input_size) + if not self._input_conversion._is_fit_data_valid(train_data): + raise FeatureDataMismatchError copied_model = copy.deepcopy(self) - copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) + dataloader = copied_model._input_conversion._data_conversion_fit( + train_data, + copied_model._batch_size, + copied_model._num_of_classes, + ) if copied_model._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() @@ -272,7 +294,7 @@ def fit( copied_model._model.eval() return copied_model - def predict(self, test_data: Table) -> TaggedTable: + def predict(self, test_data: IPT) -> OT: """ Make a prediction for the given test data. 
@@ -297,24 +319,18 @@ def predict(self, test_data: Table) -> TaggedTable: if not self._is_fitted: raise ModelNotFittedError - if not (sorted(test_data.column_names)).__eq__( - sorted(self._feature_names) if self._feature_names is not None else None, - ): - raise TestTrainDataMismatchError - dataloader = test_data._into_dataloader(self._batch_size) + if not self._input_conversion._is_predict_data_valid(test_data): + raise FeatureDataMismatchError + dataloader = self._input_conversion._data_conversion_predict(test_data, self._batch_size) predictions = [] with torch.no_grad(): for x in dataloader: elem = self._model(x) if self._num_of_classes > 1: - predictions += torch.argmax(elem, dim=1).tolist() + predictions.append(torch.argmax(elem, dim=1)) else: - p = elem.squeeze().round().tolist() - if isinstance(p, float): - predictions.append(p) - else: - predictions += p - return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") + predictions.append(elem.squeeze(dim=1).round()) + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property def is_fitted(self) -> bool: @@ -329,11 +345,11 @@ def is_fitted(self) -> bool: return self._is_fitted -def _create_internal_model(layers: list[Layer], is_for_classification: bool) -> nn.Module: +def _create_internal_model(layers: list[_Layer], is_for_classification: bool) -> nn.Module: from torch import nn class _InternalModel(nn.Module): - def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: + def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None: super().__init__() self._layer_list = layers diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py new file mode 100644 index 000000000..17034b652 --- /dev/null +++ b/src/safeds/ml/nn/_output_conversion.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, 
class _OutputConversion(Generic[IT, OT], ABC):
    """The output conversion for a neural network, defines the output parameters for the neural network."""

    @abstractmethod
    def _data_conversion(self, input_data: IT, output_data: Tensor) -> OT:
        """Combine the network's raw output tensor with the input data into the tagged result container."""
        pass  # pragma: no cover
class OutputConversionTable(_OutputConversion[Table, TaggedTable]):
    """The output conversion for a neural network, defines the output parameters for the neural network."""

    def __init__(self, prediction_name: str = "prediction") -> None:
        """
        Define the output parameters for the neural network in the output conversion.

        Parameters
        ----------
        prediction_name
            The name of the new column where the prediction will be stored.
        """
        self._prediction_name = prediction_name

    def _data_conversion(self, input_data: Table, output_data: Tensor) -> TaggedTable:
        # Append the predictions as a new column, then tag that column as the target.
        prediction_column = Column(self._prediction_name, output_data.tolist())
        extended_table = input_data.add_column(prediction_column)
        return extended_table.tag_columns(self._prediction_name)
test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) @@ -57,7 +73,9 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}).tag_columns("a"), batch_size=batch_size, @@ -78,7 +96,9 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ batch_size: int, ) -> None: fitted_model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], + OutputConversionTable(), ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), batch_size=batch_size, @@ -88,12 +108,20 @@ def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_ def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, 
output_size=1)], + OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), @@ -101,28 +129,54 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se assert model.is_fitted def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) assert model.is_fitted - def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + def test_should_raise_if_test_features_mismatch(self) -> None: + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) with pytest.raises( - TestTrainDataMismatchError, - match="The column names in the test table do not match with the feature columns names of the training data.", + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_train_features_mismatch(self) -> None: + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) + with pytest.raises( + FeatureDataMismatchError, + match="The features in 
the given table do not match with the specified feature columns names of the neural network.", + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("b"), + ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = NeuralNetworkClassifier( + InputConversionTable(["b", "c"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) with pytest.raises( InputSizeError, ): @@ -131,7 +185,11 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) class Test: self.was_called = False @@ -149,7 +207,11 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) class Test: self.was_called = False @@ -180,7 +242,11 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -197,7 +263,11 @@ def 
test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) @@ -211,7 +281,11 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None ids=["one", "two"], ) def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + fitted_model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), batch_size=batch_size, ) @@ -226,7 +300,11 @@ def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: i ids=["one", "two"], ) def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: - fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + fitted_model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).fit( Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), batch_size=batch_size, ) @@ -235,33 +313,63 @@ def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_siz def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + 
[ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert model.is_fitted - def test_should_raise_if_test_and_train_data_mismatch(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + def test_should_raise_if_test_features_mismatch(self) -> None: + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) with pytest.raises( - TestTrainDataMismatchError, - match="The column names in the test table do not match with the feature columns names of the training data.", + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", ): model.predict( Table.from_dict({"a": [1], "c": [2]}), ) + def test_should_raise_if_train_features_mismatch(self) -> None: + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) + with pytest.raises( + FeatureDataMismatchError, + match="The features in the given table do not match with the specified feature columns names of the neural network.", + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("b"), + ) + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1), 
ForwardLayer(output_size=3)]) + model = NeuralNetworkRegressor( + InputConversionTable(["b", "c"], "a"), + [ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)], + OutputConversionTable(), + ) with pytest.raises( InputSizeError, ): @@ -270,7 +378,11 @@ def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: ) def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) class Test: self.was_called = False @@ -288,7 +400,11 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor( + InputConversionTable(["b"], "a"), + [ForwardLayer(input_size=1, output_size=1)], + OutputConversionTable(), + ) class Test: self.was_called = False