From 7c956832aa9377e74864f5ada6decb346204fd93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 9 Jun 2023 15:48:59 +0200 Subject: [PATCH 01/11] feat: Added error messages for Classifiers and Regressors to provide feedback if the data cannot be used. feat: Added `NonNumericColumnError` and `DatasetMissesDataError` feat: Added possibility to write help messages in `NonNumericColumnError` and `MissingValuesColumnError` Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- src/safeds/exceptions/__init__.py | 4 + src/safeds/exceptions/_data.py | 13 +- src/safeds/exceptions/_ml.py | 7 ++ src/safeds/ml/classical/_util_sklearn.py | 48 +++++++ .../ml/classical/classification/_ada_boost.py | 14 +++ .../classification/_decision_tree.py | 14 +++ .../classification/_gradient_boosting.py | 14 +++ .../classification/_k_nearest_neighbors.py | 17 +++ .../classification/_logistic_regression.py | 14 +++ .../classification/_random_forest.py | 14 +++ .../classification/_support_vector_machine.py | 14 +++ .../ml/classical/regression/_ada_boost.py | 14 +++ .../ml/classical/regression/_decision_tree.py | 14 +++ .../regression/_elastic_net_regression.py | 14 +++ .../regression/_gradient_boosting.py | 14 +++ .../regression/_k_nearest_neighbors.py | 17 +++ .../classical/regression/_lasso_regression.py | 14 +++ .../regression/_linear_regression.py | 14 +++ .../ml/classical/regression/_random_forest.py | 14 +++ .../classical/regression/_ridge_regression.py | 14 +++ .../regression/_support_vector_machine.py | 14 +++ .../containers/_table/test_plot_boxplots.py | 2 +- .../classification/test_classifier.py | 117 ++++++++++++++---- .../ml/classical/regression/test_regressor.py | 117 ++++++++++++++---- 24 files changed, 497 insertions(+), 55 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 48af59135..91da245d5 100644 --- a/src/safeds/exceptions/__init__.py +++ 
b/src/safeds/exceptions/__init__.py @@ -11,6 +11,7 @@ UnknownColumnNameError, ValueNotPresentWhenFittedError, WrongFileExtensionError, + MissingValuesColumnError, ) from safeds.exceptions._ml import ( DatasetContainsTargetError, @@ -19,6 +20,7 @@ ModelNotFittedError, PredictionError, UntaggedTableError, + DatasetMissesDataError, ) __all__ = [ @@ -33,6 +35,7 @@ "UnknownColumnNameError", "ValueNotPresentWhenFittedError", "WrongFileExtensionError", + "MissingValuesColumnError", # ML exceptions "DatasetContainsTargetError", "DatasetMissesFeaturesError", @@ -40,4 +43,5 @@ "ModelNotFittedError", "PredictionError", "UntaggedTableError", + "DatasetMissesDataError", ] diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 26e402c1c..b996f972e 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -23,8 +23,17 @@ def __init__(self, column_names: list[str]): class NonNumericColumnError(Exception): """Exception raised for trying to do numerical operations on a non-numerical column.""" - def __init__(self, column_info: str) -> None: - super().__init__(f"Tried to do a numerical operation on one or multiple non numerical Columns: \n{column_info}") + def __init__(self, column_info: str, help_msg: str | None = None) -> None: + line_break = "\n" + super().__init__(f"Tried to do a numerical operation on one or multiple non-numerical columns: \n{column_info}{line_break + help_msg if help_msg is not None else ''}") + + +class MissingValuesColumnError(Exception): + """Exception raised for trying to do operations on a column containing missing values.""" + + def __init__(self, column_info: str, help_msg: str | None = None) -> None: + line_break = "\n" + super().__init__(f"Tried to do an operation on one or multiple columns containing missing values: \n{column_info}{line_break + help_msg if help_msg is not None else ''}") class DuplicateColumnNameError(Exception): diff --git a/src/safeds/exceptions/_ml.py 
b/src/safeds/exceptions/_ml.py index bf82d7b9c..51d82c170 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -26,6 +26,13 @@ def __init__(self, missing_feature_names: list[str]): super().__init__(f"Dataset misses the feature columns '{missing_feature_names}'.") +class DatasetMissesDataError(ValueError): + """Raised when a dataset contains no rows.""" + + def __init__(self): + super().__init__("Dataset contains no rows") + + class LearningError(Exception): """ Raised when an error occurred while training a model. diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 6b581c3d8..d82322ed8 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -9,6 +9,9 @@ ModelNotFittedError, PredictionError, UntaggedTableError, + NonNumericColumnError, + MissingValuesColumnError, + DatasetMissesDataError, ) @@ -30,9 +33,32 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: If the tagged table contains invalid values or if the training failed. UntaggedTableError If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ if not isinstance(tagged_table, TaggedTable) and isinstance(tagged_table, Table): raise UntaggedTableError + + if tagged_table.number_of_rows == 0: + raise DatasetMissesDataError() + + non_numerical_column_names = set(tagged_table.features.column_names) - set(tagged_table.features.remove_columns_with_non_numerical_values().column_names) + if len(non_numerical_column_names) != 0: + raise NonNumericColumnError(str(non_numerical_column_names), + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\n" + "The OneHotEncoder should be used if you work with nominal data. 
If your data contains too many different values\n" + "or is ordinal, you should use the LabelEncoder.") + + null_containing_column_names = set(tagged_table.features.column_names) - set(tagged_table.features.remove_columns_with_missing_values().column_names) + if len(null_containing_column_names) != 0: + raise MissingValuesColumnError(str(null_containing_column_names), + "You can use the Imputer to replace the missing values based on different strategies.\n" + "If you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.") + try: model.fit( tagged_table.features._data, @@ -73,6 +99,12 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ # Validation if model is None or target_name is None or feature_names is None: @@ -83,6 +115,22 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ if missing_feature_names: raise DatasetMissesFeaturesError(missing_feature_names) + if dataset.number_of_rows == 0: + raise DatasetMissesDataError() + + non_numerical_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(dataset.keep_only_columns(feature_names).remove_columns_with_non_numerical_values().column_names) + if len(non_numerical_column_names) != 0: + raise NonNumericColumnError(str(non_numerical_column_names), + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\n" + "The OneHotEncoder should be used if you work with nominal data. 
If your data contains too many different values\n" + "or is ordinal, you should use the LabelEncoder.\n") + + null_containing_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(dataset.keep_only_columns(feature_names).remove_columns_with_missing_values().column_names) + if len(null_containing_column_names) != 0: + raise MissingValuesColumnError(str(null_containing_column_names), + "You can use the Imputer to replace the missing values based on different strategies.\n" + "If you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.") + dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names diff --git a/src/safeds/ml/classical/classification/_ada_boost.py b/src/safeds/ml/classical/classification/_ada_boost.py index 18b34d19d..86ffde895 100644 --- a/src/safeds/ml/classical/classification/_ada_boost.py +++ b/src/safeds/ml/classical/classification/_ada_boost.py @@ -90,6 +90,14 @@ def fit(self, training_set: TaggedTable) -> AdaBoost: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -129,6 +137,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_decision_tree.py b/src/safeds/ml/classical/classification/_decision_tree.py index fd5f24a4b..5b183e8b4 100644 --- a/src/safeds/ml/classical/classification/_decision_tree.py +++ b/src/safeds/ml/classical/classification/_decision_tree.py @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> DecisionTree: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_gradient_boosting.py b/src/safeds/ml/classical/classification/_gradient_boosting.py index c94a95f7d..17b3a4205 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting.py @@ -77,6 +77,14 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. 
+ MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -112,6 +120,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py index df9bb7e79..2140da0a5 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py @@ -4,6 +4,7 @@ from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier +from safeds.exceptions import DatasetMissesDataError from safeds.ml.classical._util_sklearn import fit, predict from ._classifier import Classifier @@ -69,7 +70,17 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: If `number_of_neighbors` is greater than the sample size. LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. 
""" + if training_set.number_of_rows == 0: + raise DatasetMissesDataError() if self._number_of_neighbors > training_set.number_of_rows: raise ValueError( ( @@ -111,6 +122,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_logistic_regression.py b/src/safeds/ml/classical/classification/_logistic_regression.py index 864aa91d4..b2a5cbeba 100644 --- a/src/safeds/ml/classical/classification/_logistic_regression.py +++ b/src/safeds/ml/classical/classification/_logistic_regression.py @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> LogisticRegression: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_random_forest.py b/src/safeds/ml/classical/classification/_random_forest.py index 5f0bdf26c..c50b063e0 100644 --- a/src/safeds/ml/classical/classification/_random_forest.py +++ b/src/safeds/ml/classical/classification/_random_forest.py @@ -65,6 +65,14 @@ def fit(self, training_set: TaggedTable) -> RandomForest: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -100,6 +108,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/classification/_support_vector_machine.py b/src/safeds/ml/classical/classification/_support_vector_machine.py index 91048f660..9b15badcc 100644 --- a/src/safeds/ml/classical/classification/_support_vector_machine.py +++ b/src/safeds/ml/classical/classification/_support_vector_machine.py @@ -64,6 +64,14 @@ def fit(self, training_set: TaggedTable) -> SupportVectorMachine: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. 
+ NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) @@ -99,6 +107,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_ada_boost.py b/src/safeds/ml/classical/regression/_ada_boost.py index e3ba73ca2..9e4254292 100644 --- a/src/safeds/ml/classical/regression/_ada_boost.py +++ b/src/safeds/ml/classical/regression/_ada_boost.py @@ -90,6 +90,14 @@ def fit(self, training_set: TaggedTable) -> AdaBoost: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -129,6 +137,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_decision_tree.py b/src/safeds/ml/classical/regression/_decision_tree.py index be354f695..91beb47fb 100644 --- a/src/safeds/ml/classical/regression/_decision_tree.py +++ b/src/safeds/ml/classical/regression/_decision_tree.py @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> DecisionTree: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_elastic_net_regression.py b/src/safeds/ml/classical/regression/_elastic_net_regression.py index e615ac870..bb8951814 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regression.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regression.py @@ -102,6 +102,14 @@ def fit(self, training_set: TaggedTable) -> ElasticNetRegression: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. 
+ MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -137,6 +145,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_gradient_boosting.py b/src/safeds/ml/classical/regression/_gradient_boosting.py index d8cb47fa3..c851f948f 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting.py @@ -77,6 +77,14 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -112,6 +120,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py index da1e4d20d..e645b37f1 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py @@ -4,6 +4,7 @@ from sklearn.neighbors import KNeighborsRegressor as sk_KNeighborsRegressor +from safeds.exceptions import DatasetMissesDataError from safeds.ml.classical._util_sklearn import fit, predict from ._regressor import Regressor @@ -69,7 +70,17 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: If `number_of_neighbors` is greater than the sample size. LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ + if training_set.number_of_rows == 0: + raise DatasetMissesDataError() if self._number_of_neighbors > training_set.number_of_rows: raise ValueError( ( @@ -112,6 +123,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_lasso_regression.py b/src/safeds/ml/classical/regression/_lasso_regression.py index bc175e5f9..857db67a5 100644 --- a/src/safeds/ml/classical/regression/_lasso_regression.py +++ b/src/safeds/ml/classical/regression/_lasso_regression.py @@ -75,6 +75,14 @@ def fit(self, training_set: TaggedTable) -> LassoRegression: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -110,6 +118,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_linear_regression.py b/src/safeds/ml/classical/regression/_linear_regression.py index 4c60da473..1c60a7114 100644 --- a/src/safeds/ml/classical/regression/_linear_regression.py +++ b/src/safeds/ml/classical/regression/_linear_regression.py @@ -43,6 +43,14 @@ def fit(self, training_set: TaggedTable) -> LinearRegression: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. 
+ MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -78,6 +86,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_random_forest.py b/src/safeds/ml/classical/regression/_random_forest.py index b3c6f878b..08d8c9883 100644 --- a/src/safeds/ml/classical/regression/_random_forest.py +++ b/src/safeds/ml/classical/regression/_random_forest.py @@ -65,6 +65,14 @@ def fit(self, training_set: TaggedTable) -> RandomForest: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -100,6 +108,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. 
""" return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_ridge_regression.py b/src/safeds/ml/classical/regression/_ridge_regression.py index 002bf26c7..c57b77b43 100644 --- a/src/safeds/ml/classical/regression/_ridge_regression.py +++ b/src/safeds/ml/classical/regression/_ridge_regression.py @@ -76,6 +76,14 @@ def fit(self, training_set: TaggedTable) -> RidgeRegression: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. + MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -111,6 +119,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/src/safeds/ml/classical/regression/_support_vector_machine.py b/src/safeds/ml/classical/regression/_support_vector_machine.py index d2e26f11b..19e9f05f6 100644 --- a/src/safeds/ml/classical/regression/_support_vector_machine.py +++ b/src/safeds/ml/classical/regression/_support_vector_machine.py @@ -64,6 +64,14 @@ def fit(self, training_set: TaggedTable) -> SupportVectorMachine: ------ LearningError If the training data contains invalid values or if the training failed. + UntaggedTableError + If the table is untagged. + NonNumericColumnError + If the training data contains non-numerical values. 
+ MissingValuesColumnError + If the training data contains missing values. + DatasetMissesDataError + If the training data contains no rows. """ wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) @@ -99,6 +107,12 @@ def predict(self, dataset: Table) -> TaggedTable: If the dataset misses feature columns. PredictionError If predicting with the given dataset failed. + NonNumericColumnError + If the dataset contains non-numerical values. + MissingValuesColumnError + If the dataset contains missing values. + DatasetMissesDataError + If the dataset contains no rows. """ return predict(self._wrapped_regressor, dataset, self._feature_names, self._target_name) diff --git a/tests/safeds/data/tabular/containers/_table/test_plot_boxplots.py b/tests/safeds/data/tabular/containers/_table/test_plot_boxplots.py index 80970da40..d8f615ef7 100644 --- a/tests/safeds/data/tabular/containers/_table/test_plot_boxplots.py +++ b/tests/safeds/data/tabular/containers/_table/test_plot_boxplots.py @@ -35,7 +35,7 @@ def test_should_raise_if_column_contains_non_numerical_values() -> None: with pytest.raises( NonNumericColumnError, match=( - r"Tried to do a numerical operation on one or multiple non numerical Columns: \nThis table contains only" + r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThis table contains only" r" non-numerical columns." 
), ): diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index c198c46ac..eb8afe81a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -1,16 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import ( DatasetContainsTargetError, DatasetMissesFeaturesError, - LearningError, ModelNotFittedError, - PredictionError, UntaggedTableError, + NonNumericColumnError, + MissingValuesColumnError, + DatasetMissesDataError, ) from safeds.ml.classical.classification import ( AdaBoost, @@ -63,18 +64,6 @@ def valid_data() -> TaggedTable: ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) -@pytest.fixture() -def invalid_data() -> TaggedTable: - return Table( - { - "id": [1, 4], - "feat1": ["a", 5], - "feat2": [3, 6], - "target": [0, 1], - }, - ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) - - @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, classifier: Classifier, valid_data: TaggedTable) -> None: @@ -91,8 +80,50 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi classifier.fit(valid_data) assert valid_data == valid_data_copy - def test_should_raise_on_invalid_data(self, classifier: Classifier, invalid_data: TaggedTable) -> None: - with pytest.raises(LearningError): + @pytest.mark.parametrize( + ("invalid_data", "expected_error", "expected_error_msg"), + [ + ( + Table( + { + "id": [1, 4], + "feat1": ["a", 5], + "feat2": [3, 6], + "target": [0, 1], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + NonNumericColumnError, + r"Tried to do a numerical 
operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many different values\nor is ordinal, you should use the LabelEncoder.", + ), + ( + Table( + { + "id": [1, 4], + "feat1": [None, 5], + "feat2": [3, 6], + "target": [0, 1], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + MissingValuesColumnError, + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou can use the Imputer to replace the missing values based on different strategies.\nIf you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", + ), + ( + Table( + { + "id": [], + "feat1": [], + "feat2": [], + "target": [], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + DatasetMissesDataError, + r"Dataset contains no rows", + ) + ], + ids=["non-numerical data", "missing values in data", "no rows in data"] + ) + def test_should_raise_on_invalid_data(self, classifier: Classifier, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str) -> None: + with pytest.raises(expected_error, match=expected_error_msg): classifier.fit(invalid_data) @pytest.mark.parametrize( @@ -151,15 +182,49 @@ def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, v with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): fitted_classifier.predict(valid_data.remove_columns(["feat1", "feat2", "target"])) - def test_should_raise_on_invalid_data( - self, - classifier: Classifier, - valid_data: TaggedTable, - invalid_data: TaggedTable, - ) -> None: - fitted_classifier = classifier.fit(valid_data) - with pytest.raises(PredictionError): - fitted_classifier.predict(invalid_data.features) + @pytest.mark.parametrize( + ("invalid_data", 
"expected_error", "expected_error_msg"), + [ + ( + Table( + { + "id": [1, 4], + "feat1": ["a", 5], + "feat2": [3, 6], + }, + ), + NonNumericColumnError, + r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}", + ), + ( + Table( + { + "id": [1, 4], + "feat1": [None, 5], + "feat2": [3, 6], + }, + ), + MissingValuesColumnError, + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}", + ), + ( + Table( + { + "id": [], + "feat1": [], + "feat2": [], + }, + ), + DatasetMissesDataError, + r"Dataset contains no rows", + ) + ], + ids=["non-numerical data", "missing values in data", "no rows in data"] + ) + def test_should_raise_on_invalid_data(self, classifier: Classifier, valid_data: TaggedTable, invalid_data: Table, expected_error: Any, expected_error_msg: str) -> None: + classifier = classifier.fit(valid_data) + with pytest.raises(expected_error, match=expected_error_msg): + classifier.predict(invalid_data) @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 21aa4816d..61882cb8b 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pandas as pd import pytest @@ -9,10 +9,11 @@ ColumnLengthMismatchError, DatasetContainsTargetError, DatasetMissesFeaturesError, - LearningError, ModelNotFittedError, - PredictionError, UntaggedTableError, + NonNumericColumnError, + MissingValuesColumnError, + DatasetMissesDataError, ) from safeds.ml.classical.regression import ( AdaBoost, @@ -74,18 +75,6 @@ def valid_data() -> TaggedTable: ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) -@pytest.fixture() -def invalid_data() 
-> TaggedTable: - return Table( - { - "id": [1, 4], - "feat1": ["a", 5], - "feat2": [3, 6], - "target": [0, 1], - }, - ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]) - - @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, regressor: Regressor, valid_data: TaggedTable) -> None: @@ -102,8 +91,50 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt regressor.fit(valid_data) assert valid_data == valid_data_copy - def test_should_raise_on_invalid_data(self, regressor: Regressor, invalid_data: TaggedTable) -> None: - with pytest.raises(LearningError): + @pytest.mark.parametrize( + ("invalid_data", "expected_error", "expected_error_msg"), + [ + ( + Table( + { + "id": [1, 4], + "feat1": ["a", 5], + "feat2": [3, 6], + "target": [0, 1], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + NonNumericColumnError, + r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}", + ), + ( + Table( + { + "id": [1, 4], + "feat1": [None, 5], + "feat2": [3, 6], + "target": [0, 1], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + MissingValuesColumnError, + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}", + ), + ( + Table( + { + "id": [], + "feat1": [], + "feat2": [], + "target": [], + }, + ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), + DatasetMissesDataError, + r"Dataset contains no rows", + ) + ], + ids=["non-numerical data", "missing values in data", "no rows in data"] + ) + def test_should_raise_on_invalid_data(self, regressor: Regressor, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str) -> None: + with pytest.raises(expected_error, match=expected_error_msg): regressor.fit(invalid_data) @pytest.mark.parametrize( @@ -162,15 +193,49 @@ def 
test_should_raise_if_dataset_misses_features(self, regressor: Regressor, val with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): fitted_regressor.predict(valid_data.remove_columns(["feat1", "feat2", "target"])) - def test_should_raise_on_invalid_data( - self, - regressor: Regressor, - valid_data: TaggedTable, - invalid_data: TaggedTable, - ) -> None: - fitted_regressor = regressor.fit(valid_data) - with pytest.raises(PredictionError): - fitted_regressor.predict(invalid_data.features) + @pytest.mark.parametrize( + ("invalid_data", "expected_error", "expected_error_msg"), + [ + ( + Table( + { + "id": [1, 4], + "feat1": ["a", 5], + "feat2": [3, 6], + }, + ), + NonNumericColumnError, + r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many different values\nor is ordinal, you should use the LabelEncoder.", + ), + ( + Table( + { + "id": [1, 4], + "feat1": [None, 5], + "feat2": [3, 6], + }, + ), + MissingValuesColumnError, + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou can use the Imputer to replace the missing values based on different strategies.\nIf you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", + ), + ( + Table( + { + "id": [], + "feat1": [], + "feat2": [], + }, + ), + DatasetMissesDataError, + r"Dataset contains no rows", + ) + ], + ids=["non-numerical data", "missing values in data", "no rows in data"] + ) + def test_should_raise_on_invalid_data(self, regressor: Regressor, valid_data: TaggedTable, invalid_data: Table, expected_error: Any, expected_error_msg: str) -> None: + regressor = regressor.fit(valid_data) + with pytest.raises(expected_error, match=expected_error_msg): + 
regressor.predict(invalid_data) @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) From 1b18f2b6c27a79fbb4e6bf1140a4cb819bf3b967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 9 Jun 2023 16:04:18 +0200 Subject: [PATCH 02/11] refactor: Satisfying the MyPy Linter... Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- src/safeds/exceptions/_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 51d82c170..1cc83ebc0 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -29,7 +29,7 @@ def __init__(self, missing_feature_names: list[str]): class DatasetMissesDataError(ValueError): """Raised when a dataset contains no rows.""" - def __init__(self): + def __init__(self) -> None: super().__init__("Dataset contains no rows") From 0d4b3d6597fb216566ba242075126b3c2586231f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:06:30 +0000 Subject: [PATCH 03/11] style: apply automated linter fixes --- src/safeds/exceptions/__init__.py | 4 +- src/safeds/exceptions/_data.py | 10 ++- src/safeds/ml/classical/_util_sklearn.py | 70 +++++++++++++------ .../classification/_k_nearest_neighbors.py | 2 +- .../regression/_k_nearest_neighbors.py | 2 +- .../classification/test_classifier.py | 41 ++++++++--- .../ml/classical/regression/test_regressor.py | 41 ++++++++--- 7 files changed, 119 insertions(+), 51 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 91da245d5..05bf9dcf8 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -5,22 +5,22 @@ ColumnSizeError, DuplicateColumnNameError, IndexOutOfBoundsError, + MissingValuesColumnError, NonNumericColumnError, SchemaMismatchError, TransformerNotFittedError, UnknownColumnNameError, 
ValueNotPresentWhenFittedError, WrongFileExtensionError, - MissingValuesColumnError, ) from safeds.exceptions._ml import ( DatasetContainsTargetError, + DatasetMissesDataError, DatasetMissesFeaturesError, LearningError, ModelNotFittedError, PredictionError, UntaggedTableError, - DatasetMissesDataError, ) __all__ = [ diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index b996f972e..7f91b3187 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -25,7 +25,10 @@ class NonNumericColumnError(Exception): def __init__(self, column_info: str, help_msg: str | None = None) -> None: line_break = "\n" - super().__init__(f"Tried to do a numerical operation on one or multiple non-numerical columns: \n{column_info}{line_break + help_msg if help_msg is not None else ''}") + super().__init__( + "Tried to do a numerical operation on one or multiple non-numerical columns:" + f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}", + ) class MissingValuesColumnError(Exception): @@ -33,7 +36,10 @@ class MissingValuesColumnError(Exception): def __init__(self, column_info: str, help_msg: str | None = None) -> None: line_break = "\n" - super().__init__(f"Tried to do an operation on one or multiple columns containing missing values: \n{column_info}{line_break + help_msg if help_msg is not None else ''}") + super().__init__( + "Tried to do an operation on one or multiple columns containing missing values:" + f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}", + ) class DuplicateColumnNameError(Exception): diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index d82322ed8..19c13e5b3 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -4,14 +4,14 @@ from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import ( DatasetContainsTargetError, + DatasetMissesDataError, 
DatasetMissesFeaturesError, LearningError, + MissingValuesColumnError, ModelNotFittedError, + NonNumericColumnError, PredictionError, UntaggedTableError, - NonNumericColumnError, - MissingValuesColumnError, - DatasetMissesDataError, ) @@ -44,20 +44,32 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: raise UntaggedTableError if tagged_table.number_of_rows == 0: - raise DatasetMissesDataError() + raise DatasetMissesDataError - non_numerical_column_names = set(tagged_table.features.column_names) - set(tagged_table.features.remove_columns_with_non_numerical_values().column_names) + non_numerical_column_names = set(tagged_table.features.column_names) - set( + tagged_table.features.remove_columns_with_non_numerical_values().column_names, + ) if len(non_numerical_column_names) != 0: - raise NonNumericColumnError(str(non_numerical_column_names), - "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\n" - "The OneHotEncoder should be used if you work with nominal data. If your data contains too many different values\n" - "or is ordinal, you should use the LabelEncoder.") + raise NonNumericColumnError( + str(non_numerical_column_names), + ( + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" + " different values\nor is ordinal, you should use the LabelEncoder." 
+ ), + ) - null_containing_column_names = set(tagged_table.features.column_names) - set(tagged_table.features.remove_columns_with_missing_values().column_names) + null_containing_column_names = set(tagged_table.features.column_names) - set( + tagged_table.features.remove_columns_with_missing_values().column_names, + ) if len(null_containing_column_names) != 0: - raise MissingValuesColumnError(str(null_containing_column_names), - "You can use the Imputer to replace the missing values based on different strategies.\n" - "If you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.") + raise MissingValuesColumnError( + str(null_containing_column_names), + ( + "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`." + ), + ) try: model.fit( @@ -116,20 +128,32 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ raise DatasetMissesFeaturesError(missing_feature_names) if dataset.number_of_rows == 0: - raise DatasetMissesDataError() + raise DatasetMissesDataError - non_numerical_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(dataset.keep_only_columns(feature_names).remove_columns_with_non_numerical_values().column_names) + non_numerical_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set( + dataset.keep_only_columns(feature_names).remove_columns_with_non_numerical_values().column_names, + ) if len(non_numerical_column_names) != 0: - raise NonNumericColumnError(str(non_numerical_column_names), - "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\n" - "The OneHotEncoder should be used if you work with nominal data. 
If your data contains too many different values\n" - "or is ordinal, you should use the LabelEncoder.\n") + raise NonNumericColumnError( + str(non_numerical_column_names), + ( + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" + " different values\nor is ordinal, you should use the LabelEncoder.\n" + ), + ) - null_containing_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set(dataset.keep_only_columns(feature_names).remove_columns_with_missing_values().column_names) + null_containing_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set( + dataset.keep_only_columns(feature_names).remove_columns_with_missing_values().column_names, + ) if len(null_containing_column_names) != 0: - raise MissingValuesColumnError(str(null_containing_column_names), - "You can use the Imputer to replace the missing values based on different strategies.\n" - "If you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.") + raise MissingValuesColumnError( + str(null_containing_column_names), + ( + "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`." + ), + ) dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py index 2140da0a5..a7eaa4ff0 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py @@ -80,7 +80,7 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: If the training data contains no rows. 
""" if training_set.number_of_rows == 0: - raise DatasetMissesDataError() + raise DatasetMissesDataError if self._number_of_neighbors > training_set.number_of_rows: raise ValueError( ( diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py index e645b37f1..f99b4d66e 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py @@ -80,7 +80,7 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors: If the training data contains no rows. """ if training_set.number_of_rows == 0: - raise DatasetMissesDataError() + raise DatasetMissesDataError if self._number_of_neighbors > training_set.number_of_rows: raise ValueError( ( diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index eb8afe81a..04d5acf12 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -6,12 +6,12 @@ from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import ( DatasetContainsTargetError, + DatasetMissesDataError, DatasetMissesFeaturesError, + MissingValuesColumnError, ModelNotFittedError, - UntaggedTableError, NonNumericColumnError, - MissingValuesColumnError, - DatasetMissesDataError, + UntaggedTableError, ) from safeds.ml.classical.classification import ( AdaBoost, @@ -93,7 +93,12 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi }, ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), NonNumericColumnError, - r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\nThe OneHotEncoder should be used if you work with nominal data. 
If your data contains too many different values\nor is ordinal, you should use the LabelEncoder.", + ( + r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou" + r" can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + r" data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too" + r" many different values\nor is ordinal, you should use the LabelEncoder." + ), ), ( Table( @@ -105,7 +110,12 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi }, ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), MissingValuesColumnError, - r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou can use the Imputer to replace the missing values based on different strategies.\nIf you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", + ( + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou" + r" can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + r" remove the missing values entirely you can use the method" + r" `Table.remove_rows_with_missing_values`." 
+ ), ), ( Table( @@ -118,11 +128,13 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), DatasetMissesDataError, r"Dataset contains no rows", - ) + ), ], - ids=["non-numerical data", "missing values in data", "no rows in data"] + ids=["non-numerical data", "missing values in data", "no rows in data"], ) - def test_should_raise_on_invalid_data(self, classifier: Classifier, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str) -> None: + def test_should_raise_on_invalid_data( + self, classifier: Classifier, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str, + ) -> None: with pytest.raises(expected_error, match=expected_error_msg): classifier.fit(invalid_data) @@ -217,11 +229,18 @@ def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, v ), DatasetMissesDataError, r"Dataset contains no rows", - ) + ), ], - ids=["non-numerical data", "missing values in data", "no rows in data"] + ids=["non-numerical data", "missing values in data", "no rows in data"], ) - def test_should_raise_on_invalid_data(self, classifier: Classifier, valid_data: TaggedTable, invalid_data: Table, expected_error: Any, expected_error_msg: str) -> None: + def test_should_raise_on_invalid_data( + self, + classifier: Classifier, + valid_data: TaggedTable, + invalid_data: Table, + expected_error: Any, + expected_error_msg: str, + ) -> None: classifier = classifier.fit(valid_data) with pytest.raises(expected_error, match=expected_error_msg): classifier.predict(invalid_data) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 61882cb8b..2782e5a32 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -8,12 +8,12 @@ from safeds.exceptions import ( ColumnLengthMismatchError, 
DatasetContainsTargetError, + DatasetMissesDataError, DatasetMissesFeaturesError, + MissingValuesColumnError, ModelNotFittedError, - UntaggedTableError, NonNumericColumnError, - MissingValuesColumnError, - DatasetMissesDataError, + UntaggedTableError, ) from safeds.ml.classical.regression import ( AdaBoost, @@ -129,11 +129,13 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt ).tag_columns(target_name="target", feature_names=["feat1", "feat2"]), DatasetMissesDataError, r"Dataset contains no rows", - ) + ), ], - ids=["non-numerical data", "missing values in data", "no rows in data"] + ids=["non-numerical data", "missing values in data", "no rows in data"], ) - def test_should_raise_on_invalid_data(self, regressor: Regressor, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str) -> None: + def test_should_raise_on_invalid_data( + self, regressor: Regressor, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str, + ) -> None: with pytest.raises(expected_error, match=expected_error_msg): regressor.fit(invalid_data) @@ -205,7 +207,12 @@ def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, val }, ), NonNumericColumnError, - r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many different values\nor is ordinal, you should use the LabelEncoder.", + ( + r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\{'feat1'\}\nYou" + r" can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + r" data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too" + r" many different values\nor is ordinal, you should use the LabelEncoder." 
+ ), ), ( Table( @@ -216,7 +223,12 @@ def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, val }, ), MissingValuesColumnError, - r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou can use the Imputer to replace the missing values based on different strategies.\nIf you want to remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", + ( + r"Tried to do an operation on one or multiple columns containing missing values: \n\{'feat1'\}\nYou" + r" can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + r" remove the missing values entirely you can use the method" + r" `Table.remove_rows_with_missing_values`." + ), ), ( Table( @@ -228,11 +240,18 @@ def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, val ), DatasetMissesDataError, r"Dataset contains no rows", - ) + ), ], - ids=["non-numerical data", "missing values in data", "no rows in data"] + ids=["non-numerical data", "missing values in data", "no rows in data"], ) - def test_should_raise_on_invalid_data(self, regressor: Regressor, valid_data: TaggedTable, invalid_data: Table, expected_error: Any, expected_error_msg: str) -> None: + def test_should_raise_on_invalid_data( + self, + regressor: Regressor, + valid_data: TaggedTable, + invalid_data: Table, + expected_error: Any, + expected_error_msg: str, + ) -> None: regressor = regressor.fit(valid_data) with pytest.raises(expected_error, match=expected_error_msg): regressor.predict(invalid_data) From 60d35308ae6bf955905cb472a1e8d304bf40152b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:08:05 +0000 Subject: [PATCH 04/11] style: apply automated linter fixes --- src/safeds/exceptions/_data.py | 12 ++++++++---- .../ml/classical/classification/test_classifier.py | 6 +++++- 
.../safeds/ml/classical/regression/test_regressor.py | 6 +++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 7f91b3187..1cbeb6c3c 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -26,8 +26,10 @@ class NonNumericColumnError(Exception): def __init__(self, column_info: str, help_msg: str | None = None) -> None: line_break = "\n" super().__init__( - "Tried to do a numerical operation on one or multiple non-numerical columns:" - f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}", + ( + "Tried to do a numerical operation on one or multiple non-numerical columns:" + f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}" + ), ) @@ -37,8 +39,10 @@ class MissingValuesColumnError(Exception): def __init__(self, column_info: str, help_msg: str | None = None) -> None: line_break = "\n" super().__init__( - "Tried to do an operation on one or multiple columns containing missing values:" - f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}", + ( + "Tried to do an operation on one or multiple columns containing missing values:" + f" \n{column_info}{line_break + help_msg if help_msg is not None else ''}" + ), ) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 04d5acf12..311cc5292 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -133,7 +133,11 @@ def test_should_not_change_input_table(self, classifier: Classifier, request: Fi ids=["non-numerical data", "missing values in data", "no rows in data"], ) def test_should_raise_on_invalid_data( - self, classifier: Classifier, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str, + self, + classifier: Classifier, + invalid_data: TaggedTable, + 
expected_error: Any, + expected_error_msg: str, ) -> None: with pytest.raises(expected_error, match=expected_error_msg): classifier.fit(invalid_data) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 2782e5a32..7179d6781 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -134,7 +134,11 @@ def test_should_not_change_input_table(self, regressor: Regressor, request: Fixt ids=["non-numerical data", "missing values in data", "no rows in data"], ) def test_should_raise_on_invalid_data( - self, regressor: Regressor, invalid_data: TaggedTable, expected_error: Any, expected_error_msg: str, + self, + regressor: Regressor, + invalid_data: TaggedTable, + expected_error: Any, + expected_error_msg: str, ) -> None: with pytest.raises(expected_error, match=expected_error_msg): regressor.fit(invalid_data) From 5e341cfb5f35d877b5c7c454299296ab8a8fcbfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 16 Jun 2023 09:50:57 +0200 Subject: [PATCH 05/11] test: Added tests for `_util_sklearn.fit` and `_util_sklearn.predict` to test `LearningError` and `PredictionError` refactor: Removed option in `_util_sklearn.predict` to set feature_names or target_name to None as if at least one of them is None the method will always raise an Exception Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- src/safeds/ml/classical/_util_sklearn.py | 6 ++--- .../safeds/ml/classical/test_util_sklearn.py | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index d82322ed8..2179a4bdf 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -69,7 +69,7 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: # noinspection 
PyProtectedMember -def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_name: str | None) -> TaggedTable: +def predict(model: Any, dataset: Table, feature_names: list[str], target_name: str) -> TaggedTable: """ Predict a target vector using a dataset containing feature vectors. The model has to be trained first. @@ -79,9 +79,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ Classifier or regressor from scikit-learn. dataset : Table The dataset containing the features. - target_name : Optional[str] + target_name : str The name of the target column. - feature_names : Optional[list[str]] + feature_names : list[str] The names of the feature columns. Returns diff --git a/tests/safeds/ml/classical/test_util_sklearn.py b/tests/safeds/ml/classical/test_util_sklearn.py index 46b88a9f2..225ce7161 100644 --- a/tests/safeds/ml/classical/test_util_sklearn.py +++ b/tests/safeds/ml/classical/test_util_sklearn.py @@ -1,6 +1,10 @@ import warnings +import pytest + from safeds.data.tabular.containers import Table +from safeds.exceptions import LearningError, PredictionError +from safeds.ml.classical._util_sklearn import fit, predict from safeds.ml.classical.regression import LinearRegression @@ -17,3 +21,23 @@ def test_predict_should_not_warn_about_feature_names() -> None: with warnings.catch_warnings(): warnings.filterwarnings("error", message="X has feature names") fitted_model.predict(test_set) + + +class MLModelRaiseValueErrorOnFitAndPredict: + def fit(self, x, y): + raise ValueError("Raise ValueError (LearningError) in fit for Test") + + def predict(self, x): + raise ValueError("Raise ValueError (PredictionError) in predict for Test") + + +def test_should_raise_learning_error(): + tagged_table = Table({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]}).tag_columns("col3") + with pytest.raises(LearningError, match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test"): + 
fit(MLModelRaiseValueErrorOnFitAndPredict(), tagged_table) + + +def test_should_raise_prediction_error(): + table = Table({"col1": [1, 2], "col2": [3, 4]}) + with pytest.raises(PredictionError, match=r"Error occurred while predicting: Raise ValueError \(PredictionError\) in predict for Test"): + predict(MLModelRaiseValueErrorOnFitAndPredict(), table, ["col1", "col2"], "col3") From 3d2355085b2959eeb9fb5a195fb4c062c4598ea8 Mon Sep 17 00:00:00 2001 From: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> Date: Fri, 16 Jun 2023 09:52:26 +0200 Subject: [PATCH 06/11] Update src/safeds/exceptions/_data.py Co-authored-by: Junior Atemebang <129027012+jxnior01@users.noreply.github.com> --- src/safeds/exceptions/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 1cbeb6c3c..2278efbfd 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -34,7 +34,7 @@ def __init__(self, column_info: str, help_msg: str | None = None) -> None: class MissingValuesColumnError(Exception): - """Exception raised for trying to do operations on a columns containing missing values.""" + """Exception raised for trying to do operations on columns containing missing values.""" def __init__(self, column_info: str, help_msg: str | None = None) -> None: line_break = "\n" From d37fc58eca11bbd9429955a92c2b0bd324282d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 16 Jun 2023 09:59:30 +0200 Subject: [PATCH 07/11] refactor: Added option in `_util_sklearn.predict` to set feature_names or target_name to None to satisfy the linter again Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- src/safeds/ml/classical/_util_sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 6d1fefd81..19c13e5b3 100644 --- 
a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -81,7 +81,7 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: # noinspection PyProtectedMember -def predict(model: Any, dataset: Table, feature_names: list[str], target_name: str) -> TaggedTable: +def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_name: str | None) -> TaggedTable: """ Predict a target vector using a dataset containing feature vectors. The model has to be trained first. @@ -91,9 +91,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str], target_name: s Classifier or regressor from scikit-learn. dataset : Table The dataset containing the features. - target_name : str + target_name : Optional[str] The name of the target column. - feature_names : list[str] + feature_names : Optional[list[str]] The names of the feature columns. Returns From 6faaccfe72fcee47c080ae2fd32d3cde2633a107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 16 Jun 2023 10:09:04 +0200 Subject: [PATCH 08/11] refactor: Added a reason for the parameters in `MLModelRaiseValueErrorOnFitAndPredict` to satisfy the linter Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- tests/safeds/ml/classical/test_util_sklearn.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/safeds/ml/classical/test_util_sklearn.py b/tests/safeds/ml/classical/test_util_sklearn.py index 225ce7161..1e7c4e6b8 100644 --- a/tests/safeds/ml/classical/test_util_sklearn.py +++ b/tests/safeds/ml/classical/test_util_sklearn.py @@ -24,20 +24,27 @@ def test_predict_should_not_warn_about_feature_names() -> None: class MLModelRaiseValueErrorOnFitAndPredict: - def fit(self, x, y): + x, y = None, None + + def fit(self, x, y) -> None: + # The Linter does not want unnecessary parameters, so we just assign them to the class values + self.x = x + self.y = y raise ValueError("Raise ValueError 
(LearningError) in fit for Test") - def predict(self, x): + def predict(self, x) -> None: + # The Linter does not want unnecessary parameters, so we just assign it to the class value + self.x = x raise ValueError("Raise ValueError (PredictionError) in predict for Test") -def test_should_raise_learning_error(): +def test_should_raise_learning_error() -> None: tagged_table = Table({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]}).tag_columns("col3") with pytest.raises(LearningError, match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test"): fit(MLModelRaiseValueErrorOnFitAndPredict(), tagged_table) -def test_should_raise_prediction_error(): +def test_should_raise_prediction_error() -> None: table = Table({"col1": [1, 2], "col2": [3, 4]}) with pytest.raises(PredictionError, match=r"Error occurred while predicting: Raise ValueError \(PredictionError\) in predict for Test"): predict(MLModelRaiseValueErrorOnFitAndPredict(), table, ["col1", "col2"], "col3") From 408dead165d8154c2f4637e725ce90f467de6b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 16 Jun 2023 10:13:37 +0200 Subject: [PATCH 09/11] refactor: Added type annotations for the parameters in `MLModelRaiseValueErrorOnFitAndPredict` to satisfy the linter Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- tests/safeds/ml/classical/test_util_sklearn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/safeds/ml/classical/test_util_sklearn.py b/tests/safeds/ml/classical/test_util_sklearn.py index 1e7c4e6b8..04f8e979f 100644 --- a/tests/safeds/ml/classical/test_util_sklearn.py +++ b/tests/safeds/ml/classical/test_util_sklearn.py @@ -1,4 +1,5 @@ import warnings +from typing import Any import pytest @@ -26,13 +27,13 @@ def test_predict_should_not_warn_about_feature_names() -> None: class MLModelRaiseValueErrorOnFitAndPredict: x, y = None, None - def fit(self, x, y) -> None: + def fit(self, x: Any, y: 
Any) -> None: # The Linter does not want unnecessary parameters, so we just assign them to the class values self.x = x self.y = y raise ValueError("Raise ValueError (LearningError) in fit for Test") - def predict(self, x) -> None: + def predict(self, x: Any) -> None: # The Linter does not want unnecessary parameters, so we just assign it to the class value self.x = x raise ValueError("Raise ValueError (PredictionError) in predict for Test") From d680e3e6f5fa11d9ca985b205955a297d2b47294 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 16 Jun 2023 08:15:44 +0000 Subject: [PATCH 10/11] style: apply automated linter fixes --- tests/safeds/ml/classical/test_util_sklearn.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/safeds/ml/classical/test_util_sklearn.py b/tests/safeds/ml/classical/test_util_sklearn.py index 04f8e979f..5176b8e95 100644 --- a/tests/safeds/ml/classical/test_util_sklearn.py +++ b/tests/safeds/ml/classical/test_util_sklearn.py @@ -2,7 +2,6 @@ from typing import Any import pytest - from safeds.data.tabular.containers import Table from safeds.exceptions import LearningError, PredictionError from safeds.ml.classical._util_sklearn import fit, predict @@ -41,11 +40,16 @@ def predict(self, x: Any) -> None: def test_should_raise_learning_error() -> None: tagged_table = Table({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]}).tag_columns("col3") - with pytest.raises(LearningError, match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test"): + with pytest.raises( + LearningError, match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test", + ): fit(MLModelRaiseValueErrorOnFitAndPredict(), tagged_table) def test_should_raise_prediction_error() -> None: table = Table({"col1": [1, 2], "col2": [3, 4]}) - with pytest.raises(PredictionError, match=r"Error occurred while predicting: Raise ValueError 
\(PredictionError\) in predict for Test"): + with pytest.raises( + PredictionError, + match=r"Error occurred while predicting: Raise ValueError \(PredictionError\) in predict for Test", + ): predict(MLModelRaiseValueErrorOnFitAndPredict(), table, ["col1", "col2"], "col3") From 5014a5afc348b24d79c49324bfbf6ba760a35134 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 16 Jun 2023 08:17:24 +0000 Subject: [PATCH 11/11] style: apply automated linter fixes --- tests/safeds/ml/classical/test_util_sklearn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/safeds/ml/classical/test_util_sklearn.py b/tests/safeds/ml/classical/test_util_sklearn.py index 5176b8e95..741b53d7f 100644 --- a/tests/safeds/ml/classical/test_util_sklearn.py +++ b/tests/safeds/ml/classical/test_util_sklearn.py @@ -41,7 +41,8 @@ def predict(self, x: Any) -> None: def test_should_raise_learning_error() -> None: tagged_table = Table({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]}).tag_columns("col3") with pytest.raises( - LearningError, match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test", + LearningError, + match=r"Error occurred while learning: Raise ValueError \(LearningError\) in fit for Test", ): fit(MLModelRaiseValueErrorOnFitAndPredict(), tagged_table)