From 002479bac8ac8e404a94445a90bc693b2ffc0a4d Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 13 Jun 2024 15:12:10 +0200 Subject: [PATCH 01/94] optimization for AdaBoost --- src/safeds/exceptions/__init__.py | 4 ++ src/safeds/exceptions/_ml.py | 13 ++++++ .../ml/classical/_bases/_ada_boost_base.py | 16 ++++--- src/safeds/ml/classical/_supervised_model.py | 23 ++++++++++ .../classification/_ada_boost_classifier.py | 25 +++++++++-- .../classical/classification/_classifier.py | 42 +++++++++++++++++-- src/safeds/ml/metrics/__init__.py | 6 +++ src/safeds/ml/metrics/_classifier_metric.py | 8 ++++ src/safeds/ml/metrics/_regressor_metric.py | 7 ++++ .../classification/test_ada_boost.py | 8 ++++ 10 files changed, 141 insertions(+), 11 deletions(-) create mode 100644 src/safeds/ml/metrics/_classifier_metric.py create mode 100644 src/safeds/ml/metrics/_regressor_metric.py diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 8f1e9de6d..a025ef810 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -17,6 +17,8 @@ DatasetMissesDataError, DatasetMissesFeaturesError, FeatureDataMismatchError, + FittingWithChoiceError, + FittingWithoutChoiceError, InputSizeError, InvalidModelStructureError, LearningError, @@ -69,6 +71,8 @@ class OutOfBoundsError(SafeDsError): "DatasetMissesDataError", "DatasetMissesFeaturesError", "FeatureDataMismatchError", + "FittingWithChoiceError", + "FittingWithoutChoiceError", "InputSizeError", "InvalidModelStructureError", "LearningError", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index d84395485..97854e0f8 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -22,6 +22,19 @@ def __init__(self) -> None: super().__init__("Dataset contains no rows") +class FittingWithChoiceError(Exception): + """Raised when a model is fitted with a choice object as a parameter.""" + def __init__(self): + super().__init__(f"Error occurred while 
fitting: Trying to fit with a Choice Parameter. Please use " + f"fit_by_exhaustive_search() instead.") + +class FittingWithoutChoiceError(Exception): + """Raised when a model is fitted by exhaustive search without a choice object as a parameter.""" + def __init__(self): + super().__init__(f"Error occurred while fitting: Trying to fit by exhaustive search without a Choice " + f"Parameter. Please use fit() instead.") + + class LearningError(Exception): """ Raised when an error occurred while training a model. diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py b/src/safeds/ml/classical/_bases/_ada_boost_base.py index 5cacf341e..3973e8304 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -1,10 +1,12 @@ from __future__ import annotations +import types from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Collection from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _OpenBound +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from safeds.ml.classical import SupervisedModel @@ -18,15 +20,19 @@ class _AdaBoostBase(ABC): @abstractmethod def __init__( self, - max_learner_count: int, + max_learner_count: int | Choice[int], learning_rate: float, ) -> None: # Validation - _check_bounds("max_learner_count", max_learner_count, lower_bound=_ClosedBound(1)) + if isinstance(max_learner_count, Choice): + for value in max_learner_count: + _check_bounds("max_learner_count", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("max_learner_count", max_learner_count, lower_bound=_ClosedBound(1)) _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) # Hyperparameters - self._max_learner_count: int = max_learner_count + self._max_learner_count: int | Choice[int] = max_learner_count self._learning_rate: float = learning_rate def __hash__(self) -> int: @@ -40,7 +46,7 
@@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def max_learner_count(self) -> int: + def max_learner_count(self) -> int | Choice[int]: """The maximum number of learners in the ensemble.""" return self._max_learner_count diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index a075f1855..c205ec690 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -17,6 +17,7 @@ PlainTableError, PredictionError, ) +from safeds.ml.metrics import ClassifierMetric, RegressorMetric if TYPE_CHECKING: from sklearn.base import ClassifierMixin, RegressorMixin @@ -244,6 +245,18 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N The training data containing the features and target. """ + def _check_additional_fit_by_exhaustive_search_preconditions(self, + training_set: TabularDataset, optimization_metric: ClassifierMetric | RegressorMetric, positive_class: Any = None) -> None: # noqa: B027 + """ + Check additional preconditions for fitting the model by exhaustive search and raise an error if any are + violated. + + Parameters + ---------- + training_set: + The training data containing the features and target. + """ + def _check_additional_predict_preconditions(self, dataset: Table | TabularDataset) -> None: # noqa: B027 """ Check additional preconditions for predicting with the model and raise an error if any are violated. @@ -254,6 +267,16 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase The dataset containing at least the features. """ + def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 + """ + Check additional preconditions for predicting with the model and raise an error if any are violated. 
+ + Returns + ------- + model_list: + A list of every possible model, given all Choice Parameters + """ + @abstractmethod def _clone(self) -> Self: """ diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index e8fa50ae9..6439ec08e 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -1,11 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self, Any from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError, LearningError from safeds.ml.classical._bases import _AdaBoostBase from ._classifier import Classifier +from ...hyperparameters import Choice +from ...metrics import ClassifierMetric if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -40,7 +44,7 @@ def __init__( self, *, learner: Classifier | None = None, - max_learner_count: int = 50, + max_learner_count: int | Choice[int] = 50, learning_rate: float = 1.0, ) -> None: # Initialize superclasses @@ -83,10 +87,25 @@ def _clone(self) -> AdaBoostClassifier: def _get_sklearn_model(self) -> ClassifierMixin: from sklearn.ensemble import AdaBoostClassifier as SklearnAdaBoostClassifier - learner = self.learner._get_sklearn_model() if self.learner is not None else None return SklearnAdaBoostClassifier( estimator=learner, n_estimators=self._max_learner_count, learning_rate=self._learning_rate, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._max_learner_count, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset, optimization_metric: ClassifierMetric, positive_class: Any = None) -> None: + if 
isinstance(self._max_learner_count, int): + raise FittingWithoutChoiceError + if optimization_metric in {"precision", "recall", "f1score"} and positive_class is None: + raise LearningError(f"Please provide a positive class when using optimization metric {optimization_metric.value}.") + + def _get_models_for_all_choices(self) -> list[Self]: + models = [] + for value in self.max_learner_count: + models.append(AdaBoostClassifier(learner=self.learner, max_learner_count=value, learning_rate=self.learning_rate)) + return models diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index c05159d69..569a354bc 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -1,12 +1,12 @@ from __future__ import annotations from abc import ABC -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import ModelNotFittedError +from safeds.exceptions import ModelNotFittedError, PlainTableError, DatasetMissesDataError from safeds.ml.classical import SupervisedModel -from safeds.ml.metrics import ClassificationMetrics +from safeds.ml.metrics import ClassificationMetrics, ClassifierMetric if TYPE_CHECKING: from typing import Any @@ -212,6 +212,42 @@ def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: positive_class, ) + def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_metric: ClassifierMetric, + positive_class: Any = None) -> Self: + if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): + raise PlainTableError + if training_set.to_table().row_count == 0: + raise DatasetMissesDataError + + self._check_additional_fit_by_exhaustive_search_preconditions(training_set, optimization_metric, positive_class) + + #TODO Cross Validation + + list_of_models = 
self._get_models_for_all_choices() + list_of_fitted_models = [] + for model in list_of_models: + list_of_fitted_models.append(model.fit(training_set)) + + best_model = None + for fitted_model in list_of_fitted_models: + if best_model is None: + best_model = fitted_model + else: + match optimization_metric.value: + case "accuracy": + if fitted_model.accuracy(training_set) > best_model.accuracy(training_set): + best_model = fitted_model + case "precision": + if fitted_model.precision(training_set, positive_class) > best_model.precision(training_set, positive_class): + best_model = fitted_model + case "recall": + if fitted_model.recall(training_set, positive_class) > best_model.recall(training_set, positive_class): + best_model = fitted_model + case "f1score": + if fitted_model.f1_score(training_set, positive_class) > best_model.f1_score(training_set, positive_class): + best_model = fitted_model + return best_model + def _extract_table(table_or_dataset: Table | TabularDataset) -> Table: """Extract the table from the given table or dataset.""" diff --git a/src/safeds/ml/metrics/__init__.py b/src/safeds/ml/metrics/__init__.py index aa465cff0..e430ca7c2 100644 --- a/src/safeds/ml/metrics/__init__.py +++ b/src/safeds/ml/metrics/__init__.py @@ -6,17 +6,23 @@ if TYPE_CHECKING: from ._classification_metrics import ClassificationMetrics + from ._classifier_metric import ClassifierMetric from ._regression_metrics import RegressionMetrics + from ._regressor_metric import RegressorMetric apipkg.initpkg( __name__, { "ClassificationMetrics": "._classification_metrics:ClassificationMetrics", "RegressionMetrics": "._regression_metrics:RegressionMetrics", + "RegressorMetric": "._regressor_metric:RegressorMetric", + "ClassifierMetric": "._classifier_metric:ClassifierMetric", }, ) __all__ = [ + "ClassifierMetric", "ClassificationMetrics", + "RegressorMetric", "RegressionMetrics", ] diff --git a/src/safeds/ml/metrics/_classifier_metric.py b/src/safeds/ml/metrics/_classifier_metric.py 
new file mode 100644 index 000000000..8af70a951 --- /dev/null +++ b/src/safeds/ml/metrics/_classifier_metric.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class ClassifierMetric(Enum): + ACCURACY = "accuracy" + PRECISION = "precision" + RECALL = "recall" + F1_SCORE = "f1_score" diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py new file mode 100644 index 000000000..db631807e --- /dev/null +++ b/src/safeds/ml/metrics/_regressor_metric.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class RegressorMetric(Enum): + MEAN_SQUARED_ERROR = "mean_squared_error" + MEAN_ABSOLUTE_ERROR = "mean_absolute_error" + diff --git a/tests/safeds/ml/classical/classification/test_ada_boost.py b/tests/safeds/ml/classical/classification/test_ada_boost.py index ecfe6f3f4..3e807fbef 100644 --- a/tests/safeds/ml/classical/classification/test_ada_boost.py +++ b/tests/safeds/ml/classical/classification/test_ada_boost.py @@ -3,6 +3,8 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import AdaBoostClassifier +from safeds.ml.hyperparameters import Choice +from safeds.ml.metrics import ClassifierMetric @pytest.fixture() @@ -39,6 +41,12 @@ def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int) - with pytest.raises(OutOfBoundsError): AdaBoostClassifier(max_learner_count=max_learner_count) + def test_choice_(self): + model = AdaBoostClassifier(max_learner_count=Choice(10, 20, 300, 413, 2000, 1456, 6666, 81, 9321, 2422)) + model = model.fit_by_exhaustive_search(Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).to_tabular_dataset("col1"), ClassifierMetric.PRECISION, 2) + print(model.max_learner_count) + pred = model.predict(Table.from_dict({"col2": [10, 20, 30, -40]})) + print(pred) class TestLearningRate: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: From 
5c41c51ac7b2e0598b83b78e593d1261a88861fd Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 16 Jun 2024 16:59:36 +0200 Subject: [PATCH 02/94] small change --- src/safeds/ml/classical/_supervised_model.py | 3 +-- .../ml/classical/classification/_ada_boost_classifier.py | 5 ++--- src/safeds/ml/classical/classification/_classifier.py | 7 +++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index c205ec690..f562d3378 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -245,8 +245,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N The training data containing the features and target. """ - def _check_additional_fit_by_exhaustive_search_preconditions(self, - training_set: TabularDataset, optimization_metric: ClassifierMetric | RegressorMetric, positive_class: Any = None) -> None: # noqa: B027 + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: # noqa: B027 """ Check additional preconditions for fitting the model by exhaustive search and raise an error if any are violated. 
diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 6439ec08e..39bce86cc 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -98,11 +98,10 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N if isinstance(self._max_learner_count, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset, optimization_metric: ClassifierMetric, positive_class: Any = None) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: if isinstance(self._max_learner_count, int): raise FittingWithoutChoiceError - if optimization_metric in {"precision", "recall", "f1score"} and positive_class is None: - raise LearningError(f"Please provide a positive class when using optimization metric {optimization_metric.value}.") + def _get_models_for_all_choices(self) -> list[Self]: models = [] diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 569a354bc..74eed5c31 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import ModelNotFittedError, PlainTableError, DatasetMissesDataError +from safeds.exceptions import ModelNotFittedError, PlainTableError, DatasetMissesDataError, LearningError from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import ClassificationMetrics, ClassifierMetric @@ -218,11 +218,14 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me raise PlainTableError if training_set.to_table().row_count 
== 0: raise DatasetMissesDataError + if optimization_metric in {"precision", "recall", "f1score"} and positive_class is None: + raise LearningError(f"Please provide a positive class when using optimization metric {optimization_metric.value}.") - self._check_additional_fit_by_exhaustive_search_preconditions(training_set, optimization_metric, positive_class) + self._check_additional_fit_by_exhaustive_search_preconditions(training_set) #TODO Cross Validation + #TODO Multiprocessing list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] for model in list_of_models: From 89139c28cb94563b7e0843ea4bcd0bd2b01619f6 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 16 Jun 2024 19:08:07 +0200 Subject: [PATCH 03/94] add adaboost tests --- .../classification/_ada_boost_classifier.py | 2 +- .../classical/classification/_classifier.py | 4 +- .../classification/test_ada_boost.py | 40 +++++++++++++++---- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 39bce86cc..50ff2785a 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -105,6 +105,6 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: def _get_models_for_all_choices(self) -> list[Self]: models = [] - for value in self.max_learner_count: + for value in self._max_learner_count: models.append(AdaBoostClassifier(learner=self.learner, max_learner_count=value, learning_rate=self.learning_rate)) return models diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 74eed5c31..a8480e2bb 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -218,8 +218,8 @@ def fit_by_exhaustive_search(self, 
training_set: TabularDataset, optimization_me raise PlainTableError if training_set.to_table().row_count == 0: raise DatasetMissesDataError - if optimization_metric in {"precision", "recall", "f1score"} and positive_class is None: - raise LearningError(f"Please provide a positive class when using optimization metric {optimization_metric.value}.") + if optimization_metric.value in {"precision", "recall", "f1score"} and positive_class is None: + raise LearningError(f"Please provide a positive class when using optimization metric '{optimization_metric.value}'") self._check_additional_fit_by_exhaustive_search_preconditions(training_set) diff --git a/tests/safeds/ml/classical/classification/test_ada_boost.py b/tests/safeds/ml/classical/classification/test_ada_boost.py index 3e807fbef..4e40142a2 100644 --- a/tests/safeds/ml/classical/classification/test_ada_boost.py +++ b/tests/safeds/ml/classical/classification/test_ada_boost.py @@ -1,7 +1,7 @@ import pytest from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from safeds.exceptions import OutOfBoundsError +from safeds.exceptions import OutOfBoundsError, FittingWithChoiceError, FittingWithoutChoiceError, LearningError from safeds.ml.classical.classification import AdaBoostClassifier from safeds.ml.hyperparameters import Choice from safeds.ml.metrics import ClassifierMetric @@ -26,6 +26,33 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert isinstance(fitted_model._wrapped_model.estimator, type(learner._get_sklearn_model())) +class TestChoice: + def test_should_raise_if_model_is_fitted_with_choice(self, training_set: TabularDataset) -> None: + with pytest.raises(FittingWithChoiceError): + AdaBoostClassifier(max_learner_count=Choice(1, 2, 3)).fit(training_set) + + def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, + training_set: TabularDataset) -> None: + with 
pytest.raises(FittingWithoutChoiceError): + AdaBoostClassifier(max_learner_count=2).fit_by_exhaustive_search(training_set, + optimization_metric=ClassifierMetric.ACCURACY) + + def test_should_raise_if_no_positive_class_is_provided(self, training_set: TabularDataset) -> None: + with pytest.raises(LearningError): + AdaBoostClassifier(max_learner_count=Choice(1, 2)).fit_by_exhaustive_search(training_set, + optimization_metric=ClassifierMetric.PRECISION) + + def test_workflow_with_choice_parameter(self): + model = AdaBoostClassifier(max_learner_count=Choice(1, 2)) + model = model.fit_by_exhaustive_search( + Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).to_tabular_dataset("col1"), + ClassifierMetric.ACCURACY) + assert isinstance(model, AdaBoostClassifier) + pred = model.predict(Table.from_dict({"col2": [10, 20, 30, 40]})) + assert isinstance(pred, TabularDataset) + + + class TestMaxLearnerCount: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: fitted_model = AdaBoostClassifier(max_learner_count=2).fit(training_set) @@ -41,12 +68,11 @@ def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int) - with pytest.raises(OutOfBoundsError): AdaBoostClassifier(max_learner_count=max_learner_count) - def test_choice_(self): - model = AdaBoostClassifier(max_learner_count=Choice(10, 20, 300, 413, 2000, 1456, 6666, 81, 9321, 2422)) - model = model.fit_by_exhaustive_search(Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).to_tabular_dataset("col1"), ClassifierMetric.PRECISION, 2) - print(model.max_learner_count) - pred = model.predict(Table.from_dict({"col2": [10, 20, 30, -40]})) - print(pred) + @pytest.mark.parametrize("max_learner_count", [Choice(-1, 1), Choice(1, 0)], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0_in_choice(self, max_learner_count: Choice[int]) -> None: + with pytest.raises(OutOfBoundsError): + AdaBoostClassifier(max_learner_count=max_learner_count) + 
class TestLearningRate: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: From 701655e98852fd0079b8706eacd5209fa47a846a Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 01:40:48 +0200 Subject: [PATCH 04/94] add decisiontree tests and move some tests to classifier class --- .../classical/_bases/_decision_tree_base.py | 29 ++++++----- .../_decision_tree_classifier.py | 31 +++++++++-- .../classification/test_ada_boost.py | 39 ++------------ .../classification/test_classifier.py | 52 ++++++++++++++++++- .../classification/test_decision_tree.py | 11 ++-- 5 files changed, 106 insertions(+), 56 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_decision_tree_base.py b/src/safeds/ml/classical/_bases/_decision_tree_base.py index 0b5d22823..f200b5ca3 100644 --- a/src/safeds/ml/classical/_bases/_decision_tree_base.py +++ b/src/safeds/ml/classical/_bases/_decision_tree_base.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound +from safeds.ml.hyperparameters import Choice class _DecisionTreeBase(ABC): @@ -14,20 +15,24 @@ class _DecisionTreeBase(ABC): @abstractmethod def __init__( self, - max_depth: int | None, - min_sample_count_in_leaves: int, + max_depth: int | Choice[int] | None, + min_sample_count_in_leaves: int | Choice[int], ) -> None: # Validation - _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) - _check_bounds( - "min_sample_count_in_leaves", - min_sample_count_in_leaves, - lower_bound=_ClosedBound(1), - ) + if isinstance(max_depth, Choice): + for value in max_depth: + _check_bounds("max_depth", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) + if isinstance(min_sample_count_in_leaves, Choice): + for value in min_sample_count_in_leaves: + _check_bounds("min_sample_count_in_leaves", value, lower_bound=_ClosedBound(1)) + else: + 
_check_bounds("min_sample_count_in_leaves", min_sample_count_in_leaves, lower_bound=_ClosedBound(1)) # Hyperparameters - self._max_depth: int | None = max_depth - self._min_sample_count_in_leaves: int = min_sample_count_in_leaves + self._max_depth: int | Choice[int] | None = max_depth + self._min_sample_count_in_leaves: int | Choice[int] = min_sample_count_in_leaves def __hash__(self) -> int: return _structural_hash( @@ -40,11 +45,11 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def max_depth(self) -> int | None: + def max_depth(self) -> int | Choice[int] | None: """The maximum depth of the tree.""" return self._max_depth @property - def min_sample_count_in_leaves(self) -> int: + def min_sample_count_in_leaves(self) -> int | Choice[int]: """The minimum number of samples that must remain in the leaves of the tree.""" return self._min_sample_count_in_leaves diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 9821573d6..7bd2169a2 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase from ._classifier import Classifier +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -37,8 +40,8 @@ class DecisionTreeClassifier(Classifier, _DecisionTreeBase): def __init__( self, *, - max_depth: int | None = None, - min_sample_count_in_leaves: int = 1, + 
max_depth: int | Choice[int] | None = None, + min_sample_count_in_leaves: int | Choice[int] = 1, ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -71,3 +74,25 @@ def _get_sklearn_model(self) -> ClassifierMixin: max_depth=self._max_depth, min_samples_leaf=self._min_sample_count_in_leaves, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._max_depth, int) or isinstance(self._min_sample_count_in_leaves, int): + raise FittingWithChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + models = [] + if isinstance(self._max_depth, Choice) and isinstance(self._min_sample_count_in_leaves, Choice): + for max_depth in self._max_depth: + for min_sample in self._min_sample_count_in_leaves: + models.append(DecisionTreeClassifier(max_depth=max_depth, min_sample_count_in_leaves=min_sample)) + elif isinstance(self._max_depth, Choice): + for max_depth in self._max_depth: + models.append(DecisionTreeClassifier(max_depth=max_depth, min_sample_count_in_leaves=self._min_sample_count_in_leaves)) + else: # _min_sample_count_in_leaves is a Choice + for min_sample in self._min_sample_count_in_leaves: + models.append(DecisionTreeClassifier(max_depth=self._max_depth, min_sample_count_in_leaves=min_sample)) + return models diff --git a/tests/safeds/ml/classical/classification/test_ada_boost.py b/tests/safeds/ml/classical/classification/test_ada_boost.py index 4e40142a2..0df5cf73f 100644 --- a/tests/safeds/ml/classical/classification/test_ada_boost.py +++ b/tests/safeds/ml/classical/classification/test_ada_boost.py @@ -1,10 +1,9 @@ import pytest from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from 
safeds.exceptions import OutOfBoundsError, FittingWithChoiceError, FittingWithoutChoiceError, LearningError +from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import AdaBoostClassifier from safeds.ml.hyperparameters import Choice -from safeds.ml.metrics import ClassifierMetric @pytest.fixture() @@ -26,33 +25,6 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert isinstance(fitted_model._wrapped_model.estimator, type(learner._get_sklearn_model())) -class TestChoice: - def test_should_raise_if_model_is_fitted_with_choice(self, training_set: TabularDataset) -> None: - with pytest.raises(FittingWithChoiceError): - AdaBoostClassifier(max_learner_count=Choice(1, 2, 3)).fit(training_set) - - def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, - training_set: TabularDataset) -> None: - with pytest.raises(FittingWithoutChoiceError): - AdaBoostClassifier(max_learner_count=2).fit_by_exhaustive_search(training_set, - optimization_metric=ClassifierMetric.ACCURACY) - - def test_should_raise_if_no_positive_class_is_provided(self, training_set: TabularDataset) -> None: - with pytest.raises(LearningError): - AdaBoostClassifier(max_learner_count=Choice(1, 2)).fit_by_exhaustive_search(training_set, - optimization_metric=ClassifierMetric.PRECISION) - - def test_workflow_with_choice_parameter(self): - model = AdaBoostClassifier(max_learner_count=Choice(1, 2)) - model = model.fit_by_exhaustive_search( - Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).to_tabular_dataset("col1"), - ClassifierMetric.ACCURACY) - assert isinstance(model, AdaBoostClassifier) - pred = model.predict(Table.from_dict({"col2": [10, 20, 30, 40]})) - assert isinstance(pred, TabularDataset) - - - class TestMaxLearnerCount: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: fitted_model = AdaBoostClassifier(max_learner_count=2).fit(training_set) @@ -63,13 +35,8 @@ 
def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("max_learner_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int) -> None: - with pytest.raises(OutOfBoundsError): - AdaBoostClassifier(max_learner_count=max_learner_count) - - @pytest.mark.parametrize("max_learner_count", [Choice(-1, 1), Choice(1, 0)], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0_in_choice(self, max_learner_count: Choice[int]) -> None: + @pytest.mark.parametrize("max_learner_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): AdaBoostClassifier(max_learner_count=max_learner_count) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 52d6a926d..b479184db 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -12,7 +12,7 @@ MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, + PlainTableError, FittingWithoutChoiceError, LearningError, FittingWithChoiceError, ) from safeds.ml.classical.classification import ( AdaBoostClassifier, @@ -24,6 +24,8 @@ RandomForestClassifier, SupportVectorClassifier, ) +from safeds.ml.hyperparameters import Choice +from safeds.ml.metrics import ClassifierMetric if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest @@ -53,6 +55,29 @@ def classifiers() -> list[Classifier]: ] +def classifiers_with_choices() -> list[Classifier]: + """ + Return the list of classifiers with Choices as Parameters to test choice functionality. 
+ + After you implemented a new classifier, add it to this list to ensure its `fit_by_exhaustive_search` method works as + expected. Place tests of methods that are specific to your classifier in a separate test file. + + Returns + ------- + classifiers : list[Classifier] + The list of classifiers to test. + """ + return [ + AdaBoostClassifier(max_learner_count=Choice(1, 2)), + DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), + GradientBoostingClassifier(), #TODO + KNearestNeighborsClassifier(2), #TODO + LogisticClassifier(), #TODO + RandomForestClassifier(), #TODO + SupportVectorClassifier(), #TODO + ] + + @pytest.fixture() def valid_data() -> TabularDataset: return Table( @@ -65,6 +90,31 @@ def valid_data() -> TabularDataset: ).to_tabular_dataset(target_name="target", extra_names=["id"]) +@pytest.mark.parametrize("classifier_with_choice", classifiers_with_choices(), ids=lambda x: x.__class__.__name__) +class TestChoiceClassifiers: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, + classifier_with_choice: Classifier, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithoutChoiceError): + classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) + + def test_should_raise_if_no_positive_class_is_provided(self, classifier_with_choice: Classifier, + valid_data: TabularDataset) -> None: + with pytest.raises(LearningError): + classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.PRECISION) + + def test_workflow_with_choice_parameter(self, classifier_with_choice: Classifier, valid_data: TabularDataset): + model = classifier_with_choice.fit_by_exhaustive_search(valid_data, ClassifierMetric.ACCURACY) + assert isinstance(model, type(classifier_with_choice)) + pred = model.predict(valid_data) + assert isinstance(pred, TabularDataset) + + def 
test_should_raise_if_model_is_fitted_with_choice(self, classifier_with_choice: Classifier, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithChoiceError): + classifier_with_choice.fit(valid_data) + + @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, classifier: Classifier, valid_data: TabularDataset) -> None: diff --git a/tests/safeds/ml/classical/classification/test_decision_tree.py b/tests/safeds/ml/classical/classification/test_decision_tree.py index f1c35c6be..8be06793b 100644 --- a/tests/safeds/ml/classical/classification/test_decision_tree.py +++ b/tests/safeds/ml/classical/classification/test_decision_tree.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import DecisionTreeClassifier +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.max_depth == 2 - @pytest.mark.parametrize("max_depth", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int) -> None: + @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeClassifier(max_depth=max_depth) @@ -37,7 +38,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, 
min_sample_count_in_leaves: int) -> None: + @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeClassifier(min_sample_count_in_leaves=min_sample_count_in_leaves) + + From 8f1d5767d0a59ad61e17210d78debb72c138520d Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 02:09:51 +0200 Subject: [PATCH 05/94] add learning rate choice for adaboost and refactor --- .../ml/classical/_bases/_ada_boost_base.py | 13 +++++++---- .../classification/_ada_boost_classifier.py | 17 +++++++++----- .../_decision_tree_classifier.py | 22 +++++++++---------- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py b/src/safeds/ml/classical/_bases/_ada_boost_base.py index 3973e8304..04f615b46 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -21,7 +21,7 @@ class _AdaBoostBase(ABC): def __init__( self, max_learner_count: int | Choice[int], - learning_rate: float, + learning_rate: float | Choice[float], ) -> None: # Validation if isinstance(max_learner_count, Choice): @@ -29,11 +29,16 @@ def __init__( _check_bounds("max_learner_count", value, lower_bound=_ClosedBound(1)) else: _check_bounds("max_learner_count", max_learner_count, lower_bound=_ClosedBound(1)) - _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) + + if isinstance(learning_rate, Choice): + for value in learning_rate: + _check_bounds("learning_rate", value, lower_bound=_OpenBound(0)) + else: + _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) # Hyperparameters self._max_learner_count: int | Choice[int] = max_learner_count - self._learning_rate: float = learning_rate + self._learning_rate: float | Choice[float] = 
learning_rate def __hash__(self) -> int: return _structural_hash( @@ -51,7 +56,7 @@ def max_learner_count(self) -> int | Choice[int]: return self._max_learner_count @property - def learning_rate(self) -> float: + def learning_rate(self) -> float | Choice[float]: """The learning rate.""" return self._learning_rate diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 50ff2785a..f048668ce 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -45,7 +45,7 @@ def __init__( *, learner: Classifier | None = None, max_learner_count: int | Choice[int] = 50, - learning_rate: float = 1.0, + learning_rate: float | Choice[float] = 1.0, ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -95,16 +95,21 @@ def _get_sklearn_model(self) -> ClassifierMixin: ) def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: - if isinstance(self._max_learner_count, Choice): + if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: - if isinstance(self._max_learner_count, int): + if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ + self._max_learner_count] + learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ + self._learning_rate] + models = [] - for value in self._max_learner_count: - models.append(AdaBoostClassifier(learner=self.learner, max_learner_count=value, learning_rate=self.learning_rate)) + for mlc in 
max_learner_count_choices: + for lr in learning_rate_choices: + models.append(AdaBoostClassifier(max_learner_count=mlc, learning_rate=lr)) return models diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 7bd2169a2..ca1dfc3d1 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -80,19 +80,17 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: - if isinstance(self._max_depth, int) or isinstance(self._min_sample_count_in_leaves, int): - raise FittingWithChoiceError + if not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): + raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[Self]: + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ + self._max_depth] + min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ + self._min_sample_count_in_leaves] + models = [] - if isinstance(self._max_depth, Choice) and isinstance(self._min_sample_count_in_leaves, Choice): - for max_depth in self._max_depth: - for min_sample in self._min_sample_count_in_leaves: - models.append(DecisionTreeClassifier(max_depth=max_depth, min_sample_count_in_leaves=min_sample)) - elif isinstance(self._max_depth, Choice): - for max_depth in self._max_depth: - models.append(DecisionTreeClassifier(max_depth=max_depth, min_sample_count_in_leaves=self._min_sample_count_in_leaves)) - else: # _min_sample_count_in_leaves is a Choice - for min_sample in self._min_sample_count_in_leaves: - models.append(DecisionTreeClassifier(max_depth=self._max_depth, 
min_sample_count_in_leaves=min_sample)) + for md in max_depth_choices: + for msc in min_sample_count_choices: + models.append(DecisionTreeClassifier(max_depth=md, min_sample_count_in_leaves=msc)) return models From 3b31cb6c7a6739261af5456d3e7838e33cbdbd3d Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 02:21:03 +0200 Subject: [PATCH 06/94] add gradientboosting --- .../_bases/_gradient_boosting_base.py | 22 ++++++++++----- .../_gradient_boosting_classifier.py | 27 +++++++++++++++++-- .../classification/test_gradient_boosting.py | 9 ++++--- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py index 63a1370a0..7758606cd 100644 --- a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py +++ b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _OpenBound +from safeds.ml.hyperparameters import Choice class _GradientBoostingBase(ABC): @@ -14,12 +15,21 @@ class _GradientBoostingBase(ABC): @abstractmethod def __init__( self, - tree_count: int, - learning_rate: float, + tree_count: int | Choice[int], + learning_rate: float | Choice[float], ) -> None: # Validation - _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) - _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) + if isinstance(tree_count, Choice): + for value in tree_count: + _check_bounds("tree_count", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) + + if isinstance(learning_rate, Choice): + for value in learning_rate: + _check_bounds("learning_rate", value, lower_bound=_OpenBound(0)) + else: + _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) # Hyperparameters self._tree_count = tree_count @@ -36,11 +46,11 @@ def 
__hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def tree_count(self) -> int: + def tree_count(self) -> int | Choice[int]: """The number of trees (estimators) in the ensemble.""" return self._tree_count @property - def learning_rate(self) -> float: + def learning_rate(self) -> float | Choice[float]: """The learning rate.""" return self._learning_rate diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index f2b78bace..5b0b16b63 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -3,9 +3,12 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase from ._classifier import Classifier +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -37,8 +40,8 @@ class GradientBoostingClassifier(Classifier, _GradientBoostingBase): def __init__( self, *, - tree_count: int = 100, - learning_rate: float = 0.1, + tree_count: int | Choice[int] = 100, + learning_rate: float | Choice[float] = 0.1, ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -71,3 +74,23 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_estimators=self._tree_count, learning_rate=self._learning_rate, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + 
if not isinstance(self._tree_count, Choice) and not isinstance(self._learning_rate, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ + self._tree_count] + learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ + self._learning_rate] + + models = [] + for tc in tree_count_choices: + for lr in learning_rate_choices: + models.append(GradientBoostingClassifier(tree_count=tc, learning_rate=lr)) + return models diff --git a/tests/safeds/ml/classical/classification/test_gradient_boosting.py b/tests/safeds/ml/classical/classification/test_gradient_boosting.py index 31f62d822..14aae9aa2 100644 --- a/tests/safeds/ml/classical/classification/test_gradient_boosting.py +++ b/tests/safeds/ml/classical/classification/test_gradient_boosting.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import GradientBoostingClassifier +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("tree_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_1(self, tree_count: int) -> None: + @pytest.mark.parametrize("tree_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_1(self, tree_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): GradientBoostingClassifier(tree_count=tree_count) @@ -37,7 +38,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.learning_rate == 2 - 
@pytest.mark.parametrize("learning_rate", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float) -> None: + @pytest.mark.parametrize("learning_rate", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): GradientBoostingClassifier(learning_rate=learning_rate) From 4c4595d45d2e349c459d8baa6cba79a5a537516d Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 02:24:11 +0200 Subject: [PATCH 07/94] adjust test file --- tests/safeds/ml/classical/classification/test_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index b479184db..21b130992 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -68,9 +68,9 @@ def classifiers_with_choices() -> list[Classifier]: The list of classifiers to test. 
""" return [ - AdaBoostClassifier(max_learner_count=Choice(1, 2)), + AdaBoostClassifier(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), - GradientBoostingClassifier(), #TODO + GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(2), #TODO LogisticClassifier(), #TODO RandomForestClassifier(), #TODO From b5ecbf57a3309f61ff2372b4ec03e6f5451652a8 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 02:37:16 +0200 Subject: [PATCH 08/94] add knearestneighbors --- .../_bases/_k_nearest_neighbors_base.py | 11 ++++-- .../_k_nearest_neighbors_classifier.py | 36 +++++++++++++------ .../classification/test_classifier.py | 3 +- .../test_k_nearest_neighbors.py | 5 +-- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py index 2113c4d9e..2d6527d38 100644 --- a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py +++ b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound +from safeds.ml.hyperparameters import Choice class _KNearestNeighborsBase(ABC): @@ -14,10 +15,14 @@ class _KNearestNeighborsBase(ABC): @abstractmethod def __init__( self, - neighbor_count: int, + neighbor_count: int | Choice[int], ) -> None: # Validation - _check_bounds("neighbor_count", neighbor_count, lower_bound=_ClosedBound(1)) + if isinstance(neighbor_count, Choice): + for value in neighbor_count: + _check_bounds("neighbor_count", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("neighbor_count", neighbor_count, lower_bound=_ClosedBound(1)) # Hyperparameters self._neighbor_count = neighbor_count @@ -32,6 +37,6 @@ def __hash__(self) -> int: # 
------------------------------------------------------------------------------------------------------------------ @property - def neighbor_count(self) -> int: + def neighbor_count(self) -> int | Choice[int]: """The number of neighbors used for interpolation.""" return self._neighbor_count diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 0dabbffc0..a69959d8e 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -1,11 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _KNearestNeighborsBase from ._classifier import Classifier +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -35,7 +37,7 @@ class KNearestNeighborsClassifier(Classifier, _KNearestNeighborsBase): def __init__( self, - neighbor_count: int, + neighbor_count: int | Choice[int], ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -54,15 +56,6 @@ def __hash__(self) -> int: # Template methods # ------------------------------------------------------------------------------------------------------------------ - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: - if self._neighbor_count > training_set._table.row_count: - raise ValueError( - ( - f"The parameter 'neighbor_count' ({self._neighbor_count}) has to be less than or equal to" - f" the sample size ({training_set._table.row_count})." 
- ), - ) - def _clone(self) -> KNearestNeighborsClassifier: return KNearestNeighborsClassifier( neighbor_count=self._neighbor_count, @@ -75,3 +68,24 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_neighbors=self._neighbor_count, n_jobs=-1, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._neighbor_count, Choice): + raise FittingWithChoiceError + if self._neighbor_count > training_set._table.row_count: + raise ValueError( + ( + f"The parameter 'neighbor_count' ({self._neighbor_count}) has to be less than or equal to" + f" the sample size ({training_set._table.row_count})." + ), + ) + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._neighbor_count, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + models = [] + for nc in self._neighbor_count: + models.append(KNearestNeighborsClassifier(neighbor_count=nc)) + return models diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 21b130992..74af9151a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -71,8 +71,7 @@ def classifiers_with_choices() -> list[Classifier]: AdaBoostClassifier(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), - KNearestNeighborsClassifier(2), #TODO - LogisticClassifier(), #TODO + KNearestNeighborsClassifier(neighbor_count=Choice(2, 5)), RandomForestClassifier(), #TODO SupportVectorClassifier(), #TODO ] diff --git a/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py 
b/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py index 1cd420ea8..66437d782 100644 --- a/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/classical/classification/test_k_nearest_neighbors.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import KNearestNeighborsClassifier +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_neighbors == 2 - @pytest.mark.parametrize("neighbor_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, neighbor_count: int) -> None: + @pytest.mark.parametrize("neighbor_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, neighbor_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): KNearestNeighborsClassifier(neighbor_count=neighbor_count) From 1b62a6e2d550ab72002c692a4853db768c2a18bf Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 03:00:43 +0200 Subject: [PATCH 09/94] add randomforest --- .../classical/_bases/_random_forest_base.py | 43 ++++++++++++------- .../_random_forest_classifier.py | 33 ++++++++++++-- .../classification/test_classifier.py | 2 +- .../classification/test_random_forest.py | 13 +++--- 4 files changed, 65 insertions(+), 26 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_random_forest_base.py b/src/safeds/ml/classical/_bases/_random_forest_base.py index 56786e6ad..32eb70641 100644 --- a/src/safeds/ml/classical/_bases/_random_forest_base.py +++ b/src/safeds/ml/classical/_bases/_random_forest_base.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, 
_ClosedBound +from safeds.ml.hyperparameters import Choice class _RandomForestBase(ABC): @@ -14,23 +15,33 @@ class _RandomForestBase(ABC): @abstractmethod def __init__( self, - tree_count: int, - max_depth: int | None, - min_sample_count_in_leaves: int, + tree_count: int | Choice[int], + max_depth: int | Choice[int] | None, + min_sample_count_in_leaves: int | Choice[int], ) -> None: # Validation - _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) - _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) - _check_bounds( - "min_sample_count_in_leaves", - min_sample_count_in_leaves, - lower_bound=_ClosedBound(1), - ) + if isinstance(tree_count, Choice): + for value in tree_count: + _check_bounds("tree_count", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) + + if isinstance(max_depth, Choice): + for value in max_depth: + _check_bounds("max_depth", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) + + if isinstance(min_sample_count_in_leaves, Choice): + for value in min_sample_count_in_leaves: + _check_bounds("min_sample_count_in_leaves", value, lower_bound=_ClosedBound(1)) + else: + _check_bounds("min_sample_count_in_leaves", min_sample_count_in_leaves, lower_bound=_ClosedBound(1)) # Hyperparameters - self._tree_count: int = tree_count - self._max_depth: int | None = max_depth - self._min_sample_count_in_leaves: int = min_sample_count_in_leaves + self._tree_count: int | Choice[int] = tree_count + self._max_depth: int | Choice[int] | None = max_depth + self._min_sample_count_in_leaves: int | Choice[int] = min_sample_count_in_leaves def __hash__(self) -> int: return _structural_hash( @@ -44,16 +55,16 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def tree_count(self) -> int: + def tree_count(self) -> 
int | Choice[int]: """The number of trees used in the random forest.""" return self._tree_count @property - def max_depth(self) -> int | None: + def max_depth(self) -> int | Choice[int] | None: """The maximum depth of each tree.""" return self._max_depth @property - def min_sample_count_in_leaves(self) -> int: + def min_sample_count_in_leaves(self) -> int | Choice[int]: """The minimum number of samples that must remain in the leaves of each tree.""" return self._min_sample_count_in_leaves diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 3603ab292..4ea862066 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -3,9 +3,12 @@ from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase from ._classifier import Classifier +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -41,9 +44,9 @@ class RandomForestClassifier(Classifier, _RandomForestBase): def __init__( self, *, - tree_count: int = 100, - max_depth: int | None = None, - min_sample_count_in_leaves: int = 1, + tree_count: int | Choice[int] = 100, + max_depth: int | Choice[int] | None = None, + min_sample_count_in_leaves: int | Choice[int] = 1, ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -81,3 +84,27 @@ def _get_sklearn_model(self) -> ClassifierMixin: random_state=_get_random_seed(), n_jobs=-1, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( + 
self._min_sample_count_in_leaves, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance( + self._min_sample_count_in_leaves, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ + self._tree_count] + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ + self._max_depth] + min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [self._min_sample_count_in_leaves] + + models = [] + for tc in tree_count_choices: + for md in max_depth_choices: + for msc in min_sample_count_choices: + models.append(RandomForestClassifier(tree_count=tc, max_depth=md, min_sample_count_in_leaves=msc)) + return models diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 74af9151a..fa6c58349 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -72,7 +72,7 @@ def classifiers_with_choices() -> list[Classifier]: DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(2, 5)), - RandomForestClassifier(), #TODO + RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorClassifier(), #TODO ] diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index 2fe1950a2..d71590104 100644 --- 
a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import RandomForestClassifier +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("tree_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, tree_count: int) -> None: + @pytest.mark.parametrize("tree_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, tree_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestClassifier(tree_count=tree_count) @@ -37,8 +38,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.max_depth == 2 - @pytest.mark.parametrize("max_depth", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int) -> None: + @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestClassifier(max_depth=max_depth) @@ -53,7 +54,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0], ids=["minus_one", "zero"]) - def 
test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int) -> None: + @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestClassifier(min_sample_count_in_leaves=min_sample_count_in_leaves) From 6cecab7d553ff73e09ef2deaea0ca0a6f7010175 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 13:53:30 +0200 Subject: [PATCH 10/94] add svm --- .../_bases/_support_vector_machine_base.py | 13 ++++++++---- .../_support_vector_classifier.py | 21 +++++++++++++++++-- .../test_support_vector_machine.py | 5 +++-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py index fc85a4b58..091f690aa 100644 --- a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py +++ b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py @@ -6,6 +6,7 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _OpenBound +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.svm import SVC as SklearnSVC # noqa: N811 @@ -76,17 +77,21 @@ def sigmoid() -> _SupportVectorMachineBase.Kernel: @abstractmethod def __init__( self, - c: float, + c: float | Choice[float], kernel: _SupportVectorMachineBase.Kernel | None, ) -> None: if kernel is None: kernel = _SupportVectorMachineBase.Kernel.radial_basis_function() # Validation - _check_bounds("c", c, lower_bound=_OpenBound(0)) + if isinstance(c, Choice): + for value in c: + _check_bounds("c", value, lower_bound=_OpenBound(0)) + else: + _check_bounds("c", c, lower_bound=_OpenBound(0)) # Hyperparameters - self._c: float = c + self._c: float | Choice[float] = c self._kernel: 
_SupportVectorMachineBase.Kernel = kernel def __hash__(self) -> int: @@ -100,7 +105,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def c(self) -> float: + def c(self) -> float | Choice[float]: """The regularization strength.""" return self._c diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 407c8f97a..7753f104a 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -1,10 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _SupportVectorMachineBase from safeds.ml.classical.classification import Classifier +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -34,7 +37,7 @@ class SupportVectorClassifier(Classifier, _SupportVectorMachineBase): def __init__( self, *, - c: float = 1.0, + c: float | Choice[float] = 1.0, kernel: SupportVectorClassifier.Kernel | None = None, ) -> None: # Initialize superclasses @@ -79,3 +82,17 @@ def _get_sklearn_model(self) -> ClassifierMixin: ) self._kernel._apply(result) return result + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._c, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._c, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> 
list[Self]: + models = [] + for c in self._c: + models.append(SupportVectorClassifier(c=c)) + return models diff --git a/tests/safeds/ml/classical/classification/test_support_vector_machine.py b/tests/safeds/ml/classical/classification/test_support_vector_machine.py index a601d5cf8..6fae902ed 100644 --- a/tests/safeds/ml/classical/classification/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/classification/test_support_vector_machine.py @@ -6,6 +6,7 @@ from safeds.exceptions import OutOfBoundsError from safeds.ml.classical._bases._support_vector_machine_base import _Linear, _Polynomial from safeds.ml.classical.classification import SupportVectorClassifier +from safeds.ml.hyperparameters import Choice def kernels() -> list[SupportVectorClassifier.Kernel]: @@ -44,8 +45,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.C == 2 - @pytest.mark.parametrize("c", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, c: float) -> None: + @pytest.mark.parametrize("c", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, c: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): SupportVectorClassifier(c=c) From 95d693a504a2324865727fa49d1e7446f047bcc4 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 17 Jun 2024 13:57:48 +0200 Subject: [PATCH 11/94] add svm --- tests/safeds/ml/classical/classification/test_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index fa6c58349..69e30b4c8 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -73,7 +73,7 @@ def classifiers_with_choices() 
-> list[Classifier]: GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(2, 5)), RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), - SupportVectorClassifier(), #TODO + SupportVectorClassifier(c=Choice(0.5, 1.0)), ] From 20cc993d132cc1dd2afbdf08713b1897df41be5b Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:10:59 +0200 Subject: [PATCH 12/94] add ada boost regression --- .../regression/_ada_boost_regressor.py | 29 ++++++++++-- .../ml/classical/regression/_regressor.py | 43 ++++++++++++++++-- src/safeds/ml/metrics/_regressor_metric.py | 2 + .../classification/test_ada_boost.py | 4 +- .../ml/classical/regression/test_ada_boost.py | 9 ++-- .../ml/classical/regression/test_regressor.py | 45 ++++++++++++++++++- 6 files changed, 119 insertions(+), 13 deletions(-) diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 25f509229..0d13308e8 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -40,8 +43,8 @@ def __init__( self, *, learner: Regressor | None = None, - max_learner_count: int = 50, - learning_rate: float = 1.0, + max_learner_count: int | Choice[int] = 50 , + learning_rate: float | Choice[float] = 1.0, ) -> None: # Initialize superclasses 
Regressor.__init__(self) @@ -90,3 +93,23 @@ def _get_sklearn_model(self) -> RegressorMixin: n_estimators=self._max_learner_count, learning_rate=self._learning_rate, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ + self._max_learner_count] + learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ + self._learning_rate] + + models = [] + for mlc in max_learner_count_choices: + for lr in learning_rate_choices: + models.append(AdaBoostRegressor(max_learner_count=mlc, learning_rate=lr)) + return models diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index ce08bd506..646f0c925 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -1,12 +1,13 @@ from __future__ import annotations from abc import ABC -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError +from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, LearningError, \ + DatasetMissesDataError from safeds.ml.classical import SupervisedModel -from safeds.ml.metrics import RegressionMetrics +from safeds.ml.metrics import RegressionMetrics, RegressorMetric if TYPE_CHECKING: from 
safeds.data.tabular.containers import Column, Table @@ -244,6 +245,41 @@ def median_absolute_deviation(self, validation_or_test_set: Table | TabularDatas validation_or_test_set.get_column(self.get_target_name()), ) + def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_metric: RegressorMetric) -> Self: + if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): + raise PlainTableError + if training_set.to_table().row_count == 0: + raise DatasetMissesDataError + + self._check_additional_fit_by_exhaustive_search_preconditions(training_set) + + # TODO Cross Validation + + # TODO Multiprocessing + list_of_models = self._get_models_for_all_choices() + list_of_fitted_models = [] + for model in list_of_models: + list_of_fitted_models.append(model.fit(training_set)) + + best_model = None + for fitted_model in list_of_fitted_models: + if best_model is None: + best_model = fitted_model + else: + match optimization_metric.value: + case "mean_squared_error": + if fitted_model.mean_squared_error(training_set) < best_model.mean_squared_error(training_set): + best_model = fitted_model + case "mean_absolute_error": + if fitted_model.mean_absolute_error(training_set) < best_model.mean_absolute_error(training_set): + best_model = fitted_model + case "median_absolute_deviation": + if fitted_model.median_absolute_deviation(training_set) < best_model.median_absolute_deviation(training_set): + best_model = fitted_model + case "coefficient_of_determination": + if fitted_model.coefficient_of_determination(training_set) > best_model.coefficient_of_determination(training_set): + best_model = fitted_model + return best_model def _check_metrics_preconditions(actual: Column, expected: Column) -> None: # pragma: no cover if not actual.type.is_numeric: @@ -262,6 +298,7 @@ def _check_metrics_preconditions(actual: Column, expected: Column) -> None: # p ) + def _extract_table(table_or_dataset: Table | TabularDataset) -> Table: """Extract the table 
from the given table or dataset.""" if isinstance(table_or_dataset, TabularDataset): diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py index db631807e..390025051 100644 --- a/src/safeds/ml/metrics/_regressor_metric.py +++ b/src/safeds/ml/metrics/_regressor_metric.py @@ -4,4 +4,6 @@ class RegressorMetric(Enum): MEAN_SQUARED_ERROR = "mean_squared_error" MEAN_ABSOLUTE_ERROR = "mean_absolute_error" + MEDIAN_ABSOLUTE_DEVIATION = "median_absolute_deviation" + COEFFICIENT_OF_DETERMINATION = "coefficient_of_determination" diff --git a/tests/safeds/ml/classical/classification/test_ada_boost.py b/tests/safeds/ml/classical/classification/test_ada_boost.py index 0df5cf73f..28e44851b 100644 --- a/tests/safeds/ml/classical/classification/test_ada_boost.py +++ b/tests/safeds/ml/classical/classification/test_ada_boost.py @@ -51,7 +51,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.learning_rate == 2 - @pytest.mark.parametrize("learning_rate", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float) -> None: + @pytest.mark.parametrize("learning_rate", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): AdaBoostClassifier(learning_rate=learning_rate) diff --git a/tests/safeds/ml/classical/regression/test_ada_boost.py b/tests/safeds/ml/classical/regression/test_ada_boost.py index 36cd9cb64..58eebdd97 100644 --- a/tests/safeds/ml/classical/regression/test_ada_boost.py +++ b/tests/safeds/ml/classical/regression/test_ada_boost.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import AdaBoostRegressor 
+from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -34,8 +35,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("max_learner_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int) -> None: + @pytest.mark.parametrize("max_learner_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_learner_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): AdaBoostRegressor(max_learner_count=max_learner_count) @@ -50,7 +51,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.learning_rate == 2 - @pytest.mark.parametrize("learning_rate", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float) -> None: + @pytest.mark.parametrize("learning_rate", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): AdaBoostRegressor(learning_rate=learning_rate) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 829418b14..82141d597 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -13,7 +13,7 @@ MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, + PlainTableError, FittingWithoutChoiceError, FittingWithChoiceError, ) from safeds.ml.classical.regression import ( AdaBoostRegressor, @@ -29,6 +29,8 @@ SupportVectorRegressor, ) from 
safeds.ml.classical.regression._regressor import _check_metrics_preconditions +from safeds.ml.hyperparameters import Choice +from safeds.ml.metrics import RegressorMetric if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest @@ -60,6 +62,26 @@ def regressors() -> list[Regressor]: SupportVectorRegressor(), ] +def regressors_with_choices() -> list[Regressor]: + """ + Return the list of regressors with Choices as Parameters to test choice functionality. + + After you implemented a new regressor, add it to this list to ensure its `fit_by_exhaustive_search` method works as + expected. Place tests of methods that are specific to your regressor in a separate test file. + + Returns + ------- + regressors : list[Regressor] + The list of regressors to test. + """ + return [ + AdaBoostRegressor(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), + GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + KNearestNeighborsRegressor(neighbor_count=Choice(2, 5)), + RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), + SupportVectorRegressor(c=Choice(0.5, 1.0)), + ] @pytest.fixture() def valid_data() -> TabularDataset: @@ -73,6 +95,27 @@ def valid_data() -> TabularDataset: ).to_tabular_dataset(target_name="target", extra_names=["id"]) +@pytest.mark.parametrize("regressor_with_choice", regressors_with_choices(), ids=lambda x: x.__class__.__name__) +class TestChoiceRegressors: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, + regressor_with_choice: Regressor, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithoutChoiceError): + regressor_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) + + + def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, 
valid_data: TabularDataset): + model = (regressor_with_choice.fit_by_exhaustive_search(valid_data, RegressorMetric.MEAN_SQUARED_ERROR)) + assert isinstance(model, type(regressor_with_choice)) + pred = model.predict(valid_data) + assert isinstance(pred, TabularDataset) + + def test_should_raise_if_model_is_fitted_with_choice(self, regressor_with_choice: Regressor, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithChoiceError): + regressor_with_choice.fit(valid_data) + + @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, regressor: Regressor, valid_data: TabularDataset) -> None: From 963c6c07e5f429da97a3af54c91dbc8f39d36f2f Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:17:03 +0200 Subject: [PATCH 13/94] add decision tree regression --- .../regression/_decision_tree_regressor.py | 29 +++++++++++++++++-- .../regression/test_decision_tree.py | 9 +++--- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 24ed8565c..06fa1cdb3 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -37,8 +40,8 @@ class DecisionTreeRegressor(Regressor, _DecisionTreeBase): def __init__( self, *, - max_depth: int | None = None, - 
min_sample_count_in_leaves: int = 5, + max_depth: int | Choice[int] | None = None, + min_sample_count_in_leaves: int | Choice[int] = 5, ) -> None: # Initialize superclasses Regressor.__init__(self) @@ -71,3 +74,23 @@ def _get_sklearn_model(self) -> RegressorMixin: max_depth=self._max_depth, min_samples_leaf=self._min_sample_count_in_leaves, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ + self._max_depth] + min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ + self._min_sample_count_in_leaves] + + models = [] + for md in max_depth_choices: + for msc in min_sample_count_choices: + models.append(DecisionTreeRegressor(max_depth=md, min_sample_count_in_leaves=msc)) + return models diff --git a/tests/safeds/ml/classical/regression/test_decision_tree.py b/tests/safeds/ml/classical/regression/test_decision_tree.py index 0cf2beb20..1276dc6c2 100644 --- a/tests/safeds/ml/classical/regression/test_decision_tree.py +++ b/tests/safeds/ml/classical/regression/test_decision_tree.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import DecisionTreeRegressor +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is 
not None assert fitted_model._wrapped_model.max_depth == 2 - @pytest.mark.parametrize("max_depth", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int) -> None: + @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeRegressor(max_depth=max_depth) @@ -37,7 +38,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int) -> None: + @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeRegressor(min_sample_count_in_leaves=min_sample_count_in_leaves) From 92859f98e4fefd8bf724c2d4f5f77f13d1ad8b39 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:26:28 +0200 Subject: [PATCH 14/94] add gradient boosting regression --- .../_gradient_boosting_regressor.py | 29 +++++++++++++++++-- .../regression/test_gradient_boosting.py | 9 +++--- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index deef0971b..c6955900b 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import 
TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -37,8 +40,8 @@ class GradientBoostingRegressor(Regressor, _GradientBoostingBase): def __init__( self, *, - tree_count: int = 100, - learning_rate: float = 0.1, + tree_count: int | Choice[int] = 100, + learning_rate: float | Choice[float] = 0.1, ) -> None: # Initialize superclasses Regressor.__init__(self) @@ -71,3 +74,23 @@ def _get_sklearn_model(self) -> RegressorMixin: n_estimators=self._tree_count, learning_rate=self._learning_rate, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._tree_count, Choice) and not isinstance(self._learning_rate, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ + self._tree_count] + learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ + self._learning_rate] + + models = [] + for tc in tree_count_choices: + for lr in learning_rate_choices: + models.append(GradientBoostingRegressor(tree_count=tc, learning_rate=lr)) + return models diff --git a/tests/safeds/ml/classical/regression/test_gradient_boosting.py b/tests/safeds/ml/classical/regression/test_gradient_boosting.py index f72a5a9fd..99c37277a 100644 --- 
a/tests/safeds/ml/classical/regression/test_gradient_boosting.py +++ b/tests/safeds/ml/classical/regression/test_gradient_boosting.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import GradientBoostingRegressor +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("tree_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_1(self, tree_count: int) -> None: + @pytest.mark.parametrize("tree_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_1(self, tree_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): GradientBoostingRegressor(tree_count=tree_count) @@ -37,7 +38,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.learning_rate == 2 - @pytest.mark.parametrize("learning_rate", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float) -> None: + @pytest.mark.parametrize("learning_rate", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, learning_rate: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): GradientBoostingRegressor(learning_rate=learning_rate) From b4211920fa4e812ba5de3dfea7b260fb6685f76d Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:30:58 +0200 Subject: [PATCH 15/94] add knearestneighbors regression --- .../_k_nearest_neighbors_regressor.py | 36 +++++++++++++------ .../regression/test_k_nearest_neighbors.py | 5 +-- 2 files 
changed, 28 insertions(+), 13 deletions(-) diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index 2766fdcbb..1ee035c74 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -1,11 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _KNearestNeighborsBase from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -33,7 +35,7 @@ class KNearestNeighborsRegressor(Regressor, _KNearestNeighborsBase): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, neighbor_count: int) -> None: + def __init__(self, neighbor_count: int | Choice[int]) -> None: # Initialize superclasses Regressor.__init__(self) _KNearestNeighborsBase.__init__( @@ -51,15 +53,6 @@ def __hash__(self) -> int: # Template methods # ------------------------------------------------------------------------------------------------------------------ - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: - if self._neighbor_count > training_set.to_table().row_count: - raise ValueError( - ( - f"The parameter 'neighbor_count' ({self._neighbor_count}) has to be less than or equal to" - f" the sample size ({training_set.to_table().row_count})." 
- ), - ) - def _clone(self) -> KNearestNeighborsRegressor: return KNearestNeighborsRegressor( neighbor_count=self._neighbor_count, @@ -72,3 +65,24 @@ def _get_sklearn_model(self) -> RegressorMixin: n_neighbors=self._neighbor_count, n_jobs=-1, ) + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._neighbor_count, Choice): + raise FittingWithChoiceError + if self._neighbor_count > training_set._table.row_count: + raise ValueError( + ( + f"The parameter 'neighbor_count' ({self._neighbor_count}) has to be less than or equal to" + f" the sample size ({training_set._table.row_count})." + ), + ) + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._neighbor_count, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + models = [] + for nc in self._neighbor_count: + models.append(KNearestNeighborsRegressor(neighbor_count=nc)) + return models diff --git a/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py b/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py index 1bd09af73..1e1342d56 100644 --- a/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py +++ b/tests/safeds/ml/classical/regression/test_k_nearest_neighbors.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import KNearestNeighborsRegressor +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_neighbors == 2 - @pytest.mark.parametrize("neighbor_count", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, neighbor_count: int) -> None: + 
@pytest.mark.parametrize("neighbor_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, neighbor_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): KNearestNeighborsRegressor(neighbor_count=neighbor_count) From 3dc12c561c2a95a0d933d5463239f1ba34e2dc0c Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:35:04 +0200 Subject: [PATCH 16/94] add random forest regression --- .../regression/_random_forest_regressor.py | 35 ++++++++++++++++--- .../regression/test_random_forest.py | 13 +++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 711f12edb..ec689f4a5 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -41,9 +44,9 @@ class RandomForestRegressor(Regressor, _RandomForestBase): def __init__( self, *, - tree_count: int = 100, - max_depth: int | None = None, - min_sample_count_in_leaves: int = 5, + tree_count: int | Choice[int] = 100, + max_depth: int | Choice[int] | None = None, + min_sample_count_in_leaves: int | Choice[int] = 5, ) -> None: # Initialize superclasses Regressor.__init__(self) @@ -81,3 +84,27 @@ def _get_sklearn_model(self) -> RegressorMixin: random_state=_get_random_seed(), n_jobs=-1, ) + + def 
_check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( + self._min_sample_count_in_leaves, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance( + self._min_sample_count_in_leaves, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ + self._tree_count] + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ + self._max_depth] + min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [self._min_sample_count_in_leaves] + + models = [] + for tc in tree_count_choices: + for md in max_depth_choices: + for msc in min_sample_count_choices: + models.append(RandomForestRegressor(tree_count=tc, max_depth=md, min_sample_count_in_leaves=msc)) + return models diff --git a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index b1b12fb26..681e400e4 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import RandomForestRegressor +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,8 +22,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.n_estimators == 2 - @pytest.mark.parametrize("tree_count", [-1, 0], 
ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, tree_count: int) -> None: + @pytest.mark.parametrize("tree_count", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, tree_count: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestRegressor(tree_count=tree_count) @@ -37,8 +38,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.max_depth == 2 - @pytest.mark.parametrize("max_depth", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int) -> None: + @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestRegressor(max_depth=max_depth) @@ -53,7 +54,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int) -> None: + @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestRegressor(min_sample_count_in_leaves=min_sample_count_in_leaves) From 20c8d10304a98d0192f57ff60dbfa745a55e7471 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:35:28 +0200 Subject: [PATCH 17/94] add random forest regression --- 
.../ml/classical/classification/_random_forest_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 4ea862066..db6a39da6 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash from safeds.data.labeled.containers import TabularDataset From facde0cc98d61f464eb080340eb90018ea3f30db Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 16:38:41 +0200 Subject: [PATCH 18/94] add svm regression --- .../regression/_support_vector_regressor.py | 21 +++++++++++++++++-- .../regression/test_support_vector_machine.py | 5 +++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 03c08d664..dd6b8bfa5 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -1,10 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _SupportVectorMachineBase from safeds.ml.classical.regression import Regressor +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -34,7 +37,7 @@ class SupportVectorRegressor(Regressor, _SupportVectorMachineBase): def __init__( self, *, - c: 
float = 1.0, + c: float | Choice[float] = 1.0, kernel: SupportVectorRegressor.Kernel | None = None, ) -> None: # Initialize superclasses @@ -78,3 +81,17 @@ def _get_sklearn_model(self) -> RegressorMixin: ) self._kernel._apply(result) return result + + def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + if isinstance(self._c, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + if not isinstance(self._c, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[Self]: + models = [] + for c in self._c: + models.append(SupportVectorRegressor(c=c)) + return models diff --git a/tests/safeds/ml/classical/regression/test_support_vector_machine.py b/tests/safeds/ml/classical/regression/test_support_vector_machine.py index 173e688b2..86d79fbd6 100644 --- a/tests/safeds/ml/classical/regression/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/regression/test_support_vector_machine.py @@ -6,6 +6,7 @@ from safeds.exceptions import OutOfBoundsError from safeds.ml.classical._bases._support_vector_machine_base import _Linear, _Polynomial from safeds.ml.classical.regression import SupportVectorRegressor +from safeds.ml.hyperparameters import Choice def kernels() -> list[SupportVectorRegressor.Kernel]: @@ -44,8 +45,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.C == 2 - @pytest.mark.parametrize("c", [-1.0, 0.0], ids=["minus_one", "zero"]) - def test_should_raise_if_less_than_or_equal_to_0(self, c: float) -> None: + @pytest.mark.parametrize("c", [-1.0, 0.0, Choice(-1.0)], ids=["minus_one", "zero", "invalid_choice"]) + def test_should_raise_if_less_than_or_equal_to_0(self, c: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): SupportVectorRegressor(c=c) From 
24735a220ee0df9e6fa8ab1aca70ddea9fc35b5a Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 17:37:20 +0200 Subject: [PATCH 19/94] fix error --- tests/safeds/ml/classical/classification/test_classifier.py | 2 +- tests/safeds/ml/classical/regression/test_regressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 69e30b4c8..47cb1c6f5 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -71,7 +71,7 @@ def classifiers_with_choices() -> list[Classifier]: AdaBoostClassifier(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), - KNearestNeighborsClassifier(neighbor_count=Choice(2, 5)), + KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorClassifier(c=Choice(0.5, 1.0)), ] diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 82141d597..1f51c7fef 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -78,7 +78,7 @@ def regressors_with_choices() -> list[Regressor]: AdaBoostRegressor(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), - KNearestNeighborsRegressor(neighbor_count=Choice(2, 5)), + KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), 
RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorRegressor(c=Choice(0.5, 1.0)), ] From 0b2e1cceccd79f0d17effab98fa833080667fb41 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 17:56:53 +0200 Subject: [PATCH 20/94] fix error --- .../classical/classification/test_classifier.py | 13 ++++++++----- .../ml/classical/regression/test_regressor.py | 17 +++++++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 47cb1c6f5..49469f313 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -91,11 +91,6 @@ def valid_data() -> TabularDataset: @pytest.mark.parametrize("classifier_with_choice", classifiers_with_choices(), ids=lambda x: x.__class__.__name__) class TestChoiceClassifiers: - def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, - classifier_with_choice: Classifier, - valid_data: TabularDataset) -> None: - with pytest.raises(FittingWithoutChoiceError): - classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) def test_should_raise_if_no_positive_class_is_provided(self, classifier_with_choice: Classifier, valid_data: TabularDataset) -> None: @@ -114,6 +109,14 @@ def test_should_raise_if_model_is_fitted_with_choice(self, classifier_with_choic classifier_with_choice.fit(valid_data) +@pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) +class TestFitByExhaustiveSearch: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, + classifier: Classifier, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithoutChoiceError): + classifier.fit_by_exhaustive_search(valid_data, 
optimization_metric=ClassifierMetric.ACCURACY) + @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, classifier: Classifier, valid_data: TabularDataset) -> None: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 1f51c7fef..30f6803ff 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -62,6 +62,7 @@ def regressors() -> list[Regressor]: SupportVectorRegressor(), ] + def regressors_with_choices() -> list[Regressor]: """ Return the list of regressors with Choices as Parameters to test choice functionality. @@ -83,6 +84,7 @@ def regressors_with_choices() -> list[Regressor]: SupportVectorRegressor(c=Choice(0.5, 1.0)), ] + @pytest.fixture() def valid_data() -> TabularDataset: return Table( @@ -97,12 +99,6 @@ def valid_data() -> TabularDataset: @pytest.mark.parametrize("regressor_with_choice", regressors_with_choices(), ids=lambda x: x.__class__.__name__) class TestChoiceRegressors: - def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, - regressor_with_choice: Regressor, - valid_data: TabularDataset) -> None: - with pytest.raises(FittingWithoutChoiceError): - regressor_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) - def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, valid_data: TabularDataset): model = (regressor_with_choice.fit_by_exhaustive_search(valid_data, RegressorMetric.MEAN_SQUARED_ERROR)) @@ -116,6 +112,15 @@ def test_should_raise_if_model_is_fitted_with_choice(self, regressor_with_choice regressor_with_choice.fit(valid_data) +@pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) +class TestFitByExhaustiveSearch: + def 
test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, + regressor: Regressor, + valid_data: TabularDataset) -> None: + with pytest.raises(FittingWithoutChoiceError): + regressor.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) + + @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, regressor: Regressor, valid_data: TabularDataset) -> None: From cfa19e4356da2edbc19202236a7e4d01e75b45c5 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 18 Jun 2024 18:00:44 +0200 Subject: [PATCH 21/94] add warnings for models without choices --- .../ml/classical/classification/_logistic_classifier.py | 5 +++++ src/safeds/ml/classical/regression/_linear_regressor.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index e312e6b25..5d8cecff9 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -3,6 +3,8 @@ from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithoutChoiceError from ._classifier import Classifier @@ -39,3 +41,6 @@ def _get_sklearn_model(self) -> ClassifierMixin: random_state=_get_random_seed(), n_jobs=-1, ) + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index 8a61d13fd..8f765eaa9 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -3,6 +3,8 @@ from 
typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds.data.labeled.containers import TabularDataset +from safeds.exceptions import FittingWithoutChoiceError from ._regressor import Regressor @@ -36,3 +38,6 @@ def _get_sklearn_model(self) -> RegressorMixin: from sklearn.linear_model import LinearRegression as sk_LinearRegression return sk_LinearRegression(n_jobs=-1) + + def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + raise FittingWithoutChoiceError From 7708b4c7b2aa33e2b97d355a1c2956921e0ebf69 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 19 Jun 2024 17:52:14 +0200 Subject: [PATCH 22/94] add cross validation --- .../classical/classification/_classifier.py | 22 ++++++++++++++----- .../ml/classical/regression/_regressor.py | 22 ++++++++++++++----- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index a8480e2bb..6ef1f4d48 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -223,31 +223,41 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions(training_set) - #TODO Cross Validation + [train_split, test_split] = training_set.to_table().split_rows(0.75) #TODO Multiprocessing list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] for model in list_of_models: - list_of_fitted_models.append(model.fit(training_set)) + list_of_fitted_models.append(model.fit(train_split)) best_model = None + best_metric_value = None for fitted_model in list_of_fitted_models: if best_model is None: best_model = fitted_model + match optimization_metric.value: + case "accuracy": + best_metric_value = fitted_model.accuracy(test_split) + case "precision": + best_metric_value = 
fitted_model.precision(test_split, positive_class) + case "recall": + best_metric_value = fitted_model.recall(test_split, positive_class) + case "f1score": + best_metric_value = fitted_model.f1_score(test_split, positive_class) else: match optimization_metric.value: case "accuracy": - if fitted_model.accuracy(training_set) > best_model.accuracy(training_set): + if fitted_model.accuracy(test_split) > best_metric_value: best_model = fitted_model case "precision": - if fitted_model.precision(training_set, positive_class) > best_model.precision(training_set, positive_class): + if fitted_model.precision(test_split, positive_class) > best_metric_value: best_model = fitted_model case "recall": - if fitted_model.recall(training_set, positive_class) > best_model.recall(training_set, positive_class): + if fitted_model.recall(test_split, positive_class) > best_metric_value: best_model = fitted_model case "f1score": - if fitted_model.f1_score(training_set, positive_class) > best_model.f1_score(training_set, positive_class): + if fitted_model.f1_score(test_split, positive_class) > best_metric_value: best_model = fitted_model return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 646f0c925..84e2a0acc 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -253,31 +253,41 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions(training_set) - # TODO Cross Validation + [train_split, test_split] = training_set.to_table().split_rows(0.75) # TODO Multiprocessing list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] for model in list_of_models: - list_of_fitted_models.append(model.fit(training_set)) + list_of_fitted_models.append(model.fit(train_split)) best_model = None + best_metric_value = None for fitted_model in 
list_of_fitted_models: if best_model is None: best_model = fitted_model + match optimization_metric.value: + case "mean_squared_error": + best_metric_value = fitted_model.mean_squared_error(test_split) + case "mean_absolute_error": + best_metric_value = fitted_model.mean_absolute_error(test_split) + case "median_absolute_deviation": + best_metric_value = fitted_model.median_absolute_deviation(test_split) + case "coefficient_of_determination": + best_metric_value = fitted_model.coefficient_of_determination(test_split) else: match optimization_metric.value: case "mean_squared_error": - if fitted_model.mean_squared_error(training_set) < best_model.mean_squared_error(training_set): + if fitted_model.mean_squared_error(test_split) < best_metric_value: best_model = fitted_model case "mean_absolute_error": - if fitted_model.mean_absolute_error(training_set) < best_model.mean_absolute_error(training_set): + if fitted_model.mean_absolute_error(test_split) < best_metric_value: best_model = fitted_model case "median_absolute_deviation": - if fitted_model.median_absolute_deviation(training_set) < best_model.median_absolute_deviation(training_set): + if fitted_model.median_absolute_deviation(test_split) < best_metric_value: best_model = fitted_model case "coefficient_of_determination": - if fitted_model.coefficient_of_determination(training_set) > best_model.coefficient_of_determination(training_set): + if fitted_model.coefficient_of_determination(test_split) > best_metric_value: best_model = fitted_model return best_model From bfe0d1ea2cb6d3149bce2e19466bfb53a5f29e6b Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 19 Jun 2024 21:02:46 +0200 Subject: [PATCH 23/94] add multi processing --- .../ml/classical/classification/_classifier.py | 15 ++++++++++++--- .../ml/classical/regression/_regressor.py | 17 ++++++++++++++--- .../classical/classification/test_classifier.py | 8 ++++---- .../ml/classical/regression/test_regressor.py | 8 ++++---- 4 files changed, 34 insertions(+), 14 
deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 6ef1f4d48..d9961b233 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -1,6 +1,7 @@ from __future__ import annotations from abc import ABC +from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset @@ -224,12 +225,20 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions(training_set) [train_split, test_split] = training_set.to_table().split_rows(0.75) + train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) + test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) - #TODO Multiprocessing list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - for model in list_of_models: - list_of_fitted_models.append(model.fit(train_split)) + + with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: + futures = [] + for model in list_of_models: + futures.append(executor.submit(model.fit, train_split)) + [done, _] = wait(futures, return_when=ALL_COMPLETED) + for future in done: + list_of_fitted_models.append(future.result()) + executor.shutdown() best_model = None best_metric_value = None diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 84e2a0acc..2bfd36dff 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -1,6 +1,7 @@ from __future__ import annotations from abc import ABC +from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED 
from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset @@ -254,12 +255,22 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions(training_set) [train_split, test_split] = training_set.to_table().split_rows(0.75) + train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, + extra_names=training_set.extras.column_names) + test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, + extra_names=training_set.extras.column_names) - # TODO Multiprocessing list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - for model in list_of_models: - list_of_fitted_models.append(model.fit(train_split)) + + with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: + futures = [] + for model in list_of_models: + futures.append(executor.submit(model.fit, train_split)) + [done, _] = wait(futures, return_when=ALL_COMPLETED) + for future in done: + list_of_fitted_models.append(future.result()) + executor.shutdown() best_model = None best_metric_value = None diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 49469f313..c0c870d57 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -81,10 +81,10 @@ def classifiers_with_choices() -> list[Classifier]: def valid_data() -> TabularDataset: return Table( { - "id": [1, 4], - "feat1": [2, 5], - "feat2": [3, 6], - "target": [0, 1], + "id": [1, 4, 7, 10], + "feat1": [2, 5, 8, 11], + "feat2": [3, 6, 9, 12], + "target": [0, 1, 0, 1], }, ).to_tabular_dataset(target_name="target", extra_names=["id"]) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 30f6803ff..aacf8f1e9 100644 --- 
a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -89,10 +89,10 @@ def regressors_with_choices() -> list[Regressor]: def valid_data() -> TabularDataset: return Table( { - "id": [1, 4], - "feat1": [2, 5], - "feat2": [3, 6], - "target": [0, 1], + "id": [1, 4, 7, 10], + "feat1": [2, 5, 8, 11], + "feat2": [3, 6, 9, 12], + "target": [0, 1, 0, 1], }, ).to_tabular_dataset(target_name="target", extra_names=["id"]) From a63ad5e80c0c5c8e7d25a6350a1d2db998750de1 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 20 Jun 2024 17:08:36 +0200 Subject: [PATCH 24/94] linter fixes --- src/safeds/ml/classical/_supervised_model.py | 3 +-- .../classification/_ada_boost_classifier.py | 7 +++---- .../ml/classical/classification/_classifier.py | 12 +++++++----- .../classification/_decision_tree_classifier.py | 2 +- .../classification/_gradient_boosting_classifier.py | 6 +++--- .../_k_nearest_neighbors_classifier.py | 2 +- .../classification/_random_forest_classifier.py | 2 +- .../ml/classical/regression/_ada_boost_regressor.py | 2 +- .../classical/regression/_decision_tree_regressor.py | 4 ++-- .../regression/_gradient_boosting_regressor.py | 2 +- .../regression/_k_nearest_neighbors_regressor.py | 2 +- .../classical/regression/_random_forest_regressor.py | 2 +- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index f562d3378..1886358f6 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -247,8 +247,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: # noqa: B027 """ - Check additional preconditions for fitting the model by exhaustive search and raise an error if any are - violated. 
+ Check additional preconditions for fitting the model by exhaustive search and raise an error if any are violated. Parameters ---------- diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index f048668ce..b93cf0606 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -1,15 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self, Any +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError, LearningError +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase from ._classifier import Classifier -from ...hyperparameters import Choice -from ...metrics import ClassifierMetric +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index d9961b233..c943c4141 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -4,6 +4,7 @@ from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED from typing import TYPE_CHECKING, Self +from safeds.data.tabular.containers import Table from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import ModelNotFittedError, PlainTableError, DatasetMissesDataError, LearningError from safeds.ml.classical import SupervisedModel @@ -12,8 +13,6 @@ if TYPE_CHECKING: from typing import Any - from safeds.data.tabular.containers import Table - class Classifier(SupervisedModel, ABC): """A model for classification 
tasks.""" @@ -220,13 +219,16 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me if training_set.to_table().row_count == 0: raise DatasetMissesDataError if optimization_metric.value in {"precision", "recall", "f1score"} and positive_class is None: - raise LearningError(f"Please provide a positive class when using optimization metric '{optimization_metric.value}'") + raise LearningError( + f"Please provide a positive class when using optimization metric '{optimization_metric.value}'") self._check_additional_fit_by_exhaustive_search_preconditions(training_set) [train_split, test_split] = training_set.to_table().split_rows(0.75) - train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) - test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) + train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, + extra_names=training_set.extras.column_names) + test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, + extra_names=training_set.extras.column_names) list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index ca1dfc3d1..41dbf2a54 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -8,7 +8,7 @@ from safeds.ml.classical._bases import _DecisionTreeBase from ._classifier import Classifier -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py 
b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 5b0b16b63..1c89ecbde 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -1,17 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase from ._classifier import Classifier -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from safeds.data.labeled.containers import TabularDataset class GradientBoostingClassifier(Classifier, _GradientBoostingBase): diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index a69959d8e..3e5cc52b9 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -7,7 +7,7 @@ from safeds.ml.classical._bases import _KNearestNeighborsBase from ._classifier import Classifier -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index db6a39da6..352e52fae 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -8,7 +8,7 @@ from safeds.ml.classical._bases import _RandomForestBase from ._classifier import Classifier 
-from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 0d13308e8..5d6903fb3 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -8,7 +8,7 @@ from safeds.ml.classical._bases import _AdaBoostBase from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 06fa1cdb3..b2375f034 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -3,15 +3,15 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin + from safeds.data.labeled.containers import TabularDataset class DecisionTreeRegressor(Regressor, _DecisionTreeBase): diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index c6955900b..3e6780d50 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -8,7 +8,7 @@ from safeds.ml.classical._bases import _GradientBoostingBase 
from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index 1ee035c74..1c8bd8c48 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -7,7 +7,7 @@ from safeds.ml.classical._bases import _KNearestNeighborsBase from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index ec689f4a5..9cf89f2a1 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -8,7 +8,7 @@ from safeds.ml.classical._bases import _RandomForestBase from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin From ae1a492c604bbd8cbddcf12ac269b3a1f99860e2 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 22 Jun 2024 19:13:59 +0200 Subject: [PATCH 25/94] remove unnecessary parameter --- src/safeds/ml/classical/_supervised_model.py | 7 +------ .../ml/classical/classification/_ada_boost_classifier.py | 2 +- .../classification/_decision_tree_classifier.py | 2 +- .../classification/_gradient_boosting_classifier.py | 2 +- .../classification/_k_nearest_neighbors_classifier.py | 2 +- .../ml/classical/classification/_logistic_classifier.py | 2 +- .../classification/_random_forest_classifier.py | 9 +++++---- .../classification/_support_vector_classifier.py | 2 +- 8 files
changed, 12 insertions(+), 16 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 1886358f6..121829d4c 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -245,14 +245,9 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N The training data containing the features and target. """ - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: # noqa: B027 + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: # noqa: B027 """ Check additional preconditions for fitting the model by exhaustive search and raise an error if any are violated. - - Parameters - ---------- - training_set: - The training data containing the features and target. """ def _check_additional_predict_preconditions(self, dataset: Table | TabularDataset) -> None: # noqa: B027 diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index b93cf0606..989b12536 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -97,7 +97,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 
41dbf2a54..287db3325 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -79,7 +79,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 1c89ecbde..868025dd6 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -79,7 +79,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 3e5cc52b9..0f2d9d17f 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -80,7 +80,7 @@ def 
_check_additional_fit_preconditions(self, training_set: TabularDataset) -> N ), ) - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._neighbor_count, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 5d8cecff9..6de39e6fe 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -42,5 +42,5 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_jobs=-1, ) - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 352e52fae..9c503e558 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -90,9 +90,8 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: - if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance( - self._min_sample_count_in_leaves, Choice): + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: + if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError def 
_get_models_for_all_choices(self) -> list[Self]: @@ -100,7 +99,9 @@ def _get_models_for_all_choices(self) -> list[Self]: self._tree_count] max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ self._max_depth] - min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [self._min_sample_count_in_leaves] + min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, + Choice) else [ + self._min_sample_count_in_leaves] models = [] for tc in tree_count_choices: diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 7753f104a..064f150c3 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -87,7 +87,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N if isinstance(self._c, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._c, Choice): raise FittingWithoutChoiceError From 0da15e6cadd4e2bb5a3317e3420e117ffa34a568 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 22 Jun 2024 19:40:09 +0200 Subject: [PATCH 26/94] remove unnecessary parameter --- src/safeds/ml/classical/_supervised_model.py | 4 ++-- .../ml/classical/classification/_ada_boost_classifier.py | 3 +-- .../ml/classical/classification/_decision_tree_classifier.py | 3 +-- .../classification/_gradient_boosting_classifier.py | 3 +-- .../ml/classical/classification/_random_forest_classifier.py | 3 +-- .../classical/classification/_support_vector_classifier.py | 3 +-- src/safeds/ml/classical/regression/_ada_boost_regressor.py | 5 ++---
.../ml/classical/regression/_decision_tree_regressor.py | 5 ++--- .../ml/classical/regression/_gradient_boosting_regressor.py | 5 ++--- .../classical/regression/_k_nearest_neighbors_regressor.py | 2 +- src/safeds/ml/classical/regression/_linear_regressor.py | 3 +-- .../ml/classical/regression/_random_forest_regressor.py | 5 ++--- .../ml/classical/regression/_support_vector_regressor.py | 5 ++--- 13 files changed, 19 insertions(+), 30 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 121829d4c..873978948 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -235,7 +235,7 @@ def get_target_type(self) -> DataType: # Template methods # ------------------------------------------------------------------------------------------------------------------ - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: # noqa: B027 + def _check_additional_fit_preconditions(self, **kwargs) -> None: # noqa: B027 """ Check additional preconditions for fitting the model and raise an error if any are violated. @@ -247,7 +247,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: # noqa: B027 """ - Check additional preconditions for fitting the model by exhaustive search and raise an error if any are violated. + Check additional preconditions for fitting by exhaustive search and raise an error if any are violated. 
""" def _check_additional_predict_preconditions(self, dataset: Table | TabularDataset) -> None: # noqa: B027 diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 989b12536..60527f979 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase @@ -93,7 +92,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 287db3325..c96b7a17d 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase @@ -75,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if 
isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 868025dd6..889a7b134 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -11,7 +11,6 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin - from safeds.data.labeled.containers import TabularDataset class GradientBoostingClassifier(Classifier, _GradientBoostingBase): @@ -75,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 9c503e558..938f9cb65 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase @@ -85,7 +84,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or 
isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 064f150c3..8d6745fad 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _SupportVectorMachineBase from safeds.ml.classical.classification import Classifier @@ -83,7 +82,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 5d6903fb3..504cf5cef 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase @@ -94,11 +93,11 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_learner_count, Choice) or 
isinstance(self._learning_rate, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index b2375f034..3262eb18b 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -11,7 +11,6 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin - from safeds.data.labeled.containers import TabularDataset class DecisionTreeRegressor(Regressor, _DecisionTreeBase): @@ -75,11 +74,11 @@ def _get_sklearn_model(self) -> RegressorMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index 3e6780d50..51f76fb8e 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from 
safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase @@ -75,11 +74,11 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index 1c8bd8c48..f2d2d8c73 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -77,7 +77,7 @@ def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> N ), ) - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._neighbor_count, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index 8f765eaa9..bd2682e4e 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash 
-from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithoutChoiceError from ._regressor import Regressor @@ -39,5 +38,5 @@ def _get_sklearn_model(self) -> RegressorMixin: return sk_LinearRegression(n_jobs=-1) - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 9cf89f2a1..9338603a0 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _get_random_seed, _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase @@ -85,12 +84,12 @@ def _get_sklearn_model(self) -> RegressorMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py 
index dd6b8bfa5..132714aab 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _SupportVectorMachineBase from safeds.ml.classical.regression import Regressor @@ -82,11 +81,11 @@ def _get_sklearn_model(self) -> RegressorMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError - def _check_additional_fit_by_exhaustive_search_preconditions(self, training_set: TabularDataset) -> None: + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._c, Choice): raise FittingWithoutChoiceError From e784d81ab1d824db2824c1691ee1535cb6392760 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 22 Jun 2024 19:46:57 +0200 Subject: [PATCH 27/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 4 +--- src/safeds/ml/classical/regression/_regressor.py | 10 +++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 873978948..73479c199 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -246,9 +246,7 @@ def _check_additional_fit_preconditions(self, **kwargs) -> None: # noqa: B027 """ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: # noqa: B027 - """ - Check additional preconditions for fitting by exhaustive search and raise an error if any are violated. 
- """ + """Check additional preconditions for fitting by exhaustive search and raise an error if any are violated.""" def _check_additional_predict_preconditions(self, dataset: Table | TabularDataset) -> None: # noqa: B027 """ diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 2bfd36dff..19e7c0a82 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -5,13 +5,13 @@ from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, LearningError, \ - DatasetMissesDataError +from safeds.data.tabular.containers import Table +from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, DatasetMissesDataError from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import RegressionMetrics, RegressorMetric if TYPE_CHECKING: - from safeds.data.tabular.containers import Column, Table + from safeds.data.tabular.containers import Column class Regressor(SupervisedModel, ABC): @@ -252,7 +252,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me if training_set.to_table().row_count == 0: raise DatasetMissesDataError - self._check_additional_fit_by_exhaustive_search_preconditions(training_set) + self._check_additional_fit_by_exhaustive_search_preconditions() [train_split, test_split] = training_set.to_table().split_rows(0.75) train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, @@ -302,6 +302,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me best_model = fitted_model return best_model + def _check_metrics_preconditions(actual: Column, expected: Column) -> None: # pragma: no cover if not actual.type.is_numeric: raise TypeError(f"Column 'actual' is not numerical but 
{actual.type}.") @@ -319,7 +320,6 @@ def _check_metrics_preconditions(actual: Column, expected: Column) -> None: # p ) - def _extract_table(table_or_dataset: Table | TabularDataset) -> Table: """Extract the table from the given table or dataset.""" if isinstance(table_or_dataset, TabularDataset): From ada2d3db2693b8d283eb7ff247f57a0682a5195c Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 22 Jun 2024 20:48:29 +0200 Subject: [PATCH 28/94] linter fix --- src/safeds/exceptions/_ml.py | 4 ++-- src/safeds/ml/classical/_supervised_model.py | 9 ++++--- .../classical/classification/_classifier.py | 24 +++++++++---------- .../ml/classical/regression/_regressor.py | 22 ++++++++--------- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index f000c7a8f..0295f0584 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -24,13 +24,13 @@ def __init__(self) -> None: class FittingWithChoiceError(Exception): """Raised when a model is fitted with a choice object as a parameter.""" - def __init__(self): + def __init__(self) -> None: super().__init__(f"Error occurred while fitting: Trying to fit with a Choice Parameter. Please use " f"fit_by_exhaustive_search() instead.") class FittingWithoutChoiceError(Exception): """Raised when a model is fitted by exhaustive search without a choice object as a parameter.""" - def __init__(self): + def __init__(self) -> None: super().__init__(f"Error occurred while fitting: Trying to fit by exhaustive search without a Choice " f"Parameter. 
Please use fit() instead.") diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 73479c199..c5bdc5c9f 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -17,11 +17,11 @@ PlainTableError, PredictionError, ) -from safeds.ml.metrics import ClassifierMetric, RegressorMetric if TYPE_CHECKING: from sklearn.base import ClassifierMixin, RegressorMixin - + from safeds.ml.classical.classification import KNearestNeighborsClassifier + from safeds.ml.classical.regression import KNearestNeighborsRegressor from safeds.data.tabular.typing import DataType, Schema @@ -89,7 +89,10 @@ def fit(self, training_set: TabularDataset) -> Self: if training_set.to_table().row_count == 0: raise DatasetMissesDataError - self._check_additional_fit_preconditions(training_set) + if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): + self._check_additional_fit_preconditions(training_set) + else: + self._check_additional_fit_preconditions() wrapped_model = self._get_sklearn_model() _fit_sklearn_model_in_place(wrapped_model, training_set) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index c943c4141..24198695e 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -222,12 +222,12 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me raise LearningError( f"Please provide a positive class when using optimization metric '{optimization_metric.value}'") - self._check_additional_fit_by_exhaustive_search_preconditions(training_set) + self._check_additional_fit_by_exhaustive_search_preconditions() [train_split, test_split] = training_set.to_table().split_rows(0.75) - train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, + train_data = 
train_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) - test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, + test_data = test_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) list_of_models = self._get_models_for_all_choices() @@ -236,7 +236,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: futures = [] for model in list_of_models: - futures.append(executor.submit(model.fit, train_split)) + futures.append(executor.submit(model.fit, train_data)) [done, _] = wait(futures, return_when=ALL_COMPLETED) for future in done: list_of_fitted_models.append(future.result()) @@ -249,26 +249,26 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me best_model = fitted_model match optimization_metric.value: case "accuracy": - best_metric_value = fitted_model.accuracy(test_split) + best_metric_value = fitted_model.accuracy(test_data) case "precision": - best_metric_value = fitted_model.precision(test_split, positive_class) + best_metric_value = fitted_model.precision(test_data, positive_class) case "recall": - best_metric_value = fitted_model.recall(test_split, positive_class) + best_metric_value = fitted_model.recall(test_data, positive_class) case "f1score": - best_metric_value = fitted_model.recall(test_split, positive_class) + best_metric_value = fitted_model.recall(test_data, positive_class) else: match optimization_metric.value: case "accuracy": - if fitted_model.accuracy(test_split) > best_metric_value: + if fitted_model.accuracy(test_data) > best_metric_value: best_model = fitted_model case "precision": - if fitted_model.precision(test_split, positive_class) > best_metric_value: + if fitted_model.precision(test_data, positive_class) > best_metric_value: best_model = fitted_model case 
"recall": - if fitted_model.recall(test_split, positive_class) > best_metric_value: + if fitted_model.recall(test_data, positive_class) > best_metric_value: best_model = fitted_model case "f1score": - if fitted_model.f1_score(test_split, positive_class) > best_metric_value: + if fitted_model.f1_score(test_data, positive_class) > best_metric_value: best_model = fitted_model return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 19e7c0a82..ca32a6d31 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -255,9 +255,9 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions() [train_split, test_split] = training_set.to_table().split_rows(0.75) - train_split = train_split.to_tabular_dataset(target_name=training_set.target.name, + train_data = train_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) - test_split = test_split.to_tabular_dataset(target_name=training_set.target.name, + test_data = test_split.to_tabular_dataset(target_name=training_set.target.name, extra_names=training_set.extras.column_names) list_of_models = self._get_models_for_all_choices() @@ -266,7 +266,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: futures = [] for model in list_of_models: - futures.append(executor.submit(model.fit, train_split)) + futures.append(executor.submit(model.fit, train_data)) [done, _] = wait(futures, return_when=ALL_COMPLETED) for future in done: list_of_fitted_models.append(future.result()) @@ -279,26 +279,26 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me best_model = fitted_model match optimization_metric.value: case "mean_squared_error": 
- best_metric_value = fitted_model.mean_squared_error(test_split) + best_metric_value = fitted_model.mean_squared_error(test_data) case "mean_absolute_error": - best_metric_value = fitted_model.mean_absolute_error(test_split) + best_metric_value = fitted_model.mean_absolute_error(test_data) case "median_absolute_deviation": - best_metric_value = fitted_model.median_absolute_deviation(test_split) + best_metric_value = fitted_model.median_absolute_deviation(test_data) case "coefficient_of_determination": - best_metric_value = fitted_model.coefficient_of_determination(test_split) + best_metric_value = fitted_model.coefficient_of_determination(test_data) else: match optimization_metric.value: case "mean_squared_error": - if fitted_model.mean_squared_error(test_split) < best_metric_value: + if fitted_model.mean_squared_error(test_data) < best_metric_value: best_model = fitted_model case "mean_absolute_error": - if fitted_model.mean_absolute_error(test_split) < best_metric_value: + if fitted_model.mean_absolute_error(test_data) < best_metric_value: best_model = fitted_model case "median_absolute_deviation": - if fitted_model.median_absolute_deviation(test_split) < best_metric_value: + if fitted_model.median_absolute_deviation(test_data) < best_metric_value: best_model = fitted_model case "coefficient_of_determination": - if fitted_model.coefficient_of_determination(test_split) > best_metric_value: + if fitted_model.coefficient_of_determination(test_data) > best_metric_value: best_model = fitted_model return best_model From ebd1369767474cbd2c0ce9da407f54d75eab6f2b Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 12:49:01 +0200 Subject: [PATCH 29/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 10 ++++++---- .../classical/classification/_ada_boost_classifier.py | 2 +- .../classification/_decision_tree_classifier.py | 2 +- .../classification/_gradient_boosting_classifier.py | 2 +- .../classification/_random_forest_classifier.py | 2 +- 
.../classification/_support_vector_classifier.py | 2 +- .../ml/classical/regression/_ada_boost_regressor.py | 2 +- .../classical/regression/_decision_tree_regressor.py | 2 +- .../regression/_gradient_boosting_regressor.py | 2 +- .../classical/regression/_random_forest_regressor.py | 2 +- .../classical/regression/_support_vector_regressor.py | 2 +- 11 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index c5bdc5c9f..0d9685980 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -89,10 +89,12 @@ def fit(self, training_set: TabularDataset) -> Self: if training_set.to_table().row_count == 0: raise DatasetMissesDataError - if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): - self._check_additional_fit_preconditions(training_set) - else: - self._check_additional_fit_preconditions() + #if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): + # self._check_additional_fit_preconditions(training_set) + #else: + # self._check_additional_fit_preconditions() + + self._check_additional_fit_preconditions(training_set=training_set) wrapped_model = self._get_sklearn_model() _fit_sklearn_model_in_place(wrapped_model, training_set) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 60527f979..b50710479 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -92,7 +92,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._max_learner_count, Choice) or 
isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index c96b7a17d..0e0ac82d0 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 889a7b134..33109e70e 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 938f9cb65..46382abf0 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -84,7 +84,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: 
if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 8d6745fad..ddc9bab78 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -82,7 +82,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 504cf5cef..2d0e82669 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -93,7 +93,7 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 3262eb18b..b061bda5d 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> RegressorMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if 
isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index 51f76fb8e..5b11066a4 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 9338603a0..117eff7a2 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -84,7 +84,7 @@ def _get_sklearn_model(self) -> RegressorMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self) -> None: + def _check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 132714aab..218521eec 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -81,7 +81,7 @@ def _get_sklearn_model(self) -> RegressorMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self) -> None: + def 
_check_additional_fit_preconditions(self, **kwargs) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError From fe0a906c3ed4badc5234b1f63ab8a79ee37a90bd Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 13:02:00 +0200 Subject: [PATCH 30/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 10 ++++------ .../classification/_decision_tree_classifier.py | 2 +- .../classification/_gradient_boosting_classifier.py | 2 +- .../classification/_random_forest_classifier.py | 2 +- .../classification/_support_vector_classifier.py | 2 +- .../ml/classical/regression/_ada_boost_regressor.py | 2 +- .../classical/regression/_decision_tree_regressor.py | 2 +- .../regression/_gradient_boosting_regressor.py | 2 +- .../classical/regression/_random_forest_regressor.py | 2 +- .../classical/regression/_support_vector_regressor.py | 2 +- 10 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 0d9685980..c5bdc5c9f 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -89,12 +89,10 @@ def fit(self, training_set: TabularDataset) -> Self: if training_set.to_table().row_count == 0: raise DatasetMissesDataError - #if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): - # self._check_additional_fit_preconditions(training_set) - #else: - # self._check_additional_fit_preconditions() - - self._check_additional_fit_preconditions(training_set=training_set) + if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): + self._check_additional_fit_preconditions(training_set) + else: + self._check_additional_fit_preconditions() wrapped_model = self._get_sklearn_model() _fit_sklearn_model_in_place(wrapped_model, training_set) diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py 
b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 0e0ac82d0..c96b7a17d 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 33109e70e..889a7b134 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 46382abf0..938f9cb65 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -84,7 +84,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( self._min_sample_count_in_leaves, Choice): raise 
FittingWithChoiceError diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index ddc9bab78..8d6745fad 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -82,7 +82,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 2d0e82669..504cf5cef 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -93,7 +93,7 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index b061bda5d..3262eb18b 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> RegressorMixin: min_samples_leaf=self._min_sample_count_in_leaves, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_depth, Choice) or isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git 
a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index 5b11066a4..51f76fb8e 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -74,7 +74,7 @@ def _get_sklearn_model(self) -> RegressorMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 117eff7a2..9338603a0 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -84,7 +84,7 @@ def _get_sklearn_model(self) -> RegressorMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( self._min_sample_count_in_leaves, Choice): raise FittingWithChoiceError diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 218521eec..132714aab 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -81,7 +81,7 @@ def _get_sklearn_model(self) -> RegressorMixin: self._kernel._apply(result) return result - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._c, Choice): raise FittingWithChoiceError From 226ef31974f29441062694839aae7ca4ed3cad41 Mon 
Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 14:16:23 +0200 Subject: [PATCH 31/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 24 ++++--------------- .../classification/_ada_boost_classifier.py | 2 +- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index c5bdc5c9f..3277c6e8e 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -7,6 +7,8 @@ from safeds._utils import _structural_hash from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Column, Table +from safeds.ml.classical.classification import KNearestNeighborsClassifier +from safeds.ml.classical.regression import KNearestNeighborsRegressor from safeds.exceptions import ( DatasetMissesDataError, DatasetMissesFeaturesError, @@ -20,8 +22,6 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin, RegressorMixin - from safeds.ml.classical.classification import KNearestNeighborsClassifier - from safeds.ml.classical.regression import KNearestNeighborsRegressor from safeds.data.tabular.typing import DataType, Schema @@ -238,15 +238,8 @@ def get_target_type(self) -> DataType: # Template methods # ------------------------------------------------------------------------------------------------------------------ - def _check_additional_fit_preconditions(self, **kwargs) -> None: # noqa: B027 - """ - Check additional preconditions for fitting the model and raise an error if any are violated. - - Parameters - ---------- - training_set: - The training data containing the features and target. 
- """ + def _check_additional_fit_preconditions(self) -> None: # noqa: B027 + """Check additional preconditions for fitting the model and raise an error if any are violated.""" def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: # noqa: B027 """Check additional preconditions for fitting by exhaustive search and raise an error if any are violated.""" @@ -262,14 +255,7 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase """ def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 - """ - Check additional preconditions for predicting with the model and raise an error if any are violated. - - Returns - ------- - model_list: - A list of every possible model, given all Choice Parameters - """ + """Get a list of all possible models, given the Parameter Choices""" @abstractmethod def _clone(self) -> Self: diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index b50710479..60527f979 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -92,7 +92,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: learning_rate=self._learning_rate, ) - def _check_additional_fit_preconditions(self, **kwargs) -> None: + def _check_additional_fit_preconditions(self) -> None: if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): raise FittingWithChoiceError From 798e93230fabc05d018b08b1dc368212696644c0 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 16:39:22 +0200 Subject: [PATCH 32/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 2 +- .../ml/classical/regression/_support_vector_regressor.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 3277c6e8e..21897ed87 
100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -255,7 +255,7 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase """ def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 - """Get a list of all possible models, given the Parameter Choices""" + """Get a list of all possible models, given the Parameter Choices.""" @abstractmethod def _clone(self) -> Self: diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 132714aab..520992cb3 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError @@ -89,7 +89,8 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._c, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[SupportVectorRegressor]: + assert isinstance(self._c, Choice) models = [] for c in self._c: models.append(SupportVectorRegressor(c=c)) From ca5617784a52802212351024ed68a3978b718d15 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 17:17:52 +0200 Subject: [PATCH 33/94] linter fix --- src/safeds/ml/classical/_bases/_gradient_boosting_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py index 7758606cd..36cd1a5a4 100644 --- a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py +++ b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py @@ 
-26,8 +26,8 @@ def __init__( _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) if isinstance(learning_rate, Choice): - for value in learning_rate: - _check_bounds("learning_rate", value, lower_bound=_OpenBound(0)) + for lr in learning_rate: + _check_bounds("learning_rate", lr, lower_bound=_OpenBound(0)) else: _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) From 603e356cd4c475a3cd8b3df05d6ad7b6de30f2fc Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 17:25:47 +0200 Subject: [PATCH 34/94] linter fix --- src/safeds/ml/classical/_bases/_ada_boost_base.py | 8 ++++---- .../ml/classical/_bases/_decision_tree_base.py | 8 ++++---- .../ml/classical/_bases/_gradient_boosting_base.py | 4 ++-- .../ml/classical/_bases/_k_nearest_neighbors_base.py | 4 ++-- .../ml/classical/_bases/_random_forest_base.py | 12 ++++++------ 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py b/src/safeds/ml/classical/_bases/_ada_boost_base.py index 04f615b46..35b0a83ec 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -25,14 +25,14 @@ def __init__( ) -> None: # Validation if isinstance(max_learner_count, Choice): - for value in max_learner_count: - _check_bounds("max_learner_count", value, lower_bound=_ClosedBound(1)) + for mlc in max_learner_count: + _check_bounds("max_learner_count", mlc, lower_bound=_ClosedBound(1)) else: _check_bounds("max_learner_count", max_learner_count, lower_bound=_ClosedBound(1)) if isinstance(learning_rate, Choice): - for value in learning_rate: - _check_bounds("learning_rate", value, lower_bound=_OpenBound(0)) + for lr in learning_rate: + _check_bounds("learning_rate", lr, lower_bound=_OpenBound(0)) else: _check_bounds("learning_rate", learning_rate, lower_bound=_OpenBound(0)) diff --git a/src/safeds/ml/classical/_bases/_decision_tree_base.py 
b/src/safeds/ml/classical/_bases/_decision_tree_base.py index f200b5ca3..088264d3b 100644 --- a/src/safeds/ml/classical/_bases/_decision_tree_base.py +++ b/src/safeds/ml/classical/_bases/_decision_tree_base.py @@ -20,13 +20,13 @@ def __init__( ) -> None: # Validation if isinstance(max_depth, Choice): - for value in max_depth: - _check_bounds("max_depth", value, lower_bound=_ClosedBound(1)) + for md in max_depth: + _check_bounds("max_depth", md, lower_bound=_ClosedBound(1)) else: _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) if isinstance(min_sample_count_in_leaves, Choice): - for value in min_sample_count_in_leaves: - _check_bounds("min_sample_count_in_leaves", value, lower_bound=_ClosedBound(1)) + for msc in min_sample_count_in_leaves: + _check_bounds("min_sample_count_in_leaves", msc, lower_bound=_ClosedBound(1)) else: _check_bounds("min_sample_count_in_leaves", min_sample_count_in_leaves, lower_bound=_ClosedBound(1)) diff --git a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py index 36cd1a5a4..df1831b74 100644 --- a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py +++ b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py @@ -20,8 +20,8 @@ def __init__( ) -> None: # Validation if isinstance(tree_count, Choice): - for value in tree_count: - _check_bounds("tree_count", value, lower_bound=_ClosedBound(1)) + for tc in tree_count: + _check_bounds("tree_count", tc, lower_bound=_ClosedBound(1)) else: _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) diff --git a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py index 2d6527d38..3f52ebb28 100644 --- a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py +++ b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py @@ -19,8 +19,8 @@ def __init__( ) -> None: # Validation if isinstance(neighbor_count, Choice): - for 
value in neighbor_count: - _check_bounds("neighbor_count", value, lower_bound=_ClosedBound(1)) + for nc in neighbor_count: + _check_bounds("neighbor_count", nc, lower_bound=_ClosedBound(1)) else: _check_bounds("neighbor_count", neighbor_count, lower_bound=_ClosedBound(1)) diff --git a/src/safeds/ml/classical/_bases/_random_forest_base.py b/src/safeds/ml/classical/_bases/_random_forest_base.py index 32eb70641..d8a20d750 100644 --- a/src/safeds/ml/classical/_bases/_random_forest_base.py +++ b/src/safeds/ml/classical/_bases/_random_forest_base.py @@ -21,20 +21,20 @@ def __init__( ) -> None: # Validation if isinstance(tree_count, Choice): - for value in tree_count: - _check_bounds("tree_count", value, lower_bound=_ClosedBound(1)) + for tc in tree_count: + _check_bounds("tree_count", tc, lower_bound=_ClosedBound(1)) else: _check_bounds("tree_count", tree_count, lower_bound=_ClosedBound(1)) if isinstance(max_depth, Choice): - for value in max_depth: - _check_bounds("max_depth", value, lower_bound=_ClosedBound(1)) + for md in max_depth: + _check_bounds("max_depth", md, lower_bound=_ClosedBound(1)) else: _check_bounds("max_depth", max_depth, lower_bound=_ClosedBound(1)) if isinstance(min_sample_count_in_leaves, Choice): - for value in min_sample_count_in_leaves: - _check_bounds("min_sample_count_in_leaves", value, lower_bound=_ClosedBound(1)) + for msc in min_sample_count_in_leaves: + _check_bounds("min_sample_count_in_leaves", msc, lower_bound=_ClosedBound(1)) else: _check_bounds("min_sample_count_in_leaves", min_sample_count_in_leaves, lower_bound=_ClosedBound(1)) From 45e5d47ebbd5c49a1fcc51d50eb37e5352398a53 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 17:28:12 +0200 Subject: [PATCH 35/94] linter fix --- src/safeds/ml/classical/classification/_ada_boost_classifier.py | 2 +- .../ml/classical/classification/_decision_tree_classifier.py | 2 +- .../classical/classification/_gradient_boosting_classifier.py | 2 +- 
.../classical/classification/_k_nearest_neighbors_classifier.py | 2 +- .../ml/classical/classification/_random_forest_classifier.py | 2 +- .../ml/classical/classification/_support_vector_classifier.py | 2 +- src/safeds/ml/classical/regression/_ada_boost_regressor.py | 2 +- src/safeds/ml/classical/regression/_decision_tree_regressor.py | 2 +- .../ml/classical/regression/_gradient_boosting_regressor.py | 2 +- .../ml/classical/regression/_k_nearest_neighbors_regressor.py | 2 +- src/safeds/ml/classical/regression/_random_forest_regressor.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 60527f979..038de9723 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -100,7 +100,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]: max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ self._max_learner_count] learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index c96b7a17d..5852b9acd 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -82,7 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_depth, Choice) and not 
isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[DecisionTreeClassifier]: max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ self._max_depth] min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 889a7b134..1c8182d8a 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -82,7 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[GradientBoostingClassifier]: tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ self._tree_count] learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 0f2d9d17f..912bc0cfd 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -84,7 +84,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._neighbor_count, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[KNearestNeighborsClassifier]: 
models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsClassifier(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index 938f9cb65..ce68656a0 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -93,7 +93,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[RandomForestClassifier]: tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ self._tree_count] max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 8d6745fad..7f292b95c 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -90,7 +90,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._c, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[SupportVectorClassifier]: models = [] for c in self._c: models.append(SupportVectorClassifier(c=c)) diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 504cf5cef..9630ce822 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ 
b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -101,7 +101,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[AdaBoostRegressor]: max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ self._max_learner_count] learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 3262eb18b..c6fdfef93 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -82,7 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[DecisionTreeRegressor]: max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ self._max_depth] min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index 51f76fb8e..077be4691 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -82,7 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._tree_count, Choice) 
and not isinstance(self._learning_rate, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[GradientBoostingRegressor]: tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ self._tree_count] learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index f2d2d8c73..295dc7dec 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -81,7 +81,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: if not isinstance(self._neighbor_count, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[KNearestNeighborsRegressor]: models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsRegressor(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 9338603a0..07b573f4b 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -94,7 +94,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: self._min_sample_count_in_leaves, Choice): raise FittingWithoutChoiceError - def _get_models_for_all_choices(self) -> list[Self]: + def _get_models_for_all_choices(self) -> list[RandomForestRegressor]: tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ self._tree_count] max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ From 107a2224c20310548142a06d83de9ec13c0ff0cd 
Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 23 Jun 2024 17:51:49 +0200 Subject: [PATCH 36/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 7 +------ .../ml/classical/classification/_classifier.py | 2 ++ .../_k_nearest_neighbors_classifier.py | 5 +++-- src/safeds/ml/classical/regression/_regressor.py | 5 ++++- .../ml/classical/classification/test_classifier.py | 13 +++++++++++-- .../ml/classical/regression/test_regressor.py | 8 ++++++-- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 21897ed87..b97cc1a3c 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -7,8 +7,6 @@ from safeds._utils import _structural_hash from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Column, Table -from safeds.ml.classical.classification import KNearestNeighborsClassifier -from safeds.ml.classical.regression import KNearestNeighborsRegressor from safeds.exceptions import ( DatasetMissesDataError, DatasetMissesFeaturesError, @@ -89,10 +87,7 @@ def fit(self, training_set: TabularDataset) -> Self: if training_set.to_table().row_count == 0: raise DatasetMissesDataError - if isinstance(self, KNearestNeighborsClassifier) or isinstance(self, KNearestNeighborsRegressor): - self._check_additional_fit_preconditions(training_set) - else: - self._check_additional_fit_preconditions() + self._check_additional_fit_preconditions() wrapped_model = self._get_sklearn_model() _fit_sklearn_model_in_place(wrapped_model, training_set) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 24198695e..bbb015a67 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -231,6 +231,8 @@ def fit_by_exhaustive_search(self, 
training_set: TabularDataset, optimization_me extra_names=training_set.extras.column_names) list_of_models = self._get_models_for_all_choices() + if len(list_of_models) < 1: + raise LearningError("Please provide at least one Value in a Choice Parameter") list_of_fitted_models = [] with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 912bc0cfd..9eeca3565 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -1,12 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _KNearestNeighborsBase +from safeds.ml.classical.classification import Classifier -from ._classifier import Classifier from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: @@ -52,6 +52,7 @@ def __hash__(self) -> int: _KNearestNeighborsBase.__hash__(self), ) + # ------------------------------------------------------------------------------------------------------------------ # Template methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index ca32a6d31..4af4eaff1 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -6,7 +6,8 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, 
DatasetMissesDataError +from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, DatasetMissesDataError, \ + LearningError from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import RegressionMetrics, RegressorMetric @@ -261,6 +262,8 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me extra_names=training_set.extras.column_names) list_of_models = self._get_models_for_all_choices() + if len(list_of_models) < 1: + raise LearningError("Please provide at least one Value in a Choice Parameter") list_of_fitted_models = [] with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index c0c870d57..b9e962fc6 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -72,7 +72,8 @@ def classifiers_with_choices() -> list[Classifier]: DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), - RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), + RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), + min_sample_count_in_leaves=Choice(1, 2)), SupportVectorClassifier(c=Choice(0.5, 1.0)), ] @@ -109,14 +110,22 @@ def test_should_raise_if_model_is_fitted_with_choice(self, classifier_with_choic classifier_with_choice.fit(valid_data) -@pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFitByExhaustiveSearch: + + @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) def 
test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, classifier: Classifier, valid_data: TabularDataset) -> None: with pytest.raises(FittingWithoutChoiceError): classifier.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) + def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice(self, + valid_data: TabularDataset) -> None: + with pytest.raises(LearningError): + AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search(valid_data, + optimization_metric=ClassifierMetric.ACCURACY) + + @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, classifier: Classifier, valid_data: TabularDataset) -> None: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index aacf8f1e9..faec4cf3c 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -13,7 +13,7 @@ MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, FittingWithoutChoiceError, FittingWithChoiceError, + PlainTableError, FittingWithoutChoiceError, FittingWithChoiceError, LearningError, ) from safeds.ml.classical.regression import ( AdaBoostRegressor, @@ -112,14 +112,18 @@ def test_should_raise_if_model_is_fitted_with_choice(self, regressor_with_choice regressor_with_choice.fit(valid_data) -@pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) class TestFitByExhaustiveSearch: + @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, regressor: Regressor, valid_data: TabularDataset) -> None: with pytest.raises(FittingWithoutChoiceError): regressor.fit_by_exhaustive_search(valid_data, 
optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) + def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice(self, valid_data: TabularDataset) -> None: + with pytest.raises(LearningError): + AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) + @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) class TestFit: From ea0214f8939620912d6886bff87db9b6c27a656e Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 24 Jun 2024 00:08:32 +0200 Subject: [PATCH 37/94] linter fix --- .../classical/classification/_k_nearest_neighbors_classifier.py | 1 + .../ml/classical/classification/_support_vector_classifier.py | 1 + .../ml/classical/regression/_k_nearest_neighbors_regressor.py | 1 + src/safeds/ml/classical/regression/_support_vector_regressor.py | 2 +- tests/safeds/ml/classical/classification/test_classifier.py | 2 +- tests/safeds/ml/classical/regression/test_regressor.py | 2 +- 6 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 9eeca3565..5dbd5d538 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -86,6 +86,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[KNearestNeighborsClassifier]: + assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsClassifier(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py 
b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 7f292b95c..ca00162f8 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -91,6 +91,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorClassifier]: + assert isinstance(self._c, Choice) # this is always true and just here for linting models = [] for c in self._c: models.append(SupportVectorClassifier(c=c)) diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index 295dc7dec..0d65d3775 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -82,6 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[KNearestNeighborsRegressor]: + assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsRegressor(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 520992cb3..9f8e0ce75 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -90,7 +90,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorRegressor]: - assert isinstance(self._c, Choice) + assert isinstance(self._c, Choice) # this is always true and just here for linting models = 
[] for c in self._c: models.append(SupportVectorRegressor(c=c)) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index b9e962fc6..8c69bdf05 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -98,7 +98,7 @@ def test_should_raise_if_no_positive_class_is_provided(self, classifier_with_cho with pytest.raises(LearningError): classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.PRECISION) - def test_workflow_with_choice_parameter(self, classifier_with_choice: Classifier, valid_data: TabularDataset): + def test_workflow_with_choice_parameter(self, classifier_with_choice: Classifier, valid_data: TabularDataset) -> None: model = classifier_with_choice.fit_by_exhaustive_search(valid_data, ClassifierMetric.ACCURACY) assert isinstance(model, type(classifier_with_choice)) pred = model.predict(valid_data) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index faec4cf3c..d6136b2c4 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -100,7 +100,7 @@ def valid_data() -> TabularDataset: @pytest.mark.parametrize("regressor_with_choice", regressors_with_choices(), ids=lambda x: x.__class__.__name__) class TestChoiceRegressors: - def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, valid_data: TabularDataset): + def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, valid_data: TabularDataset) -> None: model = (regressor_with_choice.fit_by_exhaustive_search(valid_data, RegressorMetric.MEAN_SQUARED_ERROR)) assert isinstance(model, type(regressor_with_choice)) pred = model.predict(valid_data) From 674d0836bebbee8ece624ad847988930e54b3839 Mon Sep 17 00:00:00 2001 
From: Simon Date: Mon, 24 Jun 2024 01:25:10 +0200 Subject: [PATCH 38/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 4 ++++ .../classification/_k_nearest_neighbors_classifier.py | 5 ++--- .../classical/regression/_k_nearest_neighbors_regressor.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index b97cc1a3c..de68d3633 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -88,6 +88,7 @@ def fit(self, training_set: TabularDataset) -> Self: raise DatasetMissesDataError self._check_additional_fit_preconditions() + self._check_more_additional_fit_preconditions(training_set) wrapped_model = self._get_sklearn_model() _fit_sklearn_model_in_place(wrapped_model, training_set) @@ -236,6 +237,9 @@ def get_target_type(self) -> DataType: def _check_additional_fit_preconditions(self) -> None: # noqa: B027 """Check additional preconditions for fitting the model and raise an error if any are violated.""" + def _check_more_additional_fit_preconditions(self, training_set: TabularDataset) -> None: # noqa: B027 + """Check additional preconditions for fitting the model and raise an error if any are violated.""" + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: # noqa: B027 """Check additional preconditions for fitting by exhaustive search and raise an error if any are violated.""" diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index 5dbd5d538..c9268d24e 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -52,7 +52,6 @@ def __hash__(self) -> int: _KNearestNeighborsBase.__hash__(self), ) - # 
------------------------------------------------------------------------------------------------------------------ # Template methods # ------------------------------------------------------------------------------------------------------------------ @@ -70,7 +69,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_more_additional_fit_preconditions(self, training_set: TabularDataset) -> None: if isinstance(self._neighbor_count, Choice): raise FittingWithChoiceError if self._neighbor_count > training_set._table.row_count: @@ -86,7 +85,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[KNearestNeighborsClassifier]: - assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting + assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsClassifier(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index 0d65d3775..e008eaf4f 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -66,7 +66,7 @@ def _get_sklearn_model(self) -> RegressorMixin: n_jobs=-1, ) - def _check_additional_fit_preconditions(self, training_set: TabularDataset) -> None: + def _check_more_additional_fit_preconditions(self, training_set: TabularDataset) -> None: if isinstance(self._neighbor_count, Choice): raise FittingWithChoiceError if self._neighbor_count > training_set._table.row_count: From 7264c09dc95b9c86327fb7084bc42eea444b08e8 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 24 Jun 2024 01:30:02 +0200 
Subject: [PATCH 39/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 1 + src/safeds/ml/classical/classification/_classifier.py | 1 + src/safeds/ml/classical/regression/_regressor.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index de68d3633..f90ba6e0f 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -253,6 +253,7 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase The dataset containing at least the features. """ + @abstractmethod def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 """Get a list of all possible models, given the Parameter Choices.""" diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index bbb015a67..d5721c66c 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -272,6 +272,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me case "f1score": if fitted_model.f1_score(test_data, positive_class) > best_metric_value: best_model = fitted_model + assert best_model is not None return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 4af4eaff1..444787394 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -303,6 +303,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me case "coefficient_of_determination": if fitted_model.coefficient_of_determination(test_data) > best_metric_value: best_model = fitted_model + assert best_model is not None return best_model From 9670810f09d3747df0be696ceb9b0c20f35bd50e Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 24 Jun 2024 14:16:23 +0200 
Subject: [PATCH 40/94] linter fix --- src/safeds/ml/classical/_supervised_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index f90ba6e0f..3696eebe7 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -253,9 +253,9 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase The dataset containing at least the features. """ - @abstractmethod def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 """Get a list of all possible models, given the Parameter Choices.""" + raise NotImplementedError # pragma: no cover @abstractmethod def _clone(self) -> Self: From 761c1c5d778d67c60becd8fe6d68a2cbb5eaaf42 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:18:05 +0000 Subject: [PATCH 41/94] style: apply automated linter fixes --- src/safeds/exceptions/_ml.py | 17 +++++--- .../ml/classical/_bases/_ada_boost_base.py | 3 +- src/safeds/ml/classical/_supervised_model.py | 3 +- .../classification/_ada_boost_classifier.py | 15 ++++--- .../classical/classification/_classifier.py | 24 ++++++----- .../_decision_tree_classifier.py | 14 ++++--- .../_gradient_boosting_classifier.py | 12 +++--- .../_k_nearest_neighbors_classifier.py | 1 - .../classification/_logistic_classifier.py | 1 - .../_random_forest_classifier.py | 31 ++++++++------ .../_support_vector_classifier.py | 4 +- .../regression/_ada_boost_regressor.py | 16 ++++---- .../regression/_decision_tree_regressor.py | 14 ++++--- .../_gradient_boosting_regressor.py | 12 +++--- .../_k_nearest_neighbors_regressor.py | 6 +-- .../regression/_random_forest_regressor.py | 30 +++++++++----- .../ml/classical/regression/_regressor.py | 21 ++++++---- .../regression/_support_vector_regressor.py | 2 +- src/safeds/ml/metrics/_regressor_metric.py | 1 - 
.../classification/test_classifier.py | 40 ++++++++++++------- .../classification/test_decision_tree.py | 6 +-- .../classification/test_random_forest.py | 4 +- .../regression/test_decision_tree.py | 4 +- .../regression/test_random_forest.py | 4 +- .../ml/classical/regression/test_regressor.py | 26 +++++++----- 25 files changed, 187 insertions(+), 124 deletions(-) diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 0295f0584..d53fbbd7e 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -24,15 +24,23 @@ def __init__(self) -> None: class FittingWithChoiceError(Exception): """Raised when a model is fitted with a choice object as a parameter.""" + def __init__(self) -> None: - super().__init__(f"Error occurred while fitting: Trying to fit with a Choice Parameter. Please use " - f"fit_by_exhaustive_search() instead.") + super().__init__( + "Error occurred while fitting: Trying to fit with a Choice Parameter. Please use " + "fit_by_exhaustive_search() instead.", + ) + class FittingWithoutChoiceError(Exception): """Raised when a model is fitted by exhaustive search without a choice object as a parameter.""" + def __init__(self) -> None: - super().__init__(f"Error occurred while fitting: Trying to fit by exhaustive search without a Choice " - f"Parameter. Please use fit() instead.") + super().__init__( + "Error occurred while fitting: Trying to fit by exhaustive search without a Choice " + "Parameter. Please use fit() instead.", + ) + class InvalidFitDataError(Exception): """Raised when a Neural Network is fitted on invalid data.""" @@ -41,7 +49,6 @@ def __init__(self, reason: str) -> None: super().__init__(f"The given Fit Data is invalid:\n{reason}") - class LearningError(Exception): """ Raised when an error occurred while training a model. 
diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py b/src/safeds/ml/classical/_bases/_ada_boost_base.py index 35b0a83ec..5eff400af 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -1,8 +1,7 @@ from __future__ import annotations -import types from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Collection +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _OpenBound diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 3696eebe7..9062bdf70 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -20,6 +20,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin, RegressorMixin + from safeds.data.tabular.typing import DataType, Schema @@ -253,7 +254,7 @@ def _check_additional_predict_preconditions(self, dataset: Table | TabularDatase The dataset containing at least the features. 
""" - def _get_models_for_all_choices(self) -> list[Self]: # noqa: B027 + def _get_models_for_all_choices(self) -> list[Self]: """Get a list of all possible models, given the Parameter Choices.""" raise NotImplementedError # pragma: no cover diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 038de9723..080be2763 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase +from safeds.ml.hyperparameters import Choice from ._classifier import Classifier -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -85,6 +85,7 @@ def _clone(self) -> AdaBoostClassifier: def _get_sklearn_model(self) -> ClassifierMixin: from sklearn.ensemble import AdaBoostClassifier as SklearnAdaBoostClassifier + learner = self.learner._get_sklearn_model() if self.learner is not None else None return SklearnAdaBoostClassifier( estimator=learner, @@ -101,10 +102,12 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]: - max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ - self._max_learner_count] - learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ - self._learning_rate] + max_learner_count_choices = ( + self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] + ) + learning_rate_choices = ( + 
self._learning_rate if isinstance(self._learning_rate, Choice) else [self._learning_rate] + ) models = [] for mlc in max_learner_count_choices: diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index d5721c66c..373a3633f 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -1,12 +1,12 @@ from __future__ import annotations from abc import ABC -from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED +from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, wait from typing import TYPE_CHECKING, Self -from safeds.data.tabular.containers import Table from safeds.data.labeled.containers import TabularDataset -from safeds.exceptions import ModelNotFittedError, PlainTableError, DatasetMissesDataError, LearningError +from safeds.data.tabular.containers import Table +from safeds.exceptions import DatasetMissesDataError, LearningError, ModelNotFittedError, PlainTableError from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import ClassificationMetrics, ClassifierMetric @@ -212,23 +212,27 @@ def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: positive_class, ) - def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_metric: ClassifierMetric, - positive_class: Any = None) -> Self: + def fit_by_exhaustive_search( + self, training_set: TabularDataset, optimization_metric: ClassifierMetric, positive_class: Any = None, + ) -> Self: if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): raise PlainTableError if training_set.to_table().row_count == 0: raise DatasetMissesDataError if optimization_metric.value in {"precision", "recall", "f1score"} and positive_class is None: raise LearningError( - f"Please provide a positive class when using optimization metric '{optimization_metric.value}'") + f"Please provide a 
positive class when using optimization metric '{optimization_metric.value}'", + ) self._check_additional_fit_by_exhaustive_search_preconditions() [train_split, test_split] = training_set.to_table().split_rows(0.75) - train_data = train_split.to_tabular_dataset(target_name=training_set.target.name, - extra_names=training_set.extras.column_names) - test_data = test_split.to_tabular_dataset(target_name=training_set.target.name, - extra_names=training_set.extras.column_names) + train_data = train_split.to_tabular_dataset( + target_name=training_set.target.name, extra_names=training_set.extras.column_names, + ) + test_data = test_split.to_tabular_dataset( + target_name=training_set.target.name, extra_names=training_set.extras.column_names, + ) list_of_models = self._get_models_for_all_choices() if len(list_of_models) < 1: diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 5852b9acd..37da7bc13 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase +from safeds.ml.hyperparameters import Choice from ._classifier import Classifier -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -83,10 +83,12 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[DecisionTreeClassifier]: - max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ - self._max_depth] - min_sample_count_choices = 
self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ - self._min_sample_count_in_leaves] + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [self._max_depth] + min_sample_count_choices = ( + self._min_sample_count_in_leaves + if isinstance(self._min_sample_count_in_leaves, Choice) + else [self._min_sample_count_in_leaves] + ) models = [] for md in max_depth_choices: diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 1c8182d8a..61d733437 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase +from safeds.ml.hyperparameters import Choice from ._classifier import Classifier -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -83,10 +83,10 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[GradientBoostingClassifier]: - tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ - self._tree_count] - learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ - self._learning_rate] + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [self._tree_count] + learning_rate_choices = ( + self._learning_rate if isinstance(self._learning_rate, Choice) else [self._learning_rate] + ) models = [] for tc in tree_count_choices: diff --git 
a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py index c9268d24e..3181802ae 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py @@ -6,7 +6,6 @@ from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _KNearestNeighborsBase from safeds.ml.classical.classification import Classifier - from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 6de39e6fe..3e632d2ae 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash -from safeds.data.labeled.containers import TabularDataset from safeds.exceptions import FittingWithoutChoiceError from ._classifier import Classifier diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index ce68656a0..e917c7b53 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase +from safeds.ml.hyperparameters import Choice from ._classifier import Classifier -from safeds.ml.hyperparameters import Choice if 
TYPE_CHECKING: from sklearn.base import ClassifierMixin @@ -85,22 +85,29 @@ def _get_sklearn_model(self) -> ClassifierMixin: ) def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( - self._min_sample_count_in_leaves, Choice): + if ( + isinstance(self._tree_count, Choice) + or isinstance(self._max_depth, Choice) + or isinstance(self._min_sample_count_in_leaves, Choice) + ): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance(self._min_sample_count_in_leaves, Choice): + if ( + not isinstance(self._tree_count, Choice) + and not isinstance(self._max_depth, Choice) + and not isinstance(self._min_sample_count_in_leaves, Choice) + ): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[RandomForestClassifier]: - tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ - self._tree_count] - max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ - self._max_depth] - min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, - Choice) else [ - self._min_sample_count_in_leaves] + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [self._tree_count] + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [self._max_depth] + min_sample_count_choices = ( + self._min_sample_count_in_leaves + if isinstance(self._min_sample_count_in_leaves, Choice) + else [self._min_sample_count_in_leaves] + ) models = [] for tc in tree_count_choices: diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index ca00162f8..37e2897b9 100644 --- 
a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError @@ -91,7 +91,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorClassifier]: - assert isinstance(self._c, Choice) # this is always true and just here for linting + assert isinstance(self._c, Choice) # this is always true and just here for linting models = [] for c in self._c: models.append(SupportVectorClassifier(c=c)) diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 9630ce822..292bf4f57 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _AdaBoostBase +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -42,7 +42,7 @@ def __init__( self, *, learner: Regressor | None = None, - max_learner_count: int | Choice[int] = 50 , + max_learner_count: int | Choice[int] = 50, learning_rate: float | Choice[float] = 1.0, ) -> None: # Initialize superclasses @@ -102,10 +102,12 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise 
FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostRegressor]: - max_learner_count_choices = self._max_learner_count if isinstance(self._max_learner_count, Choice) else [ - self._max_learner_count] - learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ - self._learning_rate] + max_learner_count_choices = ( + self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] + ) + learning_rate_choices = ( + self._learning_rate if isinstance(self._learning_rate, Choice) else [self._learning_rate] + ) models = [] for mlc in max_learner_count_choices: diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index c6fdfef93..c81614d1b 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _DecisionTreeBase +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -83,10 +83,12 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[DecisionTreeRegressor]: - max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ - self._max_depth] - min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [ - self._min_sample_count_in_leaves] + max_depth_choices = self._max_depth if isinstance(self._max_depth, 
Choice) else [self._max_depth] + min_sample_count_choices = ( + self._min_sample_count_in_leaves + if isinstance(self._min_sample_count_in_leaves, Choice) + else [self._min_sample_count_in_leaves] + ) models = [] for md in max_depth_choices: diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py index 077be4691..50ee2214f 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regressor.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _GradientBoostingBase +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -83,10 +83,10 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[GradientBoostingRegressor]: - tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ - self._tree_count] - learning_rate_choices = self._learning_rate if isinstance(self._learning_rate, Choice) else [ - self._learning_rate] + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [self._tree_count] + learning_rate_choices = ( + self._learning_rate if isinstance(self._learning_rate, Choice) else [self._learning_rate] + ) models = [] for tc in tree_count_choices: diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py index e008eaf4f..d999996b6 100644 --- 
a/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors_regressor.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _KNearestNeighborsBase +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -82,7 +82,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[KNearestNeighborsRegressor]: - assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting + assert isinstance(self._neighbor_count, Choice) # this is always true and just here for linting models = [] for nc in self._neighbor_count: models.append(KNearestNeighborsRegressor(neighbor_count=nc)) diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 07b573f4b..6f2b869da 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.classical._bases import _RandomForestBase +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -85,21 +85,29 @@ def 
_get_sklearn_model(self) -> RegressorMixin: ) def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._tree_count, Choice) or isinstance(self._max_depth, Choice) or isinstance( - self._min_sample_count_in_leaves, Choice): + if ( + isinstance(self._tree_count, Choice) + or isinstance(self._max_depth, Choice) + or isinstance(self._min_sample_count_in_leaves, Choice) + ): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._tree_count, Choice) and not isinstance(self._max_depth, Choice) and not isinstance( - self._min_sample_count_in_leaves, Choice): + if ( + not isinstance(self._tree_count, Choice) + and not isinstance(self._max_depth, Choice) + and not isinstance(self._min_sample_count_in_leaves, Choice) + ): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[RandomForestRegressor]: - tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [ - self._tree_count] - max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [ - self._max_depth] - min_sample_count_choices = self._min_sample_count_in_leaves if isinstance(self._min_sample_count_in_leaves, Choice) else [self._min_sample_count_in_leaves] + tree_count_choices = self._tree_count if isinstance(self._tree_count, Choice) else [self._tree_count] + max_depth_choices = self._max_depth if isinstance(self._max_depth, Choice) else [self._max_depth] + min_sample_count_choices = ( + self._min_sample_count_in_leaves + if isinstance(self._min_sample_count_in_leaves, Choice) + else [self._min_sample_count_in_leaves] + ) models = [] for tc in tree_count_choices: diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 444787394..2570cd8f4 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -1,13 +1,18 @@ from __future__ import 
annotations from abc import ABC -from concurrent.futures import ProcessPoolExecutor, wait, ALL_COMPLETED +from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, wait from typing import TYPE_CHECKING, Self from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from safeds.exceptions import ColumnLengthMismatchError, ModelNotFittedError, PlainTableError, DatasetMissesDataError, \ - LearningError +from safeds.exceptions import ( + ColumnLengthMismatchError, + DatasetMissesDataError, + LearningError, + ModelNotFittedError, + PlainTableError, +) from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import RegressionMetrics, RegressorMetric @@ -256,10 +261,12 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me self._check_additional_fit_by_exhaustive_search_preconditions() [train_split, test_split] = training_set.to_table().split_rows(0.75) - train_data = train_split.to_tabular_dataset(target_name=training_set.target.name, - extra_names=training_set.extras.column_names) - test_data = test_split.to_tabular_dataset(target_name=training_set.target.name, - extra_names=training_set.extras.column_names) + train_data = train_split.to_tabular_dataset( + target_name=training_set.target.name, extra_names=training_set.extras.column_names, + ) + test_data = test_split.to_tabular_dataset( + target_name=training_set.target.name, extra_names=training_set.extras.column_names, + ) list_of_models = self._get_models_for_all_choices() if len(list_of_models) < 1: diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 9f8e0ce75..81d6b506e 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -90,7 +90,7 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise 
FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorRegressor]: - assert isinstance(self._c, Choice) # this is always true and just here for linting + assert isinstance(self._c, Choice) # this is always true and just here for linting models = [] for c in self._c: models.append(SupportVectorRegressor(c=c)) diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py index 390025051..a1ba09374 100644 --- a/src/safeds/ml/metrics/_regressor_metric.py +++ b/src/safeds/ml/metrics/_regressor_metric.py @@ -6,4 +6,3 @@ class RegressorMetric(Enum): MEAN_ABSOLUTE_ERROR = "mean_absolute_error" MEDIAN_ABSOLUTE_DEVIATION = "median_absolute_deviation" COEFFICIENT_OF_DETERMINATION = "coefficient_of_determination" - diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 8c69bdf05..cf12c25d4 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -9,10 +9,13 @@ from safeds.exceptions import ( DatasetMissesDataError, DatasetMissesFeaturesError, + FittingWithChoiceError, + FittingWithoutChoiceError, + LearningError, MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, FittingWithoutChoiceError, LearningError, FittingWithChoiceError, + PlainTableError, ) from safeds.ml.classical.classification import ( AdaBoostClassifier, @@ -72,8 +75,9 @@ def classifiers_with_choices() -> list[Classifier]: DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), - RandomForestClassifier(tree_count=Choice(1, 2), max_depth=Choice(1, 2), - min_sample_count_in_leaves=Choice(1, 2)), + RandomForestClassifier( + tree_count=Choice(1, 2), max_depth=Choice(1, 2), 
min_sample_count_in_leaves=Choice(1, 2), + ), SupportVectorClassifier(c=Choice(0.5, 1.0)), ] @@ -93,19 +97,23 @@ def valid_data() -> TabularDataset: @pytest.mark.parametrize("classifier_with_choice", classifiers_with_choices(), ids=lambda x: x.__class__.__name__) class TestChoiceClassifiers: - def test_should_raise_if_no_positive_class_is_provided(self, classifier_with_choice: Classifier, - valid_data: TabularDataset) -> None: + def test_should_raise_if_no_positive_class_is_provided( + self, classifier_with_choice: Classifier, valid_data: TabularDataset, + ) -> None: with pytest.raises(LearningError): classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.PRECISION) - def test_workflow_with_choice_parameter(self, classifier_with_choice: Classifier, valid_data: TabularDataset) -> None: + def test_workflow_with_choice_parameter( + self, classifier_with_choice: Classifier, valid_data: TabularDataset, + ) -> None: model = classifier_with_choice.fit_by_exhaustive_search(valid_data, ClassifierMetric.ACCURACY) assert isinstance(model, type(classifier_with_choice)) pred = model.predict(valid_data) assert isinstance(pred, TabularDataset) - def test_should_raise_if_model_is_fitted_with_choice(self, classifier_with_choice: Classifier, - valid_data: TabularDataset) -> None: + def test_should_raise_if_model_is_fitted_with_choice( + self, classifier_with_choice: Classifier, valid_data: TabularDataset, + ) -> None: with pytest.raises(FittingWithChoiceError): classifier_with_choice.fit(valid_data) @@ -113,17 +121,19 @@ def test_should_raise_if_model_is_fitted_with_choice(self, classifier_with_choic class TestFitByExhaustiveSearch: @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, - classifier: Classifier, - valid_data: TabularDataset) -> None: + def 
test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( + self, classifier: Classifier, valid_data: TabularDataset, + ) -> None: with pytest.raises(FittingWithoutChoiceError): classifier.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice(self, - valid_data: TabularDataset) -> None: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( + self, valid_data: TabularDataset, + ) -> None: with pytest.raises(LearningError): - AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search(valid_data, - optimization_metric=ClassifierMetric.ACCURACY) + AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( + valid_data, optimization_metric=ClassifierMetric.ACCURACY, + ) @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) diff --git a/tests/safeds/ml/classical/classification/test_decision_tree.py b/tests/safeds/ml/classical/classification/test_decision_tree.py index 8be06793b..c96ad3f5d 100644 --- a/tests/safeds/ml/classical/classification/test_decision_tree.py +++ b/tests/safeds/ml/classical/classification/test_decision_tree.py @@ -38,9 +38,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + @pytest.mark.parametrize( + "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeClassifier(min_sample_count_in_leaves=min_sample_count_in_leaves) - - 
diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index d71590104..95e46ef3e 100644 --- a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -54,7 +54,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + @pytest.mark.parametrize( + "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestClassifier(min_sample_count_in_leaves=min_sample_count_in_leaves) diff --git a/tests/safeds/ml/classical/regression/test_decision_tree.py b/tests/safeds/ml/classical/regression/test_decision_tree.py index 1276dc6c2..202928349 100644 --- a/tests/safeds/ml/classical/regression/test_decision_tree.py +++ b/tests/safeds/ml/classical/regression/test_decision_tree.py @@ -38,7 +38,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + @pytest.mark.parametrize( + "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeRegressor(min_sample_count_in_leaves=min_sample_count_in_leaves) diff --git 
a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index 681e400e4..b7d6c6897 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -54,7 +54,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.min_samples_leaf == 2 - @pytest.mark.parametrize("min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) + @pytest.mark.parametrize( + "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): RandomForestRegressor(min_sample_count_in_leaves=min_sample_count_in_leaves) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index d6136b2c4..df3584636 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -10,10 +10,13 @@ ColumnLengthMismatchError, DatasetMissesDataError, DatasetMissesFeaturesError, + FittingWithChoiceError, + FittingWithoutChoiceError, + LearningError, MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, FittingWithoutChoiceError, FittingWithChoiceError, LearningError, + PlainTableError, ) from safeds.ml.classical.regression import ( AdaBoostRegressor, @@ -101,28 +104,33 @@ def valid_data() -> TabularDataset: class TestChoiceRegressors: def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, valid_data: TabularDataset) -> None: - model = (regressor_with_choice.fit_by_exhaustive_search(valid_data, RegressorMetric.MEAN_SQUARED_ERROR)) + model = 
regressor_with_choice.fit_by_exhaustive_search(valid_data, RegressorMetric.MEAN_SQUARED_ERROR) assert isinstance(model, type(regressor_with_choice)) pred = model.predict(valid_data) assert isinstance(pred, TabularDataset) - def test_should_raise_if_model_is_fitted_with_choice(self, regressor_with_choice: Regressor, - valid_data: TabularDataset) -> None: + def test_should_raise_if_model_is_fitted_with_choice( + self, regressor_with_choice: Regressor, valid_data: TabularDataset, + ) -> None: with pytest.raises(FittingWithChoiceError): regressor_with_choice.fit(valid_data) class TestFitByExhaustiveSearch: @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice(self, - regressor: Regressor, - valid_data: TabularDataset) -> None: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( + self, regressor: Regressor, valid_data: TabularDataset, + ) -> None: with pytest.raises(FittingWithoutChoiceError): regressor.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice(self, valid_data: TabularDataset) -> None: + def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( + self, valid_data: TabularDataset, + ) -> None: with pytest.raises(LearningError): - AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) + AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( + valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR, + ) @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) From f0e78fe52263da6d4c12f8b2f088b7e6c83ee848 Mon Sep 17 00:00:00 2001 From: megalinter-bot 
<129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:19:32 +0000 Subject: [PATCH 42/94] style: apply automated linter fixes --- .../classical/classification/_classifier.py | 11 +++++--- .../ml/classical/regression/_regressor.py | 6 +++-- .../classification/test_classifier.py | 26 ++++++++++++++----- .../classification/test_decision_tree.py | 4 ++- .../classification/test_random_forest.py | 4 ++- .../regression/test_decision_tree.py | 4 ++- .../regression/test_random_forest.py | 4 ++- .../ml/classical/regression/test_regressor.py | 14 +++++++--- 8 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 373a3633f..863d31c41 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -213,7 +213,10 @@ def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: ) def fit_by_exhaustive_search( - self, training_set: TabularDataset, optimization_metric: ClassifierMetric, positive_class: Any = None, + self, + training_set: TabularDataset, + optimization_metric: ClassifierMetric, + positive_class: Any = None, ) -> Self: if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): raise PlainTableError @@ -228,10 +231,12 @@ def fit_by_exhaustive_search( [train_split, test_split] = training_set.to_table().split_rows(0.75) train_data = train_split.to_tabular_dataset( - target_name=training_set.target.name, extra_names=training_set.extras.column_names, + target_name=training_set.target.name, + extra_names=training_set.extras.column_names, ) test_data = test_split.to_tabular_dataset( - target_name=training_set.target.name, extra_names=training_set.extras.column_names, + target_name=training_set.target.name, + extra_names=training_set.extras.column_names, ) list_of_models = self._get_models_for_all_choices() diff --git 
a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 2570cd8f4..2cd152e4b 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -262,10 +262,12 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me [train_split, test_split] = training_set.to_table().split_rows(0.75) train_data = train_split.to_tabular_dataset( - target_name=training_set.target.name, extra_names=training_set.extras.column_names, + target_name=training_set.target.name, + extra_names=training_set.extras.column_names, ) test_data = test_split.to_tabular_dataset( - target_name=training_set.target.name, extra_names=training_set.extras.column_names, + target_name=training_set.target.name, + extra_names=training_set.extras.column_names, ) list_of_models = self._get_models_for_all_choices() diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index cf12c25d4..6cd42382a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -76,7 +76,9 @@ def classifiers_with_choices() -> list[Classifier]: GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), RandomForestClassifier( - tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2), + tree_count=Choice(1, 2), + max_depth=Choice(1, 2), + min_sample_count_in_leaves=Choice(1, 2), ), SupportVectorClassifier(c=Choice(0.5, 1.0)), ] @@ -98,13 +100,17 @@ def valid_data() -> TabularDataset: class TestChoiceClassifiers: def test_should_raise_if_no_positive_class_is_provided( - self, classifier_with_choice: Classifier, valid_data: TabularDataset, + self, + classifier_with_choice: Classifier, + valid_data: TabularDataset, ) -> None: with 
pytest.raises(LearningError): classifier_with_choice.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.PRECISION) def test_workflow_with_choice_parameter( - self, classifier_with_choice: Classifier, valid_data: TabularDataset, + self, + classifier_with_choice: Classifier, + valid_data: TabularDataset, ) -> None: model = classifier_with_choice.fit_by_exhaustive_search(valid_data, ClassifierMetric.ACCURACY) assert isinstance(model, type(classifier_with_choice)) @@ -112,7 +118,9 @@ def test_workflow_with_choice_parameter( assert isinstance(pred, TabularDataset) def test_should_raise_if_model_is_fitted_with_choice( - self, classifier_with_choice: Classifier, valid_data: TabularDataset, + self, + classifier_with_choice: Classifier, + valid_data: TabularDataset, ) -> None: with pytest.raises(FittingWithChoiceError): classifier_with_choice.fit(valid_data) @@ -122,17 +130,21 @@ class TestFitByExhaustiveSearch: @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( - self, classifier: Classifier, valid_data: TabularDataset, + self, + classifier: Classifier, + valid_data: TabularDataset, ) -> None: with pytest.raises(FittingWithoutChoiceError): classifier.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( - self, valid_data: TabularDataset, + self, + valid_data: TabularDataset, ) -> None: with pytest.raises(LearningError): AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( - valid_data, optimization_metric=ClassifierMetric.ACCURACY, + valid_data, + optimization_metric=ClassifierMetric.ACCURACY, ) diff --git a/tests/safeds/ml/classical/classification/test_decision_tree.py b/tests/safeds/ml/classical/classification/test_decision_tree.py index c96ad3f5d..a7b5c0313 100644 --- 
a/tests/safeds/ml/classical/classification/test_decision_tree.py +++ b/tests/safeds/ml/classical/classification/test_decision_tree.py @@ -39,7 +39,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.min_samples_leaf == 2 @pytest.mark.parametrize( - "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + "min_sample_count_in_leaves", + [-1, 0, Choice(-1)], + ids=["minus_one", "zero", "invalid_choice"], ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index 95e46ef3e..d92d4dc31 100644 --- a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -55,7 +55,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.min_samples_leaf == 2 @pytest.mark.parametrize( - "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + "min_sample_count_in_leaves", + [-1, 0, Choice(-1)], + ids=["minus_one", "zero", "invalid_choice"], ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/ml/classical/regression/test_decision_tree.py b/tests/safeds/ml/classical/regression/test_decision_tree.py index 202928349..97273809e 100644 --- a/tests/safeds/ml/classical/regression/test_decision_tree.py +++ b/tests/safeds/ml/classical/regression/test_decision_tree.py @@ -39,7 +39,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.min_samples_leaf == 2 @pytest.mark.parametrize( - 
"min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + "min_sample_count_in_leaves", + [-1, 0, Choice(-1)], + ids=["minus_one", "zero", "invalid_choice"], ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index b7d6c6897..d5ea2e437 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -55,7 +55,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.min_samples_leaf == 2 @pytest.mark.parametrize( - "min_sample_count_in_leaves", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"], + "min_sample_count_in_leaves", + [-1, 0, Choice(-1)], + ids=["minus_one", "zero", "invalid_choice"], ) def test_should_raise_if_less_than_or_equal_to_0(self, min_sample_count_in_leaves: int | Choice[int]) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index df3584636..af96a0ea5 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -110,7 +110,9 @@ def test_workflow_with_choice_parameter(self, regressor_with_choice: Regressor, assert isinstance(pred, TabularDataset) def test_should_raise_if_model_is_fitted_with_choice( - self, regressor_with_choice: Regressor, valid_data: TabularDataset, + self, + regressor_with_choice: Regressor, + valid_data: TabularDataset, ) -> None: with pytest.raises(FittingWithChoiceError): regressor_with_choice.fit(valid_data) @@ -119,17 +121,21 @@ def test_should_raise_if_model_is_fitted_with_choice( class TestFitByExhaustiveSearch: 
@pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__) def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( - self, regressor: Regressor, valid_data: TabularDataset, + self, + regressor: Regressor, + valid_data: TabularDataset, ) -> None: with pytest.raises(FittingWithoutChoiceError): regressor.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( - self, valid_data: TabularDataset, + self, + valid_data: TabularDataset, ) -> None: with pytest.raises(LearningError): AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( - valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR, + valid_data, + optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR, ) From 4b4d391100dc7943299966db6cafeb12fb8573cd Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 18:06:38 +0200 Subject: [PATCH 43/94] combine linear, lasso, ridge and elasticnet into ElasticNetRegressor --- .../ml/classical/regression/__init__.py | 9 -- .../regression/_elastic_net_regressor.py | 43 ++++------ .../classical/regression/_lasso_regressor.py | 85 ------------------- .../classical/regression/_linear_regressor.py | 42 --------- .../classical/regression/_ridge_regressor.py | 84 ------------------ 5 files changed, 16 insertions(+), 247 deletions(-) delete mode 100644 src/safeds/ml/classical/regression/_lasso_regressor.py delete mode 100644 src/safeds/ml/classical/regression/_linear_regressor.py delete mode 100644 src/safeds/ml/classical/regression/_ridge_regressor.py diff --git a/src/safeds/ml/classical/regression/__init__.py b/src/safeds/ml/classical/regression/__init__.py index ed8c2bcbb..66ba2c143 100644 --- a/src/safeds/ml/classical/regression/__init__.py +++ b/src/safeds/ml/classical/regression/__init__.py @@ -11,11 +11,8 @@ from ._elastic_net_regressor import 
ElasticNetRegressor from ._gradient_boosting_regressor import GradientBoostingRegressor from ._k_nearest_neighbors_regressor import KNearestNeighborsRegressor - from ._lasso_regressor import LassoRegressor - from ._linear_regressor import LinearRegressor from ._random_forest_regressor import RandomForestRegressor from ._regressor import Regressor - from ._ridge_regressor import RidgeRegressor from ._support_vector_regressor import SupportVectorRegressor apipkg.initpkg( @@ -27,11 +24,8 @@ "ElasticNetRegressor": "._elastic_net_regressor:ElasticNetRegressor", "GradientBoostingRegressor": "._gradient_boosting_regressor:GradientBoostingRegressor", "KNearestNeighborsRegressor": "._k_nearest_neighbors_regressor:KNearestNeighborsRegressor", - "LassoRegressor": "._lasso_regressor:LassoRegressor", - "LinearRegressor": "._linear_regressor:LinearRegressor", "RandomForestRegressor": "._random_forest_regressor:RandomForestRegressor", "Regressor": "._regressor:Regressor", - "RidgeRegressor": "._ridge_regressor:RidgeRegressor", "SupportVectorRegressor": "._support_vector_regressor:SupportVectorRegressor", }, ) @@ -43,10 +37,7 @@ "ElasticNetRegressor", "GradientBoostingRegressor", "KNearestNeighborsRegressor", - "LassoRegressor", - "LinearRegressor", "RandomForestRegressor", "Regressor", - "RidgeRegressor", "SupportVectorRegressor", ] diff --git a/src/safeds/ml/classical/regression/_elastic_net_regressor.py b/src/safeds/ml/classical/regression/_elastic_net_regressor.py index 0e0fedc64..dde7f3038 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regressor.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regressor.py @@ -20,6 +20,7 @@ class ElasticNetRegressor(Regressor): ---------- alpha: Controls the regularization of the model. The higher the value, the more regularized it becomes. + If 0, a linear model is used. lasso_ratio: Number between 0 and 1 that controls the ratio between Lasso and Ridge regularization. If 0, only Ridge regularization is used. 
 If 1, only Lasso regularization is used. @@ -39,33 +40,7 @@ def __init__(self, *, alpha: float = 1.0, lasso_ratio: float = 0.5) -> None: # Validation _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) - if alpha == 0: - warn( - ( - "Setting alpha to zero makes this model equivalent to LinearRegression. You should use " - "LinearRegression instead for better numerical stability." - ), - UserWarning, - stacklevel=2, - ) _check_bounds("lasso_ratio", lasso_ratio, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) - if lasso_ratio == 0: - warnings.warn( - ( - "ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression." - " Use RidgeRegression instead for better numerical stability." - ), - stacklevel=2, - ) - elif lasso_ratio == 1: - warnings.warn( - ( - "ElasticNetRegression with lasso_ratio = 0 is essentially LassoRegression." - " Use LassoRegression instead for better numerical stability." - ), - stacklevel=2, - ) # Hyperparameters self._alpha = alpha @@ -104,8 +79,22 @@ def _clone(self) -> ElasticNetRegressor: def _get_sklearn_model(self) -> RegressorMixin: from sklearn.linear_model import ElasticNet as SklearnElasticNet + from sklearn.linear_model import LinearRegression as sk_LinearRegression + from sklearn.linear_model import Ridge as SklearnRidge + from sklearn.linear_model import Lasso as SklearnLasso + + + #TODO Does Linear Regression have priority over other models? Should this always be a linear model if alpha is zero or does the lasso ratio still matter in that case? Might have to modify the order of model creation here. 
+ if self._alpha == 0: # Linear Regression + return sk_LinearRegression(n_jobs=-1) + + if self._lasso_ratio == 0: # Ridge Regression + return SklearnRidge(alpha=self._alpha) + + if self._lasso_ratio == 1: # Lasso Regression + return SklearnLasso(alpha=self._alpha) - return SklearnElasticNet( + return SklearnElasticNet( # Elastic Net Regression alpha=self._alpha, l1_ratio=self._lasso_ratio, ) diff --git a/src/safeds/ml/classical/regression/_lasso_regressor.py b/src/safeds/ml/classical/regression/_lasso_regressor.py deleted file mode 100644 index f9cae7daa..000000000 --- a/src/safeds/ml/classical/regression/_lasso_regressor.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from warnings import warn - -from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound - -from ._regressor import Regressor - -if TYPE_CHECKING: - from sklearn.base import RegressorMixin - - -class LassoRegressor(Regressor): - """Lasso regression. - - Parameters - ---------- - alpha: - Controls the regularization of the model. The higher the value, the more regularized it becomes. - - Raises - ------ - OutOfBoundsError - If `alpha` is negative. - """ - - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - def __init__(self, *, alpha: float = 1.0) -> None: - super().__init__() - - # Validation - _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) - if alpha == 0: - warn( - ( - "Setting alpha to zero makes this model equivalent to LinearRegression. You should use " - "LinearRegression instead for better numerical stability." 
- ), - UserWarning, - stacklevel=2, - ) - - # Hyperparameters - self._alpha = alpha - - def __hash__(self) -> int: - return _structural_hash( - super().__hash__(), - self._alpha, - ) - - # ------------------------------------------------------------------------------------------------------------------ - # Properties - # ------------------------------------------------------------------------------------------------------------------ - - @property - def alpha(self) -> float: - """ - Get the regularization of the model. - - Returns - ------- - result: - The regularization of the model. - """ - return self._alpha - - # ------------------------------------------------------------------------------------------------------------------ - # Template methods - # ------------------------------------------------------------------------------------------------------------------ - - def _clone(self) -> LassoRegressor: - return LassoRegressor( - alpha=self._alpha, - ) - - def _get_sklearn_model(self) -> RegressorMixin: - from sklearn.linear_model import Lasso as SklearnLasso - - return SklearnLasso(alpha=self._alpha) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py deleted file mode 100644 index bd2682e4e..000000000 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from safeds._utils import _structural_hash -from safeds.exceptions import FittingWithoutChoiceError - -from ._regressor import Regressor - -if TYPE_CHECKING: - from sklearn.base import RegressorMixin - - -class LinearRegressor(Regressor): - """Linear regression.""" - - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - 
def __init__(self) -> None: - super().__init__() - - def __hash__(self) -> int: - return _structural_hash( - super().__hash__(), - ) - - # ------------------------------------------------------------------------------------------------------------------ - # Template methods - # ------------------------------------------------------------------------------------------------------------------ - - def _clone(self) -> LinearRegressor: - return LinearRegressor() - - def _get_sklearn_model(self) -> RegressorMixin: - from sklearn.linear_model import LinearRegression as sk_LinearRegression - - return sk_LinearRegression(n_jobs=-1) - - def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - raise FittingWithoutChoiceError diff --git a/src/safeds/ml/classical/regression/_ridge_regressor.py b/src/safeds/ml/classical/regression/_ridge_regressor.py deleted file mode 100644 index d6226793d..000000000 --- a/src/safeds/ml/classical/regression/_ridge_regressor.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import annotations - -import warnings -from typing import TYPE_CHECKING - -from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound - -from ._regressor import Regressor - -if TYPE_CHECKING: - from sklearn.base import RegressorMixin - - -class RidgeRegressor(Regressor): - """ - Ridge regression. - - Parameters - ---------- - alpha: - Controls the regularization of the model. The higher the value, the more regularized it becomes. - - Raises - ------ - OutOfBoundsError - If `alpha` is negative. 
- """ - - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - def __init__(self, *, alpha: float = 1.0) -> None: - super().__init__() - - # Validation - _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) - if alpha == 0.0: - warnings.warn( - ( - "Setting alpha to zero makes this model equivalent to LinearRegression. You should use " - "LinearRegression instead for better numerical stability." - ), - UserWarning, - stacklevel=2, - ) - - # Hyperparameters - self._alpha = alpha - - def __hash__(self) -> int: - return _structural_hash( - super().__hash__(), - self._alpha, - ) - - # ------------------------------------------------------------------------------------------------------------------ - # Properties - # ------------------------------------------------------------------------------------------------------------------ - - @property - def alpha(self) -> float: - """ - Get the regularization of the model. - - Returns - ------- - result: - The regularization of the model. 
- """ - return self._alpha - - # ------------------------------------------------------------------------------------------------------------------ - # Template methods - # ------------------------------------------------------------------------------------------------------------------ - - def _clone(self) -> RidgeRegressor: - return RidgeRegressor(alpha=self._alpha) - - def _get_sklearn_model(self) -> RegressorMixin: - from sklearn.linear_model import Ridge as SklearnRidge - - return SklearnRidge(alpha=self._alpha) From 6505378464f3be05b3b09a089444cba71afb58ec Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 18:44:54 +0200 Subject: [PATCH 44/94] Add Choice to ElasticNetRegressor --- .../regression/_elastic_net_regressor.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/classical/regression/_elastic_net_regressor.py b/src/safeds/ml/classical/regression/_elastic_net_regressor.py index dde7f3038..cdf49fa64 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regressor.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regressor.py @@ -8,6 +8,7 @@ from safeds._validation import _check_bounds, _ClosedBound from ._regressor import Regressor +from ...hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -35,12 +36,21 @@ class ElasticNetRegressor(Regressor): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, *, alpha: float = 1.0, lasso_ratio: float = 0.5) -> None: + def __init__(self, *, alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5) -> None: super().__init__() # Validation - _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) - _check_bounds("lasso_ratio", lasso_ratio, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) + if isinstance(alpha, Choice): + for a in alpha: + _check_bounds("alpha", a, 
 lower_bound=_ClosedBound(0)) + else: + _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) + + if isinstance(lasso_ratio, Choice): + for lr in lasso_ratio: + _check_bounds("lasso_ratio", lr, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) + else: + _check_bounds("lasso_ratio", lasso_ratio, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) # Hyperparameters self._alpha = alpha @@ -58,12 +68,12 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def alpha(self) -> float: + def alpha(self) -> float | Choice[float]: """The regularization of the model.""" return self._alpha @property - def lasso_ratio(self) -> float: + def lasso_ratio(self) -> float | Choice[float]: """The ratio between Lasso and Ridge regularization.""" return self._lasso_ratio @@ -85,7 +95,7 @@ def _get_sklearn_model(self) -> RegressorMixin: #TODO Does Linear Regression have priority over other models? Should this always be a linear model if alpha is zero or does the lasso ratio still mater in that case? Might have do modify the order of model creation here. 
- if self._alpha == 0: # Linear Regression + if self._alpha == 0: # Linear Regression return sk_LinearRegression(n_jobs=-1) if self._lasso_ratio == 0: # Ridge Regression @@ -98,3 +108,13 @@ def _get_sklearn_model(self) -> RegressorMixin: alpha=self._alpha, l1_ratio=self._lasso_ratio, ) + + def _get_models_for_all_choices(self) -> list[ElasticNetRegressor]: + alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] + lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] + + models = [] + for a in alpha_choices: + for lasso in lasso_choices: + models.append(ElasticNetRegressor(alpha=a, lasso_ratio=lasso)) + return models From 1682ff7590c27356f15de6562d90b71f4db2f8a9 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 18:49:53 +0200 Subject: [PATCH 45/94] add precondition methods to ElasticNetRegressor --- .../classical/regression/_elastic_net_regressor.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/classical/regression/_elastic_net_regressor.py b/src/safeds/ml/classical/regression/_elastic_net_regressor.py index cdf49fa64..d59d174b5 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regressor.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regressor.py @@ -1,14 +1,12 @@ from __future__ import annotations - -import warnings from typing import TYPE_CHECKING -from warnings import warn from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from ._regressor import Regressor -from ...hyperparameters import Choice +from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -109,6 +107,14 @@ def _get_sklearn_model(self) -> RegressorMixin: l1_ratio=self._lasso_ratio, ) + def _check_additional_fit_preconditions(self) -> None: + if isinstance(self._alpha, 
Choice) or isinstance(self._lasso_ratio, Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: + if not isinstance(self._alpha, Choice) and not isinstance(self._lasso_ratio, Choice): + raise FittingWithoutChoiceError + def _get_models_for_all_choices(self) -> list[ElasticNetRegressor]: alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] From 4d1ec98e3564029d72840b4a93867ed2ae907354 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 18:56:01 +0200 Subject: [PATCH 46/94] Add tests for ElasticNetRegressor and remove tests of lasso and ridge regression --- .../regression/test_elastic_net_regression.py | 38 +++---------------- .../regression/test_lasso_regression.py | 37 ------------------ .../ml/classical/regression/test_regressor.py | 7 +--- .../regression/test_ridge_regression.py | 37 ------------------ 4 files changed, 6 insertions(+), 113 deletions(-) delete mode 100644 tests/safeds/ml/classical/regression/test_lasso_regression.py delete mode 100644 tests/safeds/ml/classical/regression/test_ridge_regression.py diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py index 1cb19d1cf..006db46a0 100644 --- a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py +++ b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py @@ -3,6 +3,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import ElasticNetRegressor +from safeds.ml.hyperparameters import Choice @pytest.fixture() @@ -21,21 +22,11 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.alpha == 1 - 
@pytest.mark.parametrize("alpha", [-0.5], ids=["minus_0_point_5"]) - def test_should_raise_if_less_than_0(self, alpha: float) -> None: + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) + def test_should_raise_if_less_than_0(self, alpha: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): ElasticNetRegressor(alpha=alpha) - def test_should_warn_if_equal_to_0(self) -> None: - with pytest.warns( - UserWarning, - match=( - "Setting alpha to zero makes this model equivalent to LinearRegression. You " - "should use LinearRegression instead for better numerical stability." - ), - ): - ElasticNetRegressor(alpha=0) - class TestLassoRatio: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: @@ -47,27 +38,8 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.l1_ratio == 0.3 - @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5], ids=["minus_zero_point_5", "one_point_5"]) - def test_should_raise_if_not_between_0_and_1(self, lasso_ratio: float) -> None: + @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_zero_point_5", "one_point_5", "invalid_choice"]) + def test_should_raise_if_not_between_0_and_1(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): ElasticNetRegressor(lasso_ratio=lasso_ratio) - def test_should_warn_if_0(self) -> None: - with pytest.warns( - UserWarning, - match=( - "ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression." - " Use RidgeRegression instead for better numerical stability." - ), - ): - ElasticNetRegressor(lasso_ratio=0) - - def test_should_warn_if_1(self) -> None: - with pytest.warns( - UserWarning, - match=( - "ElasticNetRegression with lasso_ratio = 0 is essentially LassoRegression." - " Use LassoRegression instead for better numerical stability." 
- ), - ): - ElasticNetRegressor(lasso_ratio=1) diff --git a/tests/safeds/ml/classical/regression/test_lasso_regression.py b/tests/safeds/ml/classical/regression/test_lasso_regression.py deleted file mode 100644 index 294b8b421..000000000 --- a/tests/safeds/ml/classical/regression/test_lasso_regression.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest -from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table -from safeds.exceptions import OutOfBoundsError -from safeds.ml.classical.regression import LassoRegressor - - -@pytest.fixture() -def training_set() -> TabularDataset: - table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1") - - -class TestAlpha: - def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: - fitted_model = LassoRegressor(alpha=1).fit(training_set) - assert fitted_model.alpha == 1 - - def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: - fitted_model = LassoRegressor(alpha=1).fit(training_set) - assert fitted_model._wrapped_model is not None - assert fitted_model._wrapped_model.alpha == 1 - - @pytest.mark.parametrize("alpha", [-0.5], ids=["minus_zero_point_5"]) - def test_should_raise_if_less_than_0(self, alpha: float) -> None: - with pytest.raises(OutOfBoundsError): - LassoRegressor(alpha=alpha) - - def test_should_warn_if_equal_to_0(self) -> None: - with pytest.warns( - UserWarning, - match=( - "Setting alpha to zero makes this model equivalent to LinearRegression. You " - "should use LinearRegression instead for better numerical stability." 
- ), - ): - LassoRegressor(alpha=0) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index af96a0ea5..cda896857 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -24,11 +24,8 @@ ElasticNetRegressor, GradientBoostingRegressor, KNearestNeighborsRegressor, - LassoRegressor, - LinearRegressor, RandomForestRegressor, Regressor, - RidgeRegressor, SupportVectorRegressor, ) from safeds.ml.classical.regression._regressor import _check_metrics_preconditions @@ -58,10 +55,7 @@ def regressors() -> list[Regressor]: ElasticNetRegressor(), GradientBoostingRegressor(), KNearestNeighborsRegressor(2), - LassoRegressor(), - LinearRegressor(), RandomForestRegressor(), - RidgeRegressor(), SupportVectorRegressor(), ] @@ -81,6 +75,7 @@ def regressors_with_choices() -> list[Regressor]: return [ AdaBoostRegressor(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), + ElasticNetRegressor(alpha=Choice(0, 0.5, 1), lasso_ratio=Choice(0, 0.5, 1)), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), diff --git a/tests/safeds/ml/classical/regression/test_ridge_regression.py b/tests/safeds/ml/classical/regression/test_ridge_regression.py deleted file mode 100644 index 141c526bc..000000000 --- a/tests/safeds/ml/classical/regression/test_ridge_regression.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest -from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table -from safeds.exceptions import OutOfBoundsError -from safeds.ml.classical.regression import RidgeRegressor - - -@pytest.fixture() -def 
training_set() -> TabularDataset: - table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1") - - -class TestAlpha: - def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: - fitted_model = RidgeRegressor(alpha=1).fit(training_set) - assert fitted_model.alpha == 1 - - def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: - fitted_model = RidgeRegressor(alpha=1).fit(training_set) - assert fitted_model._wrapped_model is not None - assert fitted_model._wrapped_model.alpha == 1 - - @pytest.mark.parametrize("alpha", [-0.5], ids=["minus_zero_point_5"]) - def test_should_raise_if_less_than_0(self, alpha: float) -> None: - with pytest.raises(OutOfBoundsError): - RidgeRegressor(alpha=alpha) - - def test_should_warn_if_equal_to_0(self) -> None: - with pytest.warns( - UserWarning, - match=( - "Setting alpha to zero makes this model equivalent to LinearRegression. You " - "should use LinearRegression instead for better numerical stability." 
- ), - ): - RidgeRegressor(alpha=0) From 1a605c2f483e0a40332c4da320d1070d0c95c54c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 25 Jun 2024 16:57:37 +0000 Subject: [PATCH 47/94] style: apply automated linter fixes --- .../regression/_elastic_net_regressor.py | 16 ++++++++-------- .../regression/test_elastic_net_regression.py | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/safeds/ml/classical/regression/_elastic_net_regressor.py b/src/safeds/ml/classical/regression/_elastic_net_regressor.py index d59d174b5..a08aab18a 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regressor.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regressor.py @@ -1,12 +1,13 @@ from __future__ import annotations + from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError +from safeds.ml.hyperparameters import Choice from ._regressor import Regressor -from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from sklearn.base import RegressorMixin @@ -87,22 +88,21 @@ def _clone(self) -> ElasticNetRegressor: def _get_sklearn_model(self) -> RegressorMixin: from sklearn.linear_model import ElasticNet as SklearnElasticNet + from sklearn.linear_model import Lasso as SklearnLasso from sklearn.linear_model import LinearRegression as sk_LinearRegression from sklearn.linear_model import Ridge as SklearnRidge - from sklearn.linear_model import Lasso as SklearnLasso - - #TODO Does Linear Regression have priority over other models? Should this always be a linear model if alpha is zero or does the lasso ratio still mater in that case? Might have do modify the order of model creation here. - if self._alpha == 0: # Linear Regression + # TODO Does Linear Regression have priority over other models? 
Should this always be a linear model if alpha is zero or does the lasso ratio still matter in that case? Might have to modify the order of model creation here. + if self._alpha == 0: # Linear Regression return sk_LinearRegression(n_jobs=-1) - if self._lasso_ratio == 0: # Ridge Regression + if self._lasso_ratio == 0: # Ridge Regression return SklearnRidge(alpha=self._alpha) - if self._lasso_ratio == 1: # Lasso Regression + if self._lasso_ratio == 1: # Lasso Regression return SklearnLasso(alpha=self._alpha) - return SklearnElasticNet( # Elastic Net Regression + return SklearnElasticNet( # Elastic Net Regression alpha=self._alpha, l1_ratio=self._lasso_ratio, ) diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py index 006db46a0..ec8e71024 100644 --- a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py +++ b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py @@ -38,8 +38,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model is not None assert fitted_model._wrapped_model.l1_ratio == 0.3 - @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_zero_point_5", "one_point_5", "invalid_choice"]) + @pytest.mark.parametrize( + "lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_zero_point_5", "one_point_5", "invalid_choice"], + ) def test_should_raise_if_not_between_0_and_1(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): ElasticNetRegressor(lasso_ratio=lasso_ratio) - From e5faec2cad4164eb6de4a6ccebc0273dd21b4d0e Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 25 Jun 2024 16:59:19 +0000 Subject: [PATCH 48/94] style: apply automated linter fixes --- .../ml/classical/regression/test_elastic_net_regression.py | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py index ec8e71024..754f60ebe 100644 --- a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py +++ b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py @@ -39,7 +39,9 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.l1_ratio == 0.3 @pytest.mark.parametrize( - "lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_zero_point_5", "one_point_5", "invalid_choice"], + "lasso_ratio", + [-0.5, 1.5, Choice(-0.5)], + ids=["minus_zero_point_5", "one_point_5", "invalid_choice"], ) def test_should_raise_if_not_between_0_and_1(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): From 4c4481a76f29f088157428313e96646dba66c971 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 18:59:35 +0200 Subject: [PATCH 49/94] adjust arima test case where lassoRegression was used --- tests/safeds/ml/classical/regression/test_arima_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_arima_model.py b/tests/safeds/ml/classical/regression/test_arima_model.py index 0e68b7160..f0eb11ff9 100644 --- a/tests/safeds/ml/classical/regression/test_arima_model.py +++ b/tests/safeds/ml/classical/regression/test_arima_model.py @@ -9,7 +9,7 @@ ModelNotFittedError, NonNumericColumnError, ) -from safeds.ml.classical.regression import ArimaModelRegressor, LassoRegressor +from safeds.ml.classical.regression import ArimaModelRegressor, AdaBoostRegressor from tests.helpers import resolve_resource_path @@ -177,7 +177,7 @@ def test_should_return_same_hash_for_equal_regressor() -> None: def test_should_return_different_hash_for_unequal_regressor() -> None: regressor1 = ArimaModelRegressor() - regressor2 = LassoRegressor() + regressor2 = 
AdaBoostRegressor() assert hash(regressor1) != hash(regressor2) From e35e4b95468561f25b29ba456a333b777a3f0659 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 25 Jun 2024 17:01:24 +0000 Subject: [PATCH 50/94] style: apply automated linter fixes --- tests/safeds/ml/classical/regression/test_arima_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/ml/classical/regression/test_arima_model.py b/tests/safeds/ml/classical/regression/test_arima_model.py index f0eb11ff9..06a114bc7 100644 --- a/tests/safeds/ml/classical/regression/test_arima_model.py +++ b/tests/safeds/ml/classical/regression/test_arima_model.py @@ -9,7 +9,7 @@ ModelNotFittedError, NonNumericColumnError, ) -from safeds.ml.classical.regression import ArimaModelRegressor, AdaBoostRegressor +from safeds.ml.classical.regression import AdaBoostRegressor, ArimaModelRegressor from tests.helpers import resolve_resource_path From 233816a8af4bdc323d3760ec2f2de826e9d98aba Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 19:21:12 +0200 Subject: [PATCH 51/94] add docstrings --- src/safeds/ml/classical/_supervised_model.py | 6 ++++ .../classical/classification/_classifier.py | 31 +++++++++++++++++++ .../ml/classical/regression/_regressor.py | 28 +++++++++++++++++ src/safeds/ml/metrics/_classifier_metric.py | 1 + src/safeds/ml/metrics/_regressor_metric.py | 1 + 5 files changed, 67 insertions(+) diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 9062bdf70..8a4c06a7b 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -80,6 +80,12 @@ def fit(self, training_set: TabularDataset) -> Self: Raises ------ + PlainTableError + If a table is passed instead of a TabularDataset. + DatasetMissesDataError + If the given training set contains no data. 
+ FittingWithChoiceError + When trying to call this method on a model with hyperparameter choices. LearningError If the training data contains invalid values or if the training failed. """ diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 863d31c41..da619a175 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -218,6 +218,37 @@ def fit_by_exhaustive_search( optimization_metric: ClassifierMetric, positive_class: Any = None, ) -> Self: + """ + Use the hyperparameter choices to create multiple models and fit them. + + **Note:** This model is not modified. + + Parameters + ---------- + training_set: + The training data containing the features and target. + optimization_metric: + The metric that should be used for determining the performance of a model. + positive_class: + The class to be considered positive. All other classes are considered negative. + Needs to be provided when choosing precision, f1score or recall as optimization metric. + + Returns + ------- + best_model: + The model that performed the best out of all possible models given the Choices of hyperparameters. + + Raises + ------ + PlainTableError + If a table is passed instead of a TabularDataset. + DatasetMissesDataError + If the given training set contains no data. + FittingWithoutChoiceError + When trying to call this method on a model without hyperparameter choices. + LearningError + If the training data contains invalid values or if the training failed. 
+ """ if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): raise PlainTableError if training_set.to_table().row_count == 0: diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 2cd152e4b..5b01d57bb 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -253,6 +253,34 @@ def median_absolute_deviation(self, validation_or_test_set: Table | TabularDatas ) def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_metric: RegressorMetric) -> Self: + """ + Use the hyperparameter choices to create multiple models and fit them. + + **Note:** This model is not modified. + + Parameters + ---------- + training_set: + The training data containing the features and target. + optimization_metric: + The metric that should be used for determining the performance of a model. + + Returns + ------- + best_model: + The model that performed the best out of all possible models given the Choices of hyperparameters. + + Raises + ------ + PlainTableError + If a table is passed instead of a TabularDataset. + DatasetMissesDataError + If the given training set contains no data. + FittingWithoutChoiceError + When trying to call this method on a model without hyperparameter choices. + LearningError + If the training data contains invalid values or if the training failed. 
+ """ if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): raise PlainTableError if training_set.to_table().row_count == 0: diff --git a/src/safeds/ml/metrics/_classifier_metric.py b/src/safeds/ml/metrics/_classifier_metric.py index 8af70a951..e50dcacd4 100644 --- a/src/safeds/ml/metrics/_classifier_metric.py +++ b/src/safeds/ml/metrics/_classifier_metric.py @@ -2,6 +2,7 @@ class ClassifierMetric(Enum): + """An Enum of possible Metrics for a Classifier""" ACCURACY = "accuracy" PRECISION = "precision" RECALL = "recall" diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py index a1ba09374..899c28972 100644 --- a/src/safeds/ml/metrics/_regressor_metric.py +++ b/src/safeds/ml/metrics/_regressor_metric.py @@ -2,6 +2,7 @@ class RegressorMetric(Enum): + """An Enum of possible Metrics for a Regressor""" MEAN_SQUARED_ERROR = "mean_squared_error" MEAN_ABSOLUTE_ERROR = "mean_absolute_error" MEDIAN_ABSOLUTE_DEVIATION = "median_absolute_deviation" From 041d770127e58b52d64ce29558c64d96d2c98cea Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 19:23:23 +0200 Subject: [PATCH 52/94] add docstrings --- src/safeds/ml/metrics/_classifier_metric.py | 2 +- src/safeds/ml/metrics/_regressor_metric.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/metrics/_classifier_metric.py b/src/safeds/ml/metrics/_classifier_metric.py index e50dcacd4..ee411b03b 100644 --- a/src/safeds/ml/metrics/_classifier_metric.py +++ b/src/safeds/ml/metrics/_classifier_metric.py @@ -2,7 +2,7 @@ class ClassifierMetric(Enum): - """An Enum of possible Metrics for a Classifier""" + """An Enum of possible Metrics for a Classifier.""" ACCURACY = "accuracy" PRECISION = "precision" RECALL = "recall" diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py index 899c28972..248c03c04 100644 --- a/src/safeds/ml/metrics/_regressor_metric.py +++ 
b/src/safeds/ml/metrics/_regressor_metric.py @@ -2,7 +2,7 @@ class RegressorMetric(Enum): - """An Enum of possible Metrics for a Regressor""" + """An Enum of possible Metrics for a Regressor.""" MEAN_SQUARED_ERROR = "mean_squared_error" MEAN_ABSOLUTE_ERROR = "mean_absolute_error" MEDIAN_ABSOLUTE_DEVIATION = "median_absolute_deviation" From 901981cf67cf8829c79f4b9eeac920a7a7aa8c48 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 25 Jun 2024 17:25:34 +0000 Subject: [PATCH 53/94] style: apply automated linter fixes --- src/safeds/ml/metrics/_classifier_metric.py | 1 + src/safeds/ml/metrics/_regressor_metric.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/safeds/ml/metrics/_classifier_metric.py b/src/safeds/ml/metrics/_classifier_metric.py index ee411b03b..4f69c2607 100644 --- a/src/safeds/ml/metrics/_classifier_metric.py +++ b/src/safeds/ml/metrics/_classifier_metric.py @@ -3,6 +3,7 @@ class ClassifierMetric(Enum): """An Enum of possible Metrics for a Classifier.""" + ACCURACY = "accuracy" PRECISION = "precision" RECALL = "recall" diff --git a/src/safeds/ml/metrics/_regressor_metric.py b/src/safeds/ml/metrics/_regressor_metric.py index 248c03c04..421ce4b08 100644 --- a/src/safeds/ml/metrics/_regressor_metric.py +++ b/src/safeds/ml/metrics/_regressor_metric.py @@ -3,6 +3,7 @@ class RegressorMetric(Enum): """An Enum of possible Metrics for a Regressor.""" + MEAN_SQUARED_ERROR = "mean_squared_error" MEAN_ABSOLUTE_ERROR = "mean_absolute_error" MEDIAN_ABSOLUTE_DEVIATION = "median_absolute_deviation" From e3b519528808cc9172c5bc20734959593fa8f3ff Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 25 Jun 2024 20:29:24 +0200 Subject: [PATCH 54/94] change tutorial which used linearregressor --- docs/tutorials/machine_learning.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorials/machine_learning.ipynb b/docs/tutorials/machine_learning.ipynb index 8f7c46cc2..9c7719298 
100644 --- a/docs/tutorials/machine_learning.ipynb +++ b/docs/tutorials/machine_learning.ipynb @@ -43,7 +43,7 @@ "source": [ "## Create and train model\n", "\n", - "In this example, we want to predict the column `result`, which is the sum of `a`, `b`, and `c`. We will train a linear regression model with this training data. In Safe-DS, machine learning models are modeled as classes. First, their constructor must be called to configure hyperparameters, which returns a model object. Then, training is started by calling the `fit` method on the model object and passing the training data:" + "In this example, we want to predict the column `result`, which is the sum of `a`, `b`, and `c`. We will train an elastic net regression model with this training data. In Safe-DS, machine learning models are modeled as classes. First, their constructor must be called to configure hyperparameters, which returns a model object. Then, training is started by calling the `fit` method on the model object and passing the training data:" ], "metadata": { "collapsed": false @@ -52,9 +52,9 @@ { "cell_type": "code", "source": [ - "from safeds.ml.classical.regression import LinearRegressor\n", + "from safeds.ml.classical.regression import ElasticNetRegressor\n", "\n", - "model = LinearRegressor()\n", + "model = ElasticNetRegressor(alpha=0.0)\n", "fitted_model = model.fit(tabular_dataset)" ], "metadata": { From b417348a43c98d19bd261cc22b836c29ba4d20e0 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 16:02:33 +0200 Subject: [PATCH 55/94] add learner param of adaboost as choice --- .../ml/classical/_bases/_ada_boost_base.py | 2 +- .../classification/_ada_boost_classifier.py | 21 ++++++++++++------- .../regression/_ada_boost_regressor.py | 20 +++++++++++------- .../classification/test_classifier.py | 2 +- .../ml/classical/regression/test_regressor.py | 2 +- 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py 
b/src/safeds/ml/classical/_bases/_ada_boost_base.py index 5eff400af..28de75da6 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -61,5 +61,5 @@ def learning_rate(self) -> float | Choice[float]: @property @abstractmethod - def learner(self) -> SupervisedModel | None: + def learner(self) -> SupervisedModel | None | Choice[SupervisedModel | None]: """The base learner used for training the ensemble.""" diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 080be2763..f7c81b6ec 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -41,7 +41,7 @@ class AdaBoostClassifier(Classifier, _AdaBoostBase): def __init__( self, *, - learner: Classifier | None = None, + learner: Classifier | None | Choice[Classifier | None] = None, max_learner_count: int | Choice[int] = 50, learning_rate: float | Choice[float] = 1.0, ) -> None: @@ -54,7 +54,7 @@ def __init__( ) # Hyperparameters - self._learner: Classifier | None = learner + self._learner: Classifier | None | Choice[Classifier | None] = learner def __hash__(self) -> int: return _structural_hash( @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Classifier | None: + def learner(self) -> Classifier | None | Choice[Classifier | None]: """The base learner used for training the ensemble.""" return self._learner @@ -91,17 +91,21 @@ def _get_sklearn_model(self) -> ClassifierMixin: estimator=learner, n_estimators=self._max_learner_count, learning_rate=self._learning_rate, + algorithm="SAMME" # Will be the default in sklearn 1.6, remove this line then ) def _check_additional_fit_preconditions(self) -> None: - if 
isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): + if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice) or isinstance(self._learner, Choice): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): + if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice) and not isinstance(self._learner, Choice): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]: + learner_choices = ( + self._learner if isinstance(self._learner, Choice) else [self._learner] + ) max_learner_count_choices = ( self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] ) @@ -110,7 +114,8 @@ def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]: ) models = [] - for mlc in max_learner_count_choices: - for lr in learning_rate_choices: - models.append(AdaBoostClassifier(max_learner_count=mlc, learning_rate=lr)) + for learner in learner_choices: + for mlc in max_learner_count_choices: + for lr in learning_rate_choices: + models.append(AdaBoostClassifier(learner=learner, max_learner_count=mlc, learning_rate=lr)) return models diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 292bf4f57..7980a61b3 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -41,7 +41,7 @@ class AdaBoostRegressor(Regressor, _AdaBoostBase): def __init__( self, *, - learner: Regressor | None = None, + learner: Regressor | None | Choice[Regressor | None] = None, max_learner_count: int | Choice[int] = 50, learning_rate: float | Choice[float] = 1.0, ) -> None: @@ -54,7 +54,7 @@ def __init__( ) # 
Hyperparameters - self._learner: Regressor | None = learner + self._learner: Regressor | None | Choice[Regressor | None] = learner def __hash__(self) -> int: return _structural_hash( @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Regressor | None: + def learner(self) -> Regressor | None | Choice[Regressor | None]: """The base learner used for training the ensemble.""" return self._learner @@ -94,14 +94,17 @@ def _get_sklearn_model(self) -> RegressorMixin: ) def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice): + if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice) or isinstance(self._learner, Choice): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice): + if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice) and not isinstance(self._learner, Choice): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostRegressor]: + learner_choices = ( + self._learner if isinstance(self._learner, Choice) else [self._learner] + ) max_learner_count_choices = ( self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] ) @@ -110,7 +113,8 @@ def _get_models_for_all_choices(self) -> list[AdaBoostRegressor]: ) models = [] - for mlc in max_learner_count_choices: - for lr in learning_rate_choices: - models.append(AdaBoostRegressor(max_learner_count=mlc, learning_rate=lr)) + for learner in learner_choices: + for mlc in max_learner_count_choices: + for lr in learning_rate_choices: + models.append(AdaBoostRegressor(learner=learner, 
max_learner_count=mlc, learning_rate=lr)) return models diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 6cd42382a..607eec7c2 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -71,7 +71,7 @@ def classifiers_with_choices() -> list[Classifier]: The list of classifiers to test. """ return [ - AdaBoostClassifier(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + AdaBoostClassifier(learner=Choice(AdaBoostClassifier(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index cda896857..381c6eba6 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -73,7 +73,7 @@ def regressors_with_choices() -> list[Regressor]: The list of regressors to test. 
""" return [ - AdaBoostRegressor(max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + AdaBoostRegressor(learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), ElasticNetRegressor(alpha=Choice(0, 0.5, 1), lasso_ratio=Choice(0, 0.5, 1)), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), From 17896ab23458385d1a47dab9e52c45528c1432c7 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 16:05:50 +0200 Subject: [PATCH 56/94] adjust baseline models --- .../ml/classical/regression/_baseline_regressor.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/classical/regression/_baseline_regressor.py b/src/safeds/ml/classical/regression/_baseline_regressor.py index 4562ed122..0ecf36b56 100644 --- a/src/safeds/ml/classical/regression/_baseline_regressor.py +++ b/src/safeds/ml/classical/regression/_baseline_regressor.py @@ -15,11 +15,8 @@ DecisionTreeRegressor, ElasticNetRegressor, GradientBoostingRegressor, - LassoRegressor, - LinearRegressor, RandomForestRegressor, Regressor, - RidgeRegressor, SupportVectorRegressor, ) @@ -47,15 +44,15 @@ def __init__(self, include_slower_models: bool = False): self._list_of_model_types = [ AdaBoostRegressor(), DecisionTreeRegressor(), - LinearRegressor(), + ElasticNetRegressor(alpha=0.0), + ElasticNetRegressor(lasso_ratio=0), RandomForestRegressor(), - RidgeRegressor(), SupportVectorRegressor(), ] if include_slower_models: self._list_of_model_types.extend( - [ElasticNetRegressor(), LassoRegressor(), GradientBoostingRegressor()], + [ElasticNetRegressor(), ElasticNetRegressor(lasso_ratio=1), GradientBoostingRegressor()], ) # pragma: no cover self._fitted_models: list[Regressor] = [] From 65be84d43562a672ab3b21ebbe9f77196e05677c Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 16:13:56 +0200 Subject: 
[PATCH 57/94] adjust decision tree max_depth choice to take None as well --- src/safeds/ml/classical/_bases/_decision_tree_base.py | 6 +++--- .../classical/classification/_decision_tree_classifier.py | 2 +- .../ml/classical/regression/_decision_tree_regressor.py | 2 +- .../ml/classical/classification/test_decision_tree.py | 2 +- tests/safeds/ml/classical/regression/test_decision_tree.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_decision_tree_base.py b/src/safeds/ml/classical/_bases/_decision_tree_base.py index 088264d3b..5502a5edb 100644 --- a/src/safeds/ml/classical/_bases/_decision_tree_base.py +++ b/src/safeds/ml/classical/_bases/_decision_tree_base.py @@ -15,7 +15,7 @@ class _DecisionTreeBase(ABC): @abstractmethod def __init__( self, - max_depth: int | Choice[int] | None, + max_depth: int | None | Choice[int | None], min_sample_count_in_leaves: int | Choice[int], ) -> None: # Validation @@ -31,7 +31,7 @@ def __init__( _check_bounds("min_sample_count_in_leaves", min_sample_count_in_leaves, lower_bound=_ClosedBound(1)) # Hyperparameters - self._max_depth: int | Choice[int] | None = max_depth + self._max_depth: int | None | Choice[int | None] = max_depth self._min_sample_count_in_leaves: int | Choice[int] = min_sample_count_in_leaves def __hash__(self) -> int: @@ -45,7 +45,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def max_depth(self) -> int | Choice[int] | None: + def max_depth(self) -> int | None | Choice[int | None]: """The maximum depth of the tree.""" return self._max_depth diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 37da7bc13..6032631f0 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ 
b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -39,7 +39,7 @@ class DecisionTreeClassifier(Classifier, _DecisionTreeBase): def __init__( self, *, - max_depth: int | Choice[int] | None = None, + max_depth: int | None | Choice[int | None], min_sample_count_in_leaves: int | Choice[int] = 1, ) -> None: # Initialize superclasses diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index c81614d1b..3fd1cba04 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -39,7 +39,7 @@ class DecisionTreeRegressor(Regressor, _DecisionTreeBase): def __init__( self, *, - max_depth: int | Choice[int] | None = None, + max_depth: int | None | Choice[int | None], min_sample_count_in_leaves: int | Choice[int] = 5, ) -> None: # Initialize superclasses diff --git a/tests/safeds/ml/classical/classification/test_decision_tree.py b/tests/safeds/ml/classical/classification/test_decision_tree.py index a7b5c0313..22aa809aa 100644 --- a/tests/safeds/ml/classical/classification/test_decision_tree.py +++ b/tests/safeds/ml/classical/classification/test_decision_tree.py @@ -23,7 +23,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.max_depth == 2 @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | None | Choice[int | None]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeClassifier(max_depth=max_depth) diff --git a/tests/safeds/ml/classical/regression/test_decision_tree.py b/tests/safeds/ml/classical/regression/test_decision_tree.py index 97273809e..1887f9d52 100644 --- 
a/tests/safeds/ml/classical/regression/test_decision_tree.py +++ b/tests/safeds/ml/classical/regression/test_decision_tree.py @@ -23,7 +23,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.max_depth == 2 @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | None | Choice[int | None]) -> None: with pytest.raises(OutOfBoundsError): DecisionTreeRegressor(max_depth=max_depth) From d1454eed084e487a4f1ec7f65f1d99ccf335c1c9 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 16:19:44 +0200 Subject: [PATCH 58/94] adjust random forest max_depth choice to take None as well --- src/safeds/ml/classical/_bases/_random_forest_base.py | 6 +++--- .../classical/classification/_random_forest_classifier.py | 2 +- .../ml/classical/regression/_random_forest_regressor.py | 2 +- .../ml/classical/classification/test_random_forest.py | 2 +- tests/safeds/ml/classical/regression/test_random_forest.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_random_forest_base.py b/src/safeds/ml/classical/_bases/_random_forest_base.py index d8a20d750..85e2a1acc 100644 --- a/src/safeds/ml/classical/_bases/_random_forest_base.py +++ b/src/safeds/ml/classical/_bases/_random_forest_base.py @@ -16,7 +16,7 @@ class _RandomForestBase(ABC): def __init__( self, tree_count: int | Choice[int], - max_depth: int | Choice[int] | None, + max_depth: int | None | Choice[int | None], min_sample_count_in_leaves: int | Choice[int], ) -> None: # Validation @@ -40,7 +40,7 @@ def __init__( # Hyperparameters self._tree_count: int | Choice[int] = tree_count - self._max_depth: int | Choice[int] | None = max_depth + self._max_depth: int | None | Choice[int | None] = max_depth 
self._min_sample_count_in_leaves: int | Choice[int] = min_sample_count_in_leaves def __hash__(self) -> int: @@ -60,7 +60,7 @@ def tree_count(self) -> int | Choice[int]: return self._tree_count @property - def max_depth(self) -> int | Choice[int] | None: + def max_depth(self) -> int | None | Choice[int | None]: """The maximum depth of each tree.""" return self._max_depth diff --git a/src/safeds/ml/classical/classification/_random_forest_classifier.py b/src/safeds/ml/classical/classification/_random_forest_classifier.py index e917c7b53..ea0aef340 100644 --- a/src/safeds/ml/classical/classification/_random_forest_classifier.py +++ b/src/safeds/ml/classical/classification/_random_forest_classifier.py @@ -44,7 +44,7 @@ def __init__( self, *, tree_count: int | Choice[int] = 100, - max_depth: int | Choice[int] | None = None, + max_depth: int | None | Choice[int | None] = None, min_sample_count_in_leaves: int | Choice[int] = 1, ) -> None: # Initialize superclasses diff --git a/src/safeds/ml/classical/regression/_random_forest_regressor.py b/src/safeds/ml/classical/regression/_random_forest_regressor.py index 6f2b869da..4de218521 100644 --- a/src/safeds/ml/classical/regression/_random_forest_regressor.py +++ b/src/safeds/ml/classical/regression/_random_forest_regressor.py @@ -44,7 +44,7 @@ def __init__( self, *, tree_count: int | Choice[int] = 100, - max_depth: int | Choice[int] | None = None, + max_depth: int | None | Choice[int | None] = None, min_sample_count_in_leaves: int | Choice[int] = 5, ) -> None: # Initialize superclasses diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index d92d4dc31..8f9efabad 100644 --- a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -39,7 +39,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert 
fitted_model._wrapped_model.max_depth == 2 @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | None | Choice[int | None]) -> None: with pytest.raises(OutOfBoundsError): RandomForestClassifier(max_depth=max_depth) diff --git a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index d5ea2e437..40c84108d 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -39,7 +39,7 @@ def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None assert fitted_model._wrapped_model.max_depth == 2 @pytest.mark.parametrize("max_depth", [-1, 0, Choice(-1)], ids=["minus_one", "zero", "invalid_choice"]) - def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | Choice[int]) -> None: + def test_should_raise_if_less_than_or_equal_to_0(self, max_depth: int | None | Choice[int | None]) -> None: with pytest.raises(OutOfBoundsError): RandomForestRegressor(max_depth=max_depth) From f607bbfb86d03a95249ae3408850c3b09276a436 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 18:00:12 +0200 Subject: [PATCH 59/94] add svm kernel choice --- .../_bases/_support_vector_machine_base.py | 6 +++--- .../_decision_tree_classifier.py | 2 +- .../_support_vector_classifier.py | 18 +++++++++++------- .../regression/_decision_tree_regressor.py | 2 +- .../regression/_support_vector_regressor.py | 18 +++++++++++------- .../classification/test_classifier.py | 18 +++++++++--------- .../ml/classical/regression/test_regressor.py | 3 ++- 7 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py 
b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py index 091f690aa..05047531d 100644 --- a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py +++ b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py @@ -78,7 +78,7 @@ def sigmoid() -> _SupportVectorMachineBase.Kernel: def __init__( self, c: float | Choice[float], - kernel: _SupportVectorMachineBase.Kernel | None, + kernel: _SupportVectorMachineBase.Kernel | None | Choice[_SupportVectorMachineBase.Kernel | None], ) -> None: if kernel is None: kernel = _SupportVectorMachineBase.Kernel.radial_basis_function() @@ -92,7 +92,7 @@ def __init__( # Hyperparameters self._c: float | Choice[float] = c - self._kernel: _SupportVectorMachineBase.Kernel = kernel + self._kernel: _SupportVectorMachineBase.Kernel | Choice[_SupportVectorMachineBase.Kernel | None] = kernel def __hash__(self) -> int: return _structural_hash( @@ -112,7 +112,7 @@ def c(self) -> float | Choice[float]: # This property is abstract, so subclasses must declare a public return type. 
@property @abstractmethod - def kernel(self) -> _SupportVectorMachineBase.Kernel: + def kernel(self) -> _SupportVectorMachineBase.Kernel | Choice[_SupportVectorMachineBase.Kernel | None]: """The type of kernel used.""" diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index 6032631f0..a50cd6493 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -39,7 +39,7 @@ class DecisionTreeClassifier(Classifier, _DecisionTreeBase): def __init__( self, *, - max_depth: int | None | Choice[int | None], + max_depth: int | None | Choice[int | None] = None, min_sample_count_in_leaves: int | Choice[int] = 1, ) -> None: # Initialize superclasses diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 37e2897b9..d0cddb148 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -37,7 +37,7 @@ def __init__( self, *, c: float | Choice[float] = 1.0, - kernel: SupportVectorClassifier.Kernel | None = None, + kernel: SupportVectorClassifier.Kernel | None | Choice[SupportVectorClassifier.Kernel | None] = None, ) -> None: # Initialize superclasses Classifier.__init__(self) @@ -58,7 +58,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def kernel(self) -> SupportVectorClassifier.Kernel: + def kernel(self) -> SupportVectorClassifier.Kernel | Choice[SupportVectorClassifier.Kernel | None]: """The type of kernel used.""" return self._kernel @@ -83,16 +83,20 @@ def _get_sklearn_model(self) -> ClassifierMixin: return result def _check_additional_fit_preconditions(self) -> None: - if 
isinstance(self._c, Choice): + if isinstance(self._c, Choice) or isinstance(self._kernel, Choice): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._c, Choice): + if not isinstance(self._c, Choice) and not isinstance(self._kernel, Choice): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorClassifier]: - assert isinstance(self._c, Choice) # this is always true and just here for linting + #assert isinstance(self._c, Choice) # this is always true and just here for linting + c_choices = (self._c if isinstance(self._c, Choice) else [self._c]) + kernel_choices = (self.kernel if isinstance(self.kernel, Choice) else [self.kernel]) + models = [] - for c in self._c: - models.append(SupportVectorClassifier(c=c)) + for c in c_choices: + for kernel in kernel_choices: + models.append(SupportVectorClassifier(c=c, kernel=kernel)) return models diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 3fd1cba04..1124793c0 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ -39,7 +39,7 @@ class DecisionTreeRegressor(Regressor, _DecisionTreeBase): def __init__( self, *, - max_depth: int | None | Choice[int | None], + max_depth: int | None | Choice[int | None] = None, min_sample_count_in_leaves: int | Choice[int] = 5, ) -> None: # Initialize superclasses diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 81d6b506e..11fd985c9 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -37,7 +37,7 @@ def __init__( self, *, c: float | Choice[float] = 1.0, - kernel: SupportVectorRegressor.Kernel | None 
= None, + kernel: SupportVectorRegressor.Kernel | None | Choice[SupportVectorRegressor.Kernel | None] = None, ) -> None: # Initialize superclasses Regressor.__init__(self) @@ -58,7 +58,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def kernel(self) -> SupportVectorRegressor.Kernel: + def kernel(self) -> SupportVectorRegressor.Kernel | Choice[SupportVectorRegressor.Kernel | None]: """The type of kernel used.""" return self._kernel @@ -82,16 +82,20 @@ def _get_sklearn_model(self) -> RegressorMixin: return result def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._c, Choice): + if isinstance(self._c, Choice) or isinstance(self.kernel, Choice): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._c, Choice): + if not isinstance(self._c, Choice) and not isinstance(self.kernel, Choice): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorRegressor]: - assert isinstance(self._c, Choice) # this is always true and just here for linting + #assert isinstance(self._c, Choice) # this is always true and just here for linting + c_choices = (self._c if isinstance(self._c, Choice) else [self._c]) + kernel_choices = (self.kernel if isinstance(self.kernel, Choice) else [self.kernel]) + models = [] - for c in self._c: - models.append(SupportVectorRegressor(c=c)) + for c in c_choices: + for kernel in kernel_choices: + models.append(SupportVectorRegressor(c=c, kernel=kernel)) return models diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 607eec7c2..3d039a172 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -80,7 +80,7 @@ def classifiers_with_choices() -> 
list[Classifier]: max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2), ), - SupportVectorClassifier(c=Choice(0.5, 1.0)), + SupportVectorClassifier(kernel=Choice(None, SupportVectorClassifier.Kernel.linear()) ,c=Choice(0.5, 1.0)), ] @@ -99,6 +99,14 @@ def valid_data() -> TabularDataset: @pytest.mark.parametrize("classifier_with_choice", classifiers_with_choices(), ids=lambda x: x.__class__.__name__) class TestChoiceClassifiers: + def test_should_raise_if_model_is_fitted_with_choice( + self, + classifier_with_choice: Classifier, + valid_data: TabularDataset, + ) -> None: + with pytest.raises(FittingWithChoiceError): + classifier_with_choice.fit(valid_data) + def test_should_raise_if_no_positive_class_is_provided( self, classifier_with_choice: Classifier, @@ -117,14 +125,6 @@ def test_workflow_with_choice_parameter( pred = model.predict(valid_data) assert isinstance(pred, TabularDataset) - def test_should_raise_if_model_is_fitted_with_choice( - self, - classifier_with_choice: Classifier, - valid_data: TabularDataset, - ) -> None: - with pytest.raises(FittingWithChoiceError): - classifier_with_choice.fit(valid_data) - class TestFitByExhaustiveSearch: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 381c6eba6..57e31124b 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -60,6 +60,7 @@ def regressors() -> list[Regressor]: ] + def regressors_with_choices() -> list[Regressor]: """ Return the list of regressors with Choices as Parameters to test choice functionality. 
@@ -79,7 +80,7 @@ def regressors_with_choices() -> list[Regressor]: GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), - SupportVectorRegressor(c=Choice(0.5, 1.0)), + SupportVectorRegressor(kernel=Choice(None, SupportVectorRegressor.Kernel.linear()), c=Choice(0.5, 1.0)), ] From de5318784c0f0c3905570b4eb5dda9bfae3629f2 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 18:53:11 +0200 Subject: [PATCH 60/94] linter fix --- .../ml/classical/classification/_support_vector_classifier.py | 1 + src/safeds/ml/classical/regression/_support_vector_regressor.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index d0cddb148..6d0496b87 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -79,6 +79,7 @@ def _get_sklearn_model(self) -> ClassifierMixin: C=self._c, random_state=_get_random_seed(), ) + assert not isinstance(self._kernel, Choice) self._kernel._apply(result) return result diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index 11fd985c9..e195176d5 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -78,6 +78,7 @@ def _get_sklearn_model(self) -> RegressorMixin: result = SklearnSVR( C=self._c, ) + assert not isinstance(self._kernel, Choice) self._kernel._apply(result) return result From b7fc9751b22c0b8f1bf02bdbb33ba3ec825de928 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 27 Jun 2024 19:01:58 +0200 Subject: [PATCH 61/94] 
linter fix --- .../ml/classical/classification/_ada_boost_classifier.py | 1 + src/safeds/ml/classical/regression/_ada_boost_regressor.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index f7c81b6ec..5248f8aae 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -86,6 +86,7 @@ def _clone(self) -> AdaBoostClassifier: def _get_sklearn_model(self) -> ClassifierMixin: from sklearn.ensemble import AdaBoostClassifier as SklearnAdaBoostClassifier + assert not isinstance(self.learner, Choice) learner = self.learner._get_sklearn_model() if self.learner is not None else None return SklearnAdaBoostClassifier( estimator=learner, diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index 7980a61b3..a88ab4beb 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Regressor | None | Choice[Regressor | None]: + def learner(self) -> Regressor | None | Choice[Regressor | None]: #pragma: no cover """The base learner used for training the ensemble.""" return self._learner @@ -86,6 +86,7 @@ def _clone(self) -> AdaBoostRegressor: def _get_sklearn_model(self) -> RegressorMixin: from sklearn.ensemble import AdaBoostRegressor as SklearnAdaBoostRegressor + assert not isinstance(self.learner, Choice) learner = self.learner._get_sklearn_model() if self.learner is not None else None return SklearnAdaBoostRegressor( estimator=learner, From ebaaa157b4ab993abf6d4be1a048ae2a2efec6cd Mon Sep 17 00:00:00 
2001 From: Simon Date: Fri, 28 Jun 2024 17:24:03 +0200 Subject: [PATCH 62/94] change elasticnet to linear regressor --- docs/tutorials/machine_learning.ipynb | 6 +- .../ml/classical/regression/__init__.py | 6 +- .../regression/_baseline_regressor.py | 8 +- .../regression/_elastic_net_regressor.py | 126 ------- .../classical/regression/_linear_regressor.py | 317 ++++++++++++++++++ .../regression/test_elastic_net_regression.py | 48 --- .../regression/test_linear_regressor.py | 64 ++++ .../ml/classical/regression/test_regressor.py | 6 +- 8 files changed, 394 insertions(+), 187 deletions(-) delete mode 100644 src/safeds/ml/classical/regression/_elastic_net_regressor.py create mode 100644 src/safeds/ml/classical/regression/_linear_regressor.py delete mode 100644 tests/safeds/ml/classical/regression/test_elastic_net_regression.py create mode 100644 tests/safeds/ml/classical/regression/test_linear_regressor.py diff --git a/docs/tutorials/machine_learning.ipynb b/docs/tutorials/machine_learning.ipynb index 9c7719298..8f7c46cc2 100644 --- a/docs/tutorials/machine_learning.ipynb +++ b/docs/tutorials/machine_learning.ipynb @@ -43,7 +43,7 @@ "source": [ "## Create and train model\n", "\n", - "In this example, we want to predict the column `result`, which is the sum of `a`, `b`, and `c`. We will train an elastic net regression model with this training data. In Safe-DS, machine learning models are modeled as classes. First, their constructor must be called to configure hyperparameters, which returns a model object. Then, training is started by calling the `fit` method on the model object and passing the training data:" + "In this example, we want to predict the column `result`, which is the sum of `a`, `b`, and `c`. We will train a linear regression model with this training data. In Safe-DS, machine learning models are modeled as classes. First, their constructor must be called to configure hyperparameters, which returns a model object. 
Then, training is started by calling the `fit` method on the model object and passing the training data:" ], "metadata": { "collapsed": false @@ -52,9 +52,9 @@ { "cell_type": "code", "source": [ - "from safeds.ml.classical.regression import ElasticNetRegressor\n", + "from safeds.ml.classical.regression import LinearRegressor\n", "\n", - "model = ElasticNetRegressor(alpha=0.0)\n", + "model = LinearRegressor()\n", "fitted_model = model.fit(tabular_dataset)" ], "metadata": { diff --git a/src/safeds/ml/classical/regression/__init__.py b/src/safeds/ml/classical/regression/__init__.py index d1f09b414..51dc7bb07 100644 --- a/src/safeds/ml/classical/regression/__init__.py +++ b/src/safeds/ml/classical/regression/__init__.py @@ -9,9 +9,9 @@ from ._arima import ArimaModelRegressor from ._baseline_regressor import BaselineRegressor from ._decision_tree_regressor import DecisionTreeRegressor - from ._elastic_net_regressor import ElasticNetRegressor from ._gradient_boosting_regressor import GradientBoostingRegressor from ._k_nearest_neighbors_regressor import KNearestNeighborsRegressor + from ._linear_regressor import LinearRegressor from ._random_forest_regressor import RandomForestRegressor from ._regressor import Regressor from ._support_vector_regressor import SupportVectorRegressor @@ -23,9 +23,9 @@ "ArimaModelRegressor": "._arima:ArimaModelRegressor", "BaselineRegressor": "._baseline_regressor:BaselineRegressor", "DecisionTreeRegressor": "._decision_tree_regressor:DecisionTreeRegressor", - "ElasticNetRegressor": "._elastic_net_regressor:ElasticNetRegressor", "GradientBoostingRegressor": "._gradient_boosting_regressor:GradientBoostingRegressor", "KNearestNeighborsRegressor": "._k_nearest_neighbors_regressor:KNearestNeighborsRegressor", + "LinearRegressor": "._linear_regressor:LinearRegressor", "RandomForestRegressor": "._random_forest_regressor:RandomForestRegressor", "Regressor": "._regressor:Regressor", "SupportVectorRegressor": 
"._support_vector_regressor:SupportVectorRegressor", @@ -37,9 +37,9 @@ "ArimaModelRegressor", "BaselineRegressor", "DecisionTreeRegressor", - "ElasticNetRegressor", "GradientBoostingRegressor", "KNearestNeighborsRegressor", + "LinearRegressor", "RandomForestRegressor", "Regressor", "SupportVectorRegressor", diff --git a/src/safeds/ml/classical/regression/_baseline_regressor.py b/src/safeds/ml/classical/regression/_baseline_regressor.py index 0ecf36b56..0b373de5c 100644 --- a/src/safeds/ml/classical/regression/_baseline_regressor.py +++ b/src/safeds/ml/classical/regression/_baseline_regressor.py @@ -13,8 +13,8 @@ from safeds.ml.classical.regression import ( AdaBoostRegressor, DecisionTreeRegressor, - ElasticNetRegressor, GradientBoostingRegressor, + LinearRegressor, RandomForestRegressor, Regressor, SupportVectorRegressor, @@ -44,15 +44,15 @@ def __init__(self, include_slower_models: bool = False): self._list_of_model_types = [ AdaBoostRegressor(), DecisionTreeRegressor(), - ElasticNetRegressor(alpha=0.0), - ElasticNetRegressor(lasso_ratio=0), + LinearRegressor(), + LinearRegressor(LinearRegressor.Penalty.ridge()), RandomForestRegressor(), SupportVectorRegressor(), ] if include_slower_models: self._list_of_model_types.extend( - [ElasticNetRegressor(), ElasticNetRegressor(lasso_ratio=1), GradientBoostingRegressor()], + [LinearRegressor(LinearRegressor.Penalty.elastic_net()), LinearRegressor(LinearRegressor.Penalty.lasso()), GradientBoostingRegressor()], ) # pragma: no cover self._fitted_models: list[Regressor] = [] diff --git a/src/safeds/ml/classical/regression/_elastic_net_regressor.py b/src/safeds/ml/classical/regression/_elastic_net_regressor.py deleted file mode 100644 index a08aab18a..000000000 --- a/src/safeds/ml/classical/regression/_elastic_net_regressor.py +++ /dev/null @@ -1,126 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, 
_ClosedBound -from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError -from safeds.ml.hyperparameters import Choice - -from ._regressor import Regressor - -if TYPE_CHECKING: - from sklearn.base import RegressorMixin - - -class ElasticNetRegressor(Regressor): - """Elastic net regression. - - Parameters - ---------- - alpha: - Controls the regularization of the model. The higher the value, the more regularized it becomes. - If 0, a linear model is used. - lasso_ratio: - Number between 0 and 1 that controls the ratio between Lasso and Ridge regularization. If 0, only Ridge - regularization is used. If 1, only Lasso regularization is used. - - Raises - ------ - OutOfBoundsError - If `alpha` is negative or `lasso_ratio` is not between 0 and 1. - """ - - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - def __init__(self, *, alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5) -> None: - super().__init__() - - # Validation - if isinstance(alpha, Choice): - for a in alpha: - _check_bounds("alpha", a, lower_bound=_ClosedBound(0)) - else: - _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) - - if isinstance(lasso_ratio, Choice): - for lr in lasso_ratio: - _check_bounds("lasso_ratio", lr, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) - else: - _check_bounds("lasso_ratio", lasso_ratio, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) - - # Hyperparameters - self._alpha = alpha - self._lasso_ratio = lasso_ratio - - def __hash__(self) -> int: - return _structural_hash( - super().__hash__(), - self._alpha, - self._lasso_ratio, - ) - - # ------------------------------------------------------------------------------------------------------------------ - # Properties - # 
------------------------------------------------------------------------------------------------------------------ - - @property - def alpha(self) -> float | Choice[float]: - """The regularization of the model.""" - return self._alpha - - @property - def lasso_ratio(self) -> float | Choice[float]: - """Rhe ratio between Lasso and Ridge regularization.""" - return self._lasso_ratio - - # ------------------------------------------------------------------------------------------------------------------ - # Template methods - # ------------------------------------------------------------------------------------------------------------------ - - def _clone(self) -> ElasticNetRegressor: - return ElasticNetRegressor( - alpha=self._alpha, - lasso_ratio=self._lasso_ratio, - ) - - def _get_sklearn_model(self) -> RegressorMixin: - from sklearn.linear_model import ElasticNet as SklearnElasticNet - from sklearn.linear_model import Lasso as SklearnLasso - from sklearn.linear_model import LinearRegression as sk_LinearRegression - from sklearn.linear_model import Ridge as SklearnRidge - - # TODO Does Linear Regression have priority over other models? Should this always be a linear model if alpha is zero or does the lasso ratio still mater in that case? Might have do modify the order of model creation here. 
- if self._alpha == 0: # Linear Regression - return sk_LinearRegression(n_jobs=-1) - - if self._lasso_ratio == 0: # Ridge Regression - return SklearnRidge(alpha=self._alpha) - - if self._lasso_ratio == 1: # Lasso Regression - return SklearnLasso(alpha=self._alpha) - - return SklearnElasticNet( # Elastic Net Regression - alpha=self._alpha, - l1_ratio=self._lasso_ratio, - ) - - def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._alpha, Choice) or isinstance(self._lasso_ratio, Choice): - raise FittingWithChoiceError - - def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._alpha, Choice) and not isinstance(self._lasso_ratio, Choice): - raise FittingWithoutChoiceError - - def _get_models_for_all_choices(self) -> list[ElasticNetRegressor]: - alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] - lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] - - models = [] - for a in alpha_choices: - for lasso in lasso_choices: - models.append(ElasticNetRegressor(alpha=a, lasso_ratio=lasso)) - return models diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py new file mode 100644 index 000000000..12109ff74 --- /dev/null +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -0,0 +1,317 @@ +from __future__ import annotations + +import sys +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds._validation import _check_bounds, _ClosedBound +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError +from safeds.ml.hyperparameters import Choice + +from sklearn.linear_model import LinearRegression as SklearnLinear +from sklearn.linear_model import Ridge as SklearnRidge +from sklearn.linear_model import Lasso as SklearnLasso +from sklearn.linear_model import 
ElasticNet as SklearnElasticNet + +from ._regressor import Regressor + +if TYPE_CHECKING: + from sklearn.base import RegressorMixin + + +class LinearRegressor(Regressor): + """ + Linear regression. + + Parameters + ---------- + alpha: + Controls the regularization of the model. The higher the value, the more regularized it becomes. + If 0, a linear model is used. + lasso_ratio: + Number between 0 and 1 that controls the ratio between Lasso and Ridge regularization. If 0, only Ridge + regularization is used. If 1, only Lasso regularization is used. + + Raises + ------ + OutOfBoundsError + If `alpha` is negative or `lasso_ratio` is not between 0 and 1. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Inner classes + # ------------------------------------------------------------------------------------------------------------------ + + class Penalty(ABC): + """ + Possible penalties for the linear regressor. + + Use the static factory methods to create instances of this class. + """ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... 
+ + @abstractmethod + def _get_sklearn_model(self) -> RegressorMixin: + """Get the model of a penalty.""" + + @staticmethod + def linear() -> LinearRegressor.Penalty: + """Create a linear penalty.""" + raise NotImplementedError # pragma: no cover + + @staticmethod + def ridge(alpha: float = 1.0) -> LinearRegressor.Penalty: + """Create a ridge penalty.""" + raise NotImplementedError # pragma: no cover + + @staticmethod + def lasso(alpha: float = 1.0) -> LinearRegressor.Penalty: + """Create a lasso penalty.""" + raise NotImplementedError # pragma: no cover + + @staticmethod + def elastic_net(alpha: float = 1.0, lasso_ratio: float = 0.5) -> LinearRegressor.Penalty: + """Create an elastic net penalty.""" + raise NotImplementedError # pragma: no cover + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = None) -> None: + Regressor.__init__(self) + self._penalty = penalty + + def __hash__(self) -> int: + return _structural_hash( + super().__hash__(), + self._penalty, + ) + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def penalty(self) -> LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None]: + """The regularization of the model.""" + return self._penalty + + def _clone(self) -> LinearRegressor: + return LinearRegressor(penalty=self._penalty) + + def _get_sklearn_model(self) -> RegressorMixin: + return self._penalty._get_sklearn_model() + + def _check_additional_fit_preconditions(self) -> None: + if isinstance(self._penalty, 
Choice): + raise FittingWithChoiceError + + def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: + if not isinstance(self._penalty, Choice): + raise FittingWithoutChoiceError + + def _get_models_for_all_choices(self) -> list[LinearRegressor]: + penalty_choices = self._penalty if isinstance(self._penalty, Choice) else [self._penalty] + + models = [] + for pen in penalty_choices: + models.append(LinearRegressor(penalty=pen)) + return models + + +# ---------------------------------------------------------------------------------------------------------------------- +# Kernels +# ---------------------------------------------------------------------------------------------------------------------- + +class _Linear(LinearRegressor.Penalty): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _Linear): + return NotImplemented + return True + + def __hash__(self) -> int: + return _structural_hash(self.__class__.__qualname__,) + + def __str__(self) -> str: + return "Linear" + + # ------------------------------------------------------------------------------------------------------------------ + # Template methods + # ------------------------------------------------------------------------------------------------------------------ + + def _get_sklearn_model(self) -> SklearnLinear: + return SklearnLinear(n_jobs=-1) + + +class _Ridge(LinearRegressor.Penalty): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, alpha: float | Choice[float] = 1.0): + # 
Validation + if isinstance(alpha, Choice): + for a in alpha: + _check_bounds("alpha", a, lower_bound=_ClosedBound(0)) + else: + _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) + + self._alpha = alpha + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _Ridge): + return NotImplemented + return self._alpha == other._alpha + + def __hash__(self) -> int: + return _structural_hash( + self.__class__.__qualname__, + self._alpha, + ) + + def __sizeof__(self) -> int: + return sys.getsizeof(self._alpha) + + def __str__(self) -> str: + return f"Ridge(alpha={self._alpha})" + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def alpha(self) -> float: + """The regularization of the linear penalty.""" + return self._alpha + + # ------------------------------------------------------------------------------------------------------------------ + # Template methods + # ------------------------------------------------------------------------------------------------------------------ + + def _get_sklearn_model(self) -> SklearnRidge: + return SklearnRidge(alpha=self._alpha) + + +class _Lasso(LinearRegressor.Penalty): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self, alpha: float | Choice[float] = 1.0): + # Validation + if isinstance(alpha, Choice): + for a in alpha: + _check_bounds("alpha", a, lower_bound=_ClosedBound(0)) + else: + _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) + + self._alpha = alpha + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _Lasso): + return NotImplemented + 
return True + + def __hash__(self) -> int: + return _structural_hash( + self.__class__.__qualname__, + self._alpha, + ) + + def __str__(self) -> str: + return f"Lasso(alpha={self._alpha})" + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def alpha(self) -> float: + """The regularization of the linear penalty.""" + return self._alpha + + # ------------------------------------------------------------------------------------------------------------------ + # Template methods + # ------------------------------------------------------------------------------------------------------------------ + + def _get_sklearn_model(self) -> SklearnLasso: + return SklearnLasso(alpha=self._alpha) + + +class _ElasticNet(LinearRegressor.Penalty): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self, alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5): + # Validation + if isinstance(alpha, Choice): + for a in alpha: + _check_bounds("alpha", a, lower_bound=_ClosedBound(0)) + else: + _check_bounds("alpha", alpha, lower_bound=_ClosedBound(0)) + + if isinstance(lasso_ratio, Choice): + for lr in lasso_ratio: + _check_bounds("lasso_ratio", lr, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) + else: + _check_bounds("lasso_ratio", lasso_ratio, lower_bound=_ClosedBound(0), upper_bound=_ClosedBound(1)) + + self._alpha = alpha + self._lasso_ratio = lasso_ratio + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _Lasso): + return NotImplemented + return True + + def __hash__(self) -> int: + return 
_structural_hash( + self.__class__.__qualname__, + self._alpha, + self._lasso_ratio, + ) + + def __str__(self) -> str: + return f"ElasticNet(alpha={self._alpha}, lasso_ratio={self._lasso_ratio})" + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def alpha(self) -> float: + """The regularization of the linear penalty.""" + return self._alpha + + @property + def lasso_ratio(self) -> float: + """The regularization of the linear penalty.""" + return self._alpha + + # ------------------------------------------------------------------------------------------------------------------ + # Template methods + # ------------------------------------------------------------------------------------------------------------------ + + def _get_sklearn_model(self) -> SklearnElasticNet: + return SklearnElasticNet(alpha=self._alpha, l1_ratio=self._lasso_ratio) + + +# Override the methods with classes, so they can be used in `isinstance` calls. Unlike methods, classes define a type. +# This is needed for the DSL, where LinearRegressor penalties are variants of an enum. 
+LinearRegressor.Penalty.linear = _Linear # type: ignore[method-assign] +LinearRegressor.Penalty.ridge = _Ridge # type: ignore[method-assign] +LinearRegressor.Penalty.lasso = _Lasso # type: ignore[method-assign] +LinearRegressor.Penalty.elastic_net = _ElasticNet # type: ignore[method-assign] diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py deleted file mode 100644 index 754f60ebe..000000000 --- a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table -from safeds.exceptions import OutOfBoundsError -from safeds.ml.classical.regression import ElasticNetRegressor -from safeds.ml.hyperparameters import Choice - - -@pytest.fixture() -def training_set() -> TabularDataset: - table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) - return table.to_tabular_dataset(target_name="col1") - - -class TestAlpha: - def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: - fitted_model = ElasticNetRegressor(alpha=1).fit(training_set) - assert fitted_model.alpha == 1 - - def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: - fitted_model = ElasticNetRegressor(alpha=1).fit(training_set) - assert fitted_model._wrapped_model is not None - assert fitted_model._wrapped_model.alpha == 1 - - @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) - def test_should_raise_if_less_than_0(self, alpha: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - ElasticNetRegressor(alpha=alpha) - - -class TestLassoRatio: - def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: - fitted_model = ElasticNetRegressor(lasso_ratio=0.3).fit(training_set) - assert fitted_model.lasso_ratio 
== 0.3 - - def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: - fitted_model = ElasticNetRegressor(lasso_ratio=0.3).fit(training_set) - assert fitted_model._wrapped_model is not None - assert fitted_model._wrapped_model.l1_ratio == 0.3 - - @pytest.mark.parametrize( - "lasso_ratio", - [-0.5, 1.5, Choice(-0.5)], - ids=["minus_zero_point_5", "one_point_5", "invalid_choice"], - ) - def test_should_raise_if_not_between_0_and_1(self, lasso_ratio: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - ElasticNetRegressor(lasso_ratio=lasso_ratio) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py new file mode 100644 index 000000000..7c745a455 --- /dev/null +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -0,0 +1,64 @@ +import pytest +from safeds.data.labeled.containers import TabularDataset +from safeds.data.tabular.containers import Table +from safeds.exceptions import OutOfBoundsError +from safeds.ml.classical.regression._linear_regressor import LinearRegressor, _Linear +from safeds.ml.hyperparameters import Choice + + +def kernels() -> list[LinearRegressor.Penalty]: + """ + Return the list of penalties to test. + + After you implemented a new penalty, add it to this list to ensure its `__hash__` and `__eq__` method work as + expected. + + Returns + ------- + penalties: + The list of penalties to test. 
+ """ + return [ + LinearRegressor.Penalty.linear(), + LinearRegressor.Penalty.ridge(), + LinearRegressor.Penalty.lasso(), + LinearRegressor.Penalty.elastic_net(), + ] + + +@pytest.fixture() +def training_set() -> TabularDataset: + table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + return table.to_tabular_dataset(target_name="col1") + + +class TestPenalty: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + penalty = LinearRegressor.Penalty.linear() + fitted_model = LinearRegressor(penalty=penalty).fit(training_set=training_set) + assert isinstance(fitted_model.penalty, _Linear) + assert fitted_model._wrapped_model is not None + + @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) + def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float) -> None: + penalty = LinearRegressor.Penalty.ridge(alpha=alpha) + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=penalty) + + @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) + def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float) -> None: + penalty = LinearRegressor.Penalty.lasso(alpha=alpha) + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=penalty) + + @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) + def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float) -> None: + penalty = LinearRegressor.Penalty.elastic_net(alpha=alpha) + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=penalty) + + @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5], ids=["minus_0_point_5", "one_point_five"]) + def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float) -> None: + penalty = LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio) + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=penalty) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py 
b/tests/safeds/ml/classical/regression/test_regressor.py index 57e31124b..f2e808915 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -21,7 +21,7 @@ from safeds.ml.classical.regression import ( AdaBoostRegressor, DecisionTreeRegressor, - ElasticNetRegressor, + LinearRegressor, GradientBoostingRegressor, KNearestNeighborsRegressor, RandomForestRegressor, @@ -52,9 +52,9 @@ def regressors() -> list[Regressor]: return [ AdaBoostRegressor(), DecisionTreeRegressor(), - ElasticNetRegressor(), GradientBoostingRegressor(), KNearestNeighborsRegressor(2), + LinearRegressor(), RandomForestRegressor(), SupportVectorRegressor(), ] @@ -76,7 +76,7 @@ def regressors_with_choices() -> list[Regressor]: return [ AdaBoostRegressor(learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), - ElasticNetRegressor(alpha=Choice(0, 0.5, 1), lasso_ratio=Choice(0, 0.5, 1)), + LinearRegressor(penalty=Choice(LinearRegressor.Penalty.ridge(), LinearRegressor.Penalty.lasso())), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), From 9aec1b0ba0dfbe0eebcd964c797a279b0a645aca Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jun 2024 18:42:55 +0200 Subject: [PATCH 63/94] add choices for the penalty params of linear regressor --- .../classical/regression/_linear_regressor.py | 82 +++++++++++++++---- src/safeds/ml/hyperparameters/_choice.py | 4 +- .../regression/test_linear_regressor.py | 28 +++---- .../ml/classical/regression/test_regressor.py | 2 +- 4 files changed, 84 insertions(+), 32 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py 
b/src/safeds/ml/classical/regression/_linear_regressor.py index 12109ff74..1ddaf7af3 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -3,7 +3,6 @@ import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING - from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError @@ -63,23 +62,31 @@ def __str__(self) -> str: ... def _get_sklearn_model(self) -> RegressorMixin: """Get the model of a penalty.""" + @abstractmethod + def _get_models_for_all_choices(self) -> list[LinearRegressor]: + """Get a list of all possible models, given the choices.""" + + @abstractmethod + def _contains_choice_parameters(self) -> bool: + """Return if any parameters of this penalty are choice instances""" + @staticmethod def linear() -> LinearRegressor.Penalty: """Create a linear penalty.""" raise NotImplementedError # pragma: no cover @staticmethod - def ridge(alpha: float = 1.0) -> LinearRegressor.Penalty: + def ridge(alpha: float | Choice[float] = 1.0) -> LinearRegressor.Penalty: """Create a ridge penalty.""" raise NotImplementedError # pragma: no cover @staticmethod - def lasso(alpha: float = 1.0) -> LinearRegressor.Penalty: + def lasso(alpha: float | Choice[float] = 1.0) -> LinearRegressor.Penalty: """Create a lasso penalty.""" raise NotImplementedError # pragma: no cover @staticmethod - def elastic_net(alpha: float = 1.0, lasso_ratio: float = 0.5) -> LinearRegressor.Penalty: + def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5) -> LinearRegressor.Penalty: """Create an elastic net penalty.""" raise NotImplementedError # pragma: no cover @@ -89,7 +96,10 @@ def elastic_net(alpha: float = 1.0, lasso_ratio: float = 0.5) -> LinearRegressor def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty 
| None] = None) -> None: Regressor.__init__(self) - self._penalty = penalty + if penalty is None: + self._penalty = LinearRegressor.Penalty.linear() + else: + self._penalty = penalty def __hash__(self) -> int: return _structural_hash( @@ -102,7 +112,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def penalty(self) -> LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None]: + def penalty(self) -> LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None]: """The regularization of the model.""" return self._penalty @@ -113,11 +123,11 @@ def _get_sklearn_model(self) -> RegressorMixin: return self._penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice): + if isinstance(self._penalty, Choice) or self._penalty._contains_choice_parameters(): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice): + if not isinstance(self._penalty, Choice) and not self._penalty._contains_choice_parameters(): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: @@ -125,7 +135,12 @@ def _get_models_for_all_choices(self) -> list[LinearRegressor]: models = [] for pen in penalty_choices: - models.append(LinearRegressor(penalty=pen)) + if pen is None: + models.append(LinearRegressor()) + elif pen._contains_choice_parameters(): + models.extend(pen._get_models_for_all_choices()) + else: + models.append(LinearRegressor(penalty=pen)) return models @@ -149,6 +164,9 @@ def __hash__(self) -> int: def __str__(self) -> str: return "Linear" + def _contains_choice_parameters(self) -> bool: + return False + # ------------------------------------------------------------------------------------------------------------------ # Template methods # 
------------------------------------------------------------------------------------------------------------------ @@ -156,6 +174,9 @@ def __str__(self) -> str: def _get_sklearn_model(self) -> SklearnLinear: return SklearnLinear(n_jobs=-1) + def _get_models_for_all_choices(self) -> list[LinearRegressor]: + raise NotImplementedError # pragma: no cover + class _Ridge(LinearRegressor.Penalty): # ------------------------------------------------------------------------------------------------------------------ @@ -189,12 +210,15 @@ def __sizeof__(self) -> int: def __str__(self) -> str: return f"Ridge(alpha={self._alpha})" + def _contains_choice_parameters(self) -> bool: + return isinstance(self._alpha, Choice) + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @property - def alpha(self) -> float: + def alpha(self) -> float | Choice[float]: """The regularization of the linear penalty.""" return self._alpha @@ -205,6 +229,13 @@ def alpha(self) -> float: def _get_sklearn_model(self) -> SklearnRidge: return SklearnRidge(alpha=self._alpha) + def _get_models_for_all_choices(self) -> list[LinearRegressor]: + assert isinstance(self._alpha, Choice) + models = [] + for alpha in self._alpha: + models.append(LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=alpha))) + return models + class _Lasso(LinearRegressor.Penalty): # ------------------------------------------------------------------------------------------------------------------ @@ -234,12 +265,15 @@ def __hash__(self) -> int: def __str__(self) -> str: return f"Lasso(alpha={self._alpha})" + def _contains_choice_parameters(self) -> bool: + return isinstance(self._alpha, Choice) + # ------------------------------------------------------------------------------------------------------------------ # Properties # 
------------------------------------------------------------------------------------------------------------------ @property - def alpha(self) -> float: + def alpha(self) -> float | Choice[float]: """The regularization of the linear penalty.""" return self._alpha @@ -250,6 +284,13 @@ def alpha(self) -> float: def _get_sklearn_model(self) -> SklearnLasso: return SklearnLasso(alpha=self._alpha) + def _get_models_for_all_choices(self) -> list[LinearRegressor]: + assert isinstance(self._alpha, Choice) + models = [] + for alpha in self._alpha: + models.append(LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=alpha))) + return models + class _ElasticNet(LinearRegressor.Penalty): # ------------------------------------------------------------------------------------------------------------------ @@ -287,19 +328,22 @@ def __hash__(self) -> int: def __str__(self) -> str: return f"ElasticNet(alpha={self._alpha}, lasso_ratio={self._lasso_ratio})" + def _contains_choice_parameters(self) -> bool: + return isinstance(self._alpha, Choice) or isinstance(self._lasso_ratio, Choice) + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @property - def alpha(self) -> float: + def alpha(self) -> float | Choice[float]: """The regularization of the linear penalty.""" return self._alpha @property - def lasso_ratio(self) -> float: + def lasso_ratio(self) -> float | Choice[float]: """The regularization of the linear penalty.""" - return self._alpha + return self._lasso_ratio # ------------------------------------------------------------------------------------------------------------------ # Template methods @@ -308,6 +352,16 @@ def lasso_ratio(self) -> float: def _get_sklearn_model(self) -> SklearnElasticNet: return SklearnElasticNet(alpha=self._alpha, l1_ratio=self._lasso_ratio) + def 
_get_models_for_all_choices(self) -> list[LinearRegressor]: + alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] + lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] + + models = [] + for alpha in self._alpha: + for lasso in lasso_choices: + models.append(LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso))) + return models + # Override the methods with classes, so they can be used in `isinstance` calls. Unlike methods, classes define a type. # This is needed for the DSL, where LinearRegressor penalties are variants of an enum. diff --git a/src/safeds/ml/hyperparameters/_choice.py b/src/safeds/ml/hyperparameters/_choice.py index 6d0f59db2..1047d4846 100644 --- a/src/safeds/ml/hyperparameters/_choice.py +++ b/src/safeds/ml/hyperparameters/_choice.py @@ -17,12 +17,14 @@ def __init__(self, *args: T) -> None: """ Create a new choice. + Duplicate values will be removed. + Parameters ---------- *args: The values to choose from. 
""" - self.elements = list(args) + self.elements = list(dict.fromkeys(args)) def __contains__(self, value: Any) -> bool: """ diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index 7c745a455..e62e3fc33 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -39,26 +39,22 @@ def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> assert isinstance(fitted_model.penalty, _Linear) assert fitted_model._wrapped_model is not None - @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) - def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float) -> None: - penalty = LinearRegressor.Penalty.ridge(alpha=alpha) + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=penalty) + LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=alpha)) - @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) - def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float) -> None: - penalty = LinearRegressor.Penalty.lasso(alpha=alpha) + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=penalty) + LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=alpha)) - @pytest.mark.parametrize("alpha", [-0.5, 2], ids=["minus_0_point_5", "two"]) - def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float) -> None: - penalty = LinearRegressor.Penalty.elastic_net(alpha=alpha) + 
@pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=penalty) + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha)) - @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5], ids=["minus_0_point_5", "one_point_five"]) - def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float) -> None: - penalty = LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio) + @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_0_point_5", "one_point_five", "invalid_choice"]) + def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=penalty) + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index f2e808915..92bc9fb39 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -76,9 +76,9 @@ def regressors_with_choices() -> list[Regressor]: return [ AdaBoostRegressor(learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), - LinearRegressor(penalty=Choice(LinearRegressor.Penalty.ridge(), LinearRegressor.Penalty.lasso())), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), + LinearRegressor(penalty=Choice(None, LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)))), 
RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorRegressor(kernel=Choice(None, SupportVectorRegressor.Kernel.linear()), c=Choice(0.5, 1.0)), ] From fda0af091d772ff4614bb0135428150e7034a4a2 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 28 Jun 2024 18:54:14 +0200 Subject: [PATCH 64/94] linter fix --- .../classification/_ada_boost_classifier.py | 2 +- .../classical/regression/_ada_boost_regressor.py | 2 +- .../ml/classical/regression/_linear_regressor.py | 16 ++++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 5248f8aae..3dddf7c18 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Classifier | None | Choice[Classifier | None]: + def learner(self) -> Classifier | None | Choice[Classifier | None]: # type: ignore[override] """The base learner used for training the ensemble.""" return self._learner diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index a88ab4beb..fc0bcc8c2 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Regressor | None | Choice[Regressor | None]: #pragma: no cover + def learner(self) -> Regressor | None | Choice[Regressor | None]: # type: ignore[override] """The base 
learner used for training the ensemble.""" return self._learner diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index 1ddaf7af3..e89212ed4 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -68,7 +68,7 @@ def _get_models_for_all_choices(self) -> list[LinearRegressor]: @abstractmethod def _contains_choice_parameters(self) -> bool: - """Return if any parameters of this penalty are choice instances""" + """Return if any parameters of this penalty are choice instances.""" @staticmethod def linear() -> LinearRegressor.Penalty: @@ -96,10 +96,10 @@ def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[ def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = None) -> None: Regressor.__init__(self) + self._penalty = penalty if penalty is None: self._penalty = LinearRegressor.Penalty.linear() - else: - self._penalty = penalty + def __hash__(self) -> int: return _structural_hash( @@ -353,11 +353,15 @@ def _get_sklearn_model(self) -> SklearnElasticNet: return SklearnElasticNet(alpha=self._alpha, l1_ratio=self._lasso_ratio) def _get_models_for_all_choices(self) -> list[LinearRegressor]: - alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] - lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] + alpha_choices = ( + self._alpha if isinstance(self._alpha, Choice) else [self._alpha] + ) + lasso_choices = ( + self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] + ) models = [] - for alpha in self._alpha: + for alpha in alpha_choices: for lasso in lasso_choices: models.append(LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso))) return models From 68810d419ef35d7be7a0275dfa6a0566e7ec49bc Mon Sep 17 00:00:00 
2001 From: Simon Date: Fri, 28 Jun 2024 19:03:48 +0200 Subject: [PATCH 65/94] linter fix --- .../ml/classical/regression/_linear_regressor.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index e89212ed4..ecf1bb75f 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -96,9 +96,10 @@ def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[ def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = None) -> None: Regressor.__init__(self) - self._penalty = penalty if penalty is None: - self._penalty = LinearRegressor.Penalty.linear() + self._penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = LinearRegressor.Penalty.linear() + else: + self._penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = penalty def __hash__(self) -> int: @@ -120,14 +121,15 @@ def _clone(self) -> LinearRegressor: return LinearRegressor(penalty=self._penalty) def _get_sklearn_model(self) -> RegressorMixin: - return self._penalty._get_sklearn_model() + assert not isinstance(self.penalty, Choice) + return self.penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice) or self._penalty._contains_choice_parameters(): + if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice) and not self._penalty._contains_choice_parameters(): + if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: 
From 8e088d74d5fff77027dc0e6261aaa8a49ebfd7b7 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jun 2024 12:24:31 +0200 Subject: [PATCH 66/94] linter fix --- .../ml/classical/regression/_linear_regressor.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index ecf1bb75f..434fced83 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -97,9 +97,8 @@ def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[ def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = None) -> None: Regressor.__init__(self) if penalty is None: - self._penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = LinearRegressor.Penalty.linear() - else: - self._penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = penalty + penalty: LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None] = LinearRegressor.Penalty.linear() + self._penalty: LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None] = penalty def __hash__(self) -> int: @@ -125,12 +124,15 @@ def _get_sklearn_model(self) -> RegressorMixin: return self.penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): + if isinstance(self._penalty, Choice): raise FittingWithChoiceError + elif self.penalty._contains_choice_parameters(): + raise FittingWithoutChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): - raise FittingWithoutChoiceError + if not isinstance(self._penalty, Choice): + if not self.penalty._contains_choice_parameters(): + raise 
FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: penalty_choices = self._penalty if isinstance(self._penalty, Choice) else [self._penalty] From 425ceb0ef9f2def30d02a8b3c8734b3f1c27ab3d Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jun 2024 12:34:38 +0200 Subject: [PATCH 67/94] linter fix --- .../classical/regression/_linear_regressor.py | 37 +++++++------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index 434fced83..e25580014 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -25,17 +25,8 @@ class LinearRegressor(Regressor): Parameters ---------- - alpha: - Controls the regularization of the model. The higher the value, the more regularized it becomes. - If 0, a linear model is used. - lasso_ratio: - Number between 0 and 1 that controls the ratio between Lasso and Ridge regularization. If 0, only Ridge - regularization is used. If 1, only Lasso regularization is used. - - Raises - ------ - OutOfBoundsError - If `alpha` is negative or `lasso_ratio` is not between 0 and 1. + penalty: + The type of penalty to be used. Defaults to a simple linear regression. 
""" # ------------------------------------------------------------------------------------------------------------------ @@ -86,7 +77,8 @@ def lasso(alpha: float | Choice[float] = 1.0) -> LinearRegressor.Penalty: raise NotImplementedError # pragma: no cover @staticmethod - def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5) -> LinearRegressor.Penalty: + def elastic_net(alpha: float | Choice[float] = 1.0, + lasso_ratio: float | Choice[float] = 0.5) -> LinearRegressor.Penalty: """Create an elastic net penalty.""" raise NotImplementedError # pragma: no cover @@ -97,9 +89,10 @@ def elastic_net(alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[ def __init__(self, penalty: LinearRegressor.Penalty | None | Choice[LinearRegressor.Penalty | None] = None) -> None: Regressor.__init__(self) if penalty is None: - penalty: LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None] = LinearRegressor.Penalty.linear() - self._penalty: LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None] = penalty + penalty = LinearRegressor.Penalty.linear() + # Hyperparameters + self._penalty: LinearRegressor.Penalty | Choice[LinearRegressor.Penalty | None] = penalty def __hash__(self) -> int: return _structural_hash( @@ -124,15 +117,12 @@ def _get_sklearn_model(self) -> RegressorMixin: return self.penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice): + if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): # type: ignore[assignment] raise FittingWithChoiceError - elif self.penalty._contains_choice_parameters(): - raise FittingWithoutChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice): - if not self.penalty._contains_choice_parameters(): - raise FittingWithoutChoiceError + if not isinstance(self._penalty, Choice) and not 
self.penalty._contains_choice_parameters(): # type: ignore[assignment] + raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: penalty_choices = self._penalty if isinstance(self._penalty, Choice) else [self._penalty] @@ -163,7 +153,7 @@ def __eq__(self, other: object) -> bool: return True def __hash__(self) -> int: - return _structural_hash(self.__class__.__qualname__,) + return _structural_hash(self.__class__.__qualname__, ) def __str__(self) -> str: return "Linear" @@ -179,7 +169,7 @@ def _get_sklearn_model(self) -> SklearnLinear: return SklearnLinear(n_jobs=-1) def _get_models_for_all_choices(self) -> list[LinearRegressor]: - raise NotImplementedError # pragma: no cover + raise NotImplementedError # pragma: no cover class _Ridge(LinearRegressor.Penalty): @@ -367,7 +357,8 @@ def _get_models_for_all_choices(self) -> list[LinearRegressor]: models = [] for alpha in alpha_choices: for lasso in lasso_choices: - models.append(LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso))) + models.append( + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso))) return models From 5cbc5b45e2b81df86c58689f49862655c8de604f Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jun 2024 12:37:46 +0200 Subject: [PATCH 68/94] linter fix --- src/safeds/ml/classical/regression/_linear_regressor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index e25580014..dd63ed1a6 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -117,11 +117,11 @@ def _get_sklearn_model(self) -> RegressorMixin: return self.penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice) or 
self.penalty._contains_choice_parameters(): # type: ignore[assignment] + if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): # type: ignore[union-attr] raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): # type: ignore[assignment] + if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): # type: ignore[union-attr] raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: From 9b708e5635ff606f95248664eb4cfd898ec1f29d Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sat, 29 Jun 2024 10:39:24 +0000 Subject: [PATCH 69/94] style: apply automated linter fixes --- .../classification/_ada_boost_classifier.py | 20 +++++++---- .../_support_vector_classifier.py | 6 ++-- .../regression/_ada_boost_regressor.py | 18 ++++++---- .../regression/_baseline_regressor.py | 6 +++- .../classical/regression/_linear_regressor.py | 36 ++++++++++--------- .../regression/_support_vector_regressor.py | 6 ++-- .../classification/test_classifier.py | 6 ++-- .../regression/test_linear_regressor.py | 4 ++- .../ml/classical/regression/test_regressor.py | 13 ++++--- 9 files changed, 71 insertions(+), 44 deletions(-) diff --git a/src/safeds/ml/classical/classification/_ada_boost_classifier.py b/src/safeds/ml/classical/classification/_ada_boost_classifier.py index 3dddf7c18..3dfb32b60 100644 --- a/src/safeds/ml/classical/classification/_ada_boost_classifier.py +++ b/src/safeds/ml/classical/classification/_ada_boost_classifier.py @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Classifier | None | Choice[Classifier | None]: # type: ignore[override] + def 
learner(self) -> Classifier | None | Choice[Classifier | None]: # type: ignore[override] """The base learner used for training the ensemble.""" return self._learner @@ -92,21 +92,27 @@ def _get_sklearn_model(self) -> ClassifierMixin: estimator=learner, n_estimators=self._max_learner_count, learning_rate=self._learning_rate, - algorithm="SAMME" # Will be the default in sklearn 1.6, remove this line then + algorithm="SAMME", # Will be the default in sklearn 1.6, remove this line then ) def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice) or isinstance(self._learner, Choice): + if ( + isinstance(self._max_learner_count, Choice) + or isinstance(self._learning_rate, Choice) + or isinstance(self._learner, Choice) + ): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._max_learner_count, Choice) and not isinstance(self._learning_rate, Choice) and not isinstance(self._learner, Choice): + if ( + not isinstance(self._max_learner_count, Choice) + and not isinstance(self._learning_rate, Choice) + and not isinstance(self._learner, Choice) + ): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]: - learner_choices = ( - self._learner if isinstance(self._learner, Choice) else [self._learner] - ) + learner_choices = self._learner if isinstance(self._learner, Choice) else [self._learner] max_learner_count_choices = ( self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] ) diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 6d0496b87..03895ce87 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -92,9 +92,9 
@@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[SupportVectorClassifier]: - #assert isinstance(self._c, Choice) # this is always true and just here for linting - c_choices = (self._c if isinstance(self._c, Choice) else [self._c]) - kernel_choices = (self.kernel if isinstance(self.kernel, Choice) else [self.kernel]) + # assert isinstance(self._c, Choice) # this is always true and just here for linting + c_choices = self._c if isinstance(self._c, Choice) else [self._c] + kernel_choices = self.kernel if isinstance(self.kernel, Choice) else [self.kernel] models = [] for c in c_choices: diff --git a/src/safeds/ml/classical/regression/_ada_boost_regressor.py b/src/safeds/ml/classical/regression/_ada_boost_regressor.py index fc0bcc8c2..ef09a7b08 100644 --- a/src/safeds/ml/classical/regression/_ada_boost_regressor.py +++ b/src/safeds/ml/classical/regression/_ada_boost_regressor.py @@ -68,7 +68,7 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - def learner(self) -> Regressor | None | Choice[Regressor | None]: # type: ignore[override] + def learner(self) -> Regressor | None | Choice[Regressor | None]: # type: ignore[override] """The base learner used for training the ensemble.""" return self._learner @@ -95,17 +95,23 @@ def _get_sklearn_model(self) -> RegressorMixin: ) def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._max_learner_count, Choice) or isinstance(self._learning_rate, Choice) or isinstance(self._learner, Choice): + if ( + isinstance(self._max_learner_count, Choice) + or isinstance(self._learning_rate, Choice) + or isinstance(self._learner, Choice) + ): raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._max_learner_count, Choice) and not 
isinstance(self._learning_rate, Choice) and not isinstance(self._learner, Choice): + if ( + not isinstance(self._max_learner_count, Choice) + and not isinstance(self._learning_rate, Choice) + and not isinstance(self._learner, Choice) + ): raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[AdaBoostRegressor]: - learner_choices = ( - self._learner if isinstance(self._learner, Choice) else [self._learner] - ) + learner_choices = self._learner if isinstance(self._learner, Choice) else [self._learner] max_learner_count_choices = ( self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count] ) diff --git a/src/safeds/ml/classical/regression/_baseline_regressor.py b/src/safeds/ml/classical/regression/_baseline_regressor.py index 0b373de5c..83060397a 100644 --- a/src/safeds/ml/classical/regression/_baseline_regressor.py +++ b/src/safeds/ml/classical/regression/_baseline_regressor.py @@ -52,7 +52,11 @@ def __init__(self, include_slower_models: bool = False): if include_slower_models: self._list_of_model_types.extend( - [LinearRegressor(LinearRegressor.Penalty.elastic_net()), LinearRegressor(LinearRegressor.Penalty.lasso()), GradientBoostingRegressor()], + [ + LinearRegressor(LinearRegressor.Penalty.elastic_net()), + LinearRegressor(LinearRegressor.Penalty.lasso()), + GradientBoostingRegressor(), + ], ) # pragma: no cover self._fitted_models: list[Regressor] = [] diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index dd63ed1a6..ea5aad9fd 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -3,16 +3,17 @@ import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING + +from sklearn.linear_model import ElasticNet as SklearnElasticNet +from sklearn.linear_model import Lasso as SklearnLasso +from sklearn.linear_model import 
LinearRegression as SklearnLinear +from sklearn.linear_model import Ridge as SklearnRidge + from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.ml.hyperparameters import Choice -from sklearn.linear_model import LinearRegression as SklearnLinear -from sklearn.linear_model import Ridge as SklearnRidge -from sklearn.linear_model import Lasso as SklearnLasso -from sklearn.linear_model import ElasticNet as SklearnElasticNet - from ._regressor import Regressor if TYPE_CHECKING: @@ -77,8 +78,9 @@ def lasso(alpha: float | Choice[float] = 1.0) -> LinearRegressor.Penalty: raise NotImplementedError # pragma: no cover @staticmethod - def elastic_net(alpha: float | Choice[float] = 1.0, - lasso_ratio: float | Choice[float] = 0.5) -> LinearRegressor.Penalty: + def elastic_net( + alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choice[float] = 0.5, + ) -> LinearRegressor.Penalty: """Create an elastic net penalty.""" raise NotImplementedError # pragma: no cover @@ -117,11 +119,11 @@ def _get_sklearn_model(self) -> RegressorMixin: return self.penalty._get_sklearn_model() def _check_additional_fit_preconditions(self) -> None: - if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): # type: ignore[union-attr] + if isinstance(self._penalty, Choice) or self.penalty._contains_choice_parameters(): # type: ignore[union-attr] raise FittingWithChoiceError def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: - if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): # type: ignore[union-attr] + if not isinstance(self._penalty, Choice) and not self.penalty._contains_choice_parameters(): # type: ignore[union-attr] raise FittingWithoutChoiceError def _get_models_for_all_choices(self) -> list[LinearRegressor]: @@ -142,6 +144,7 @@ def 
_get_models_for_all_choices(self) -> list[LinearRegressor]: # Kernels # ---------------------------------------------------------------------------------------------------------------------- + class _Linear(LinearRegressor.Penalty): # ------------------------------------------------------------------------------------------------------------------ # Dunder methods @@ -153,7 +156,9 @@ def __eq__(self, other: object) -> bool: return True def __hash__(self) -> int: - return _structural_hash(self.__class__.__qualname__, ) + return _structural_hash( + self.__class__.__qualname__, + ) def __str__(self) -> str: return "Linear" @@ -347,18 +352,15 @@ def _get_sklearn_model(self) -> SklearnElasticNet: return SklearnElasticNet(alpha=self._alpha, l1_ratio=self._lasso_ratio) def _get_models_for_all_choices(self) -> list[LinearRegressor]: - alpha_choices = ( - self._alpha if isinstance(self._alpha, Choice) else [self._alpha] - ) - lasso_choices = ( - self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] - ) + alpha_choices = self._alpha if isinstance(self._alpha, Choice) else [self._alpha] + lasso_choices = self._lasso_ratio if isinstance(self._lasso_ratio, Choice) else [self._lasso_ratio] models = [] for alpha in alpha_choices: for lasso in lasso_choices: models.append( - LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso))) + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso)), + ) return models diff --git a/src/safeds/ml/classical/regression/_support_vector_regressor.py b/src/safeds/ml/classical/regression/_support_vector_regressor.py index e195176d5..24ab4196b 100644 --- a/src/safeds/ml/classical/regression/_support_vector_regressor.py +++ b/src/safeds/ml/classical/regression/_support_vector_regressor.py @@ -91,9 +91,9 @@ def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None: raise FittingWithoutChoiceError def _get_models_for_all_choices(self) 
-> list[SupportVectorRegressor]: - #assert isinstance(self._c, Choice) # this is always true and just here for linting - c_choices = (self._c if isinstance(self._c, Choice) else [self._c]) - kernel_choices = (self.kernel if isinstance(self.kernel, Choice) else [self.kernel]) + # assert isinstance(self._c, Choice) # this is always true and just here for linting + c_choices = self._c if isinstance(self._c, Choice) else [self._c] + kernel_choices = self.kernel if isinstance(self.kernel, Choice) else [self.kernel] models = [] for c in c_choices: diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 4497256f1..778c60c95 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -71,7 +71,9 @@ def classifiers_with_choices() -> list[Classifier]: The list of classifiers to test. """ return [ - AdaBoostClassifier(learner=Choice(AdaBoostClassifier(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + AdaBoostClassifier( + learner=Choice(AdaBoostClassifier(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2), + ), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsClassifier(neighbor_count=Choice(1, 2)), @@ -80,7 +82,7 @@ def classifiers_with_choices() -> list[Classifier]: max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2), ), - SupportVectorClassifier(kernel=Choice(None, SupportVectorClassifier.Kernel.linear()) ,c=Choice(0.5, 1.0)), + SupportVectorClassifier(kernel=Choice(None, SupportVectorClassifier.Kernel.linear()), c=Choice(0.5, 1.0)), ] diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index e62e3fc33..e8f9dad2b 100644 --- 
a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -54,7 +54,9 @@ def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float | Ch with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha)) - @pytest.mark.parametrize("lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_0_point_5", "one_point_five", "invalid_choice"]) + @pytest.mark.parametrize( + "lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_0_point_5", "one_point_five", "invalid_choice"], + ) def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 62c4a7083..1a7125149 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -21,9 +21,9 @@ from safeds.ml.classical.regression import ( AdaBoostRegressor, DecisionTreeRegressor, - LinearRegressor, GradientBoostingRegressor, KNearestNeighborsRegressor, + LinearRegressor, RandomForestRegressor, Regressor, SupportVectorRegressor, @@ -60,7 +60,6 @@ def regressors() -> list[Regressor]: ] - def regressors_with_choices() -> list[Regressor]: """ Return the list of regressors with Choices as Parameters to test choice functionality. @@ -74,11 +73,17 @@ def regressors_with_choices() -> list[Regressor]: The list of regressors to test. 
""" return [ - AdaBoostRegressor(learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), + AdaBoostRegressor( + learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2), + ), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), - LinearRegressor(penalty=Choice(None, LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)))), + LinearRegressor( + penalty=Choice( + None, LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), + ), + ), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorRegressor(kernel=Choice(None, SupportVectorRegressor.Kernel.linear()), c=Choice(0.5, 1.0)), ] From 54be12e49fb67fbc0590de6187413bef46e73258 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sat, 29 Jun 2024 10:40:54 +0000 Subject: [PATCH 70/94] style: apply automated linter fixes --- src/safeds/ml/classical/regression/_linear_regressor.py | 3 ++- .../safeds/ml/classical/classification/test_classifier.py | 4 +++- .../ml/classical/regression/test_linear_regressor.py | 4 +++- tests/safeds/ml/classical/regression/test_regressor.py | 7 +++++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index ea5aad9fd..47054a786 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -79,7 +79,8 @@ def lasso(alpha: float | Choice[float] = 1.0) -> LinearRegressor.Penalty: @staticmethod def elastic_net( - alpha: float | Choice[float] = 
1.0, lasso_ratio: float | Choice[float] = 0.5, + alpha: float | Choice[float] = 1.0, + lasso_ratio: float | Choice[float] = 0.5, ) -> LinearRegressor.Penalty: """Create an elastic net penalty.""" raise NotImplementedError # pragma: no cover diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 778c60c95..6f7055a9a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -72,7 +72,9 @@ def classifiers_with_choices() -> list[Classifier]: """ return [ AdaBoostClassifier( - learner=Choice(AdaBoostClassifier(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2), + learner=Choice(AdaBoostClassifier(), None), + max_learner_count=Choice(1, 2), + learning_rate=Choice(0.1, 0.2), ), DecisionTreeClassifier(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingClassifier(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index e8f9dad2b..5da2d14bc 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -55,7 +55,9 @@ def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float | Ch LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha)) @pytest.mark.parametrize( - "lasso_ratio", [-0.5, 1.5, Choice(-0.5)], ids=["minus_0_point_5", "one_point_five", "invalid_choice"], + "lasso_ratio", + [-0.5, 1.5, Choice(-0.5)], + ids=["minus_0_point_5", "one_point_five", "invalid_choice"], ) def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/ml/classical/regression/test_regressor.py 
b/tests/safeds/ml/classical/regression/test_regressor.py index 1a7125149..0a76d15c7 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -74,14 +74,17 @@ def regressors_with_choices() -> list[Regressor]: """ return [ AdaBoostRegressor( - learner=Choice(AdaBoostRegressor(), None), max_learner_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2), + learner=Choice(AdaBoostRegressor(), None), + max_learner_count=Choice(1, 2), + learning_rate=Choice(0.1, 0.2), ), DecisionTreeRegressor(max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), GradientBoostingRegressor(tree_count=Choice(1, 2), learning_rate=Choice(0.1, 0.2)), KNearestNeighborsRegressor(neighbor_count=Choice(1, 2)), LinearRegressor( penalty=Choice( - None, LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), + None, + LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), ), ), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), From 254edf94944101851063856f36cb1c596383d084 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 29 Jun 2024 13:29:45 +0200 Subject: [PATCH 71/94] linter fix --- src/safeds/ml/classical/classification/_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index da619a175..58dd2ee79 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -283,7 +283,8 @@ def fit_by_exhaustive_search( for future in done: list_of_fitted_models.append(future.result()) executor.shutdown() - + if __name__ == "__main__": # For pytest + executor.shutdown(True) # For pytest best_model = None best_metric_value = None for fitted_model in list_of_fitted_models: @@ -315,7 +316,6 @@ def 
fit_by_exhaustive_search( assert best_model is not None return best_model - def _extract_table(table_or_dataset: Table | TabularDataset) -> Table: """Extract the table from the given table or dataset.""" if isinstance(table_or_dataset, TabularDataset): From 51da4c1f31e00257753f2831faa362175d41d1db Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 30 Jun 2024 00:23:36 +0200 Subject: [PATCH 72/94] linter fix --- src/safeds/ml/classical/classification/_classifier.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 58dd2ee79..bb8850f52 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -283,8 +283,6 @@ def fit_by_exhaustive_search( for future in done: list_of_fitted_models.append(future.result()) executor.shutdown() - if __name__ == "__main__": # For pytest - executor.shutdown(True) # For pytest best_model = None best_metric_value = None for fitted_model in list_of_fitted_models: From e4e4f22bbf1b17f5085b4cb13550ce0d72606f15 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 30 Jun 2024 10:41:52 +0000 Subject: [PATCH 73/94] style: apply automated linter fixes --- src/safeds/ml/classical/classification/_classifier.py | 1 + src/safeds/ml/classical/classification/_logistic_classifier.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index bb8850f52..0287c9fd6 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -314,6 +314,7 @@ def fit_by_exhaustive_search( assert best_model is not None return best_model + def _extract_table(table_or_dataset: Table | TabularDataset) -> Table: """Extract the table from the given table or 
dataset.""" if isinstance(table_or_dataset, TabularDataset): diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 115af807a..d6aa48f56 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash -from safeds.exceptions import FittingWithoutChoiceError from safeds._validation import _check_bounds, _OpenBound +from safeds.exceptions import FittingWithoutChoiceError from ._classifier import Classifier From 3b74c2127a0cb671757994177748e44767a00171 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 3 Jul 2024 01:44:57 +0200 Subject: [PATCH 74/94] add missing variable update --- .../classical/classification/_classifier.py | 18 ++++++++++++------ .../ml/classical/regression/_regressor.py | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index bb8850f52..eabf41009 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -271,8 +271,6 @@ def fit_by_exhaustive_search( ) list_of_models = self._get_models_for_all_choices() - if len(list_of_models) < 1: - raise LearningError("Please provide at least one Value in a Choice Parameter") list_of_fitted_models = [] with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: @@ -300,17 +298,25 @@ def fit_by_exhaustive_search( else: match optimization_metric.value: case "accuracy": - if fitted_model.accuracy(test_data) > best_metric_value: + accuracy_of_fitted_model = fitted_model.accuracy(test_data) + if accuracy_of_fitted_model > best_metric_value: best_model = fitted_model + best_metric_value = accuracy_of_fitted_model case 
"precision": - if fitted_model.precision(test_data, positive_class) > best_metric_value: + precision_of_fitted_model = fitted_model.precision(test_data, positive_class) + if precision_of_fitted_model > best_metric_value: best_model = fitted_model + best_metric_value = precision_of_fitted_model case "recall": - if fitted_model.recall(test_data, positive_class) > best_metric_value: + recall_of_fitted_model = fitted_model.recall(test_data, positive_class) + if recall_of_fitted_model > best_metric_value: best_model = fitted_model + best_metric_value = recall_of_fitted_model case "f1score": - if fitted_model.f1_score(test_data, positive_class) > best_metric_value: + f1score_of_fitted_model = fitted_model.f1score(test_data, positive_class) + if f1score_of_fitted_model > best_metric_value: best_model = fitted_model + best_metric_value = f1score_of_fitted_model assert best_model is not None return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 5b01d57bb..cdc205e47 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -9,7 +9,6 @@ from safeds.exceptions import ( ColumnLengthMismatchError, DatasetMissesDataError, - LearningError, ModelNotFittedError, PlainTableError, ) @@ -299,8 +298,6 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me ) list_of_models = self._get_models_for_all_choices() - if len(list_of_models) < 1: - raise LearningError("Please provide at least one Value in a Choice Parameter") list_of_fitted_models = [] with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: @@ -329,17 +326,25 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me else: match optimization_metric.value: case "mean_squared_error": - if fitted_model.mean_squared_error(test_data) < best_metric_value: + error_of_fitted_model = fitted_model.mean_squared_error(test_data) + 
if error_of_fitted_model < best_metric_value: best_model = fitted_model + best_metric_value = error_of_fitted_model case "mean_absolute_error": - if fitted_model.mean_absolute_error(test_data) < best_metric_value: + error_of_fitted_model = fitted_model.mean_absolute_error(test_data) + if error_of_fitted_model < best_metric_value: best_model = fitted_model + best_metric_value = error_of_fitted_model case "median_absolute_deviation": - if fitted_model.median_absolute_deviation(test_data) < best_metric_value: + error_of_fitted_model = fitted_model.median_absolute_deviation(test_data) + if error_of_fitted_model < best_metric_value: best_model = fitted_model + best_metric_value = error_of_fitted_model case "coefficient_of_determination": - if fitted_model.coefficient_of_determination(test_data) > best_metric_value: + error_of_fitted_model = fitted_model.coefficient_of_determination(test_data) + if error_of_fitted_model > best_metric_value: best_model = fitted_model + best_metric_value = error_of_fitted_model assert best_model is not None return best_model From e98b1c46d35a1dcaa4f5ae236aaeff7d7631832f Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 14:52:29 +0200 Subject: [PATCH 75/94] change choice to remove duplicates and throw error when no args are provided --- src/safeds/ml/hyperparameters/_choice.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/safeds/ml/hyperparameters/_choice.py b/src/safeds/ml/hyperparameters/_choice.py index 1047d4846..7a847a53c 100644 --- a/src/safeds/ml/hyperparameters/_choice.py +++ b/src/safeds/ml/hyperparameters/_choice.py @@ -3,6 +3,8 @@ from collections.abc import Collection from typing import TYPE_CHECKING, TypeVar +from safeds.exceptions._ml import EmptyChoiceError + if TYPE_CHECKING: from collections.abc import Iterator from typing import Any @@ -24,8 +26,11 @@ def __init__(self, *args: T) -> None: *args: The values to choose from. 
""" + if len(args) < 1: + raise EmptyChoiceError() self.elements = list(dict.fromkeys(args)) + def __contains__(self, value: Any) -> bool: """ Check if a value is in this choice. From fcfcf021b96e304b6df9afa707e7835fd7ff0872 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 14:55:07 +0200 Subject: [PATCH 76/94] add emptyChoiceError --- src/safeds/exceptions/__init__.py | 2 ++ src/safeds/exceptions/_ml.py | 6 ++++++ tests/safeds/ml/classical/classification/test_classifier.py | 4 ++-- tests/safeds/ml/classical/regression/test_regressor.py | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 105edbab7..fc8eaa8db 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -16,6 +16,7 @@ from ._ml import ( DatasetMissesDataError, DatasetMissesFeaturesError, + EmptyChoiceError, FeatureDataMismatchError, FittingWithChoiceError, FittingWithoutChoiceError, @@ -73,6 +74,7 @@ class OutOfBoundsError(SafeDsError): "DatasetMissesDataError", "DatasetMissesFeaturesError", "TargetDataMismatchError", + "EmptyChoiceError", "FeatureDataMismatchError", "FittingWithChoiceError", "FittingWithoutChoiceError", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 977d516d5..2b9b536b0 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -42,6 +42,12 @@ def __init__(self) -> None: super().__init__("Dataset contains no rows") +class EmptyChoiceError(ValueError): + """Raised when a choice object is created, but no arguments are provided.""" + + def __init__(self) -> None: + super().__init__("Please provide at least one Value in a Choice Parameter") + class FittingWithChoiceError(Exception): """Raised when a model is fitted with a choice object as a parameter.""" diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 
6f7055a9a..3d964d2e4 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -15,7 +15,7 @@ MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, + PlainTableError, EmptyChoiceError, ) from safeds.ml.classical.classification import ( AdaBoostClassifier, @@ -145,7 +145,7 @@ def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( self, valid_data: TabularDataset, ) -> None: - with pytest.raises(LearningError): + with pytest.raises(EmptyChoiceError): AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( valid_data, optimization_metric=ClassifierMetric.ACCURACY, diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 0a76d15c7..d39303e68 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -16,7 +16,7 @@ MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, + PlainTableError, EmptyChoiceError, ) from safeds.ml.classical.regression import ( AdaBoostRegressor, @@ -136,7 +136,7 @@ def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( self, valid_data: TabularDataset, ) -> None: - with pytest.raises(LearningError): + with pytest.raises(EmptyChoiceError): AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR, From dff67a36f47ef10559174dd9e9eac8cc6ef61411 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 14:56:45 +0200 Subject: [PATCH 77/94] set context of ProcessPoolExecutor to fork --- src/safeds/ml/classical/classification/_classifier.py | 4 +++- src/safeds/ml/classical/regression/_regressor.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff 
--git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 861c0bf9b..cee51a82a 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -4,6 +4,8 @@ from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, wait from typing import TYPE_CHECKING, Self +from joblib._multiprocessing_helpers import mp + from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table from safeds.exceptions import DatasetMissesDataError, LearningError, ModelNotFittedError, PlainTableError @@ -273,7 +275,7 @@ def fit_by_exhaustive_search( list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index cdc205e47..30268c51f 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -4,6 +4,8 @@ from concurrent.futures import ALL_COMPLETED, ProcessPoolExecutor, wait from typing import TYPE_CHECKING, Self +from joblib._multiprocessing_helpers import mp + from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table from safeds.exceptions import ( @@ -300,7 +302,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models)) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as 
executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) From 415dfa2bad4cf6e600e44cbd5acaf019ba98bf12 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 15:05:38 +0200 Subject: [PATCH 78/94] update choice tests --- src/safeds/ml/hyperparameters/_choice.py | 1 - tests/safeds/ml/hyperparameters/test_choice.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/safeds/ml/hyperparameters/_choice.py b/src/safeds/ml/hyperparameters/_choice.py index 7a847a53c..7ef1376ee 100644 --- a/src/safeds/ml/hyperparameters/_choice.py +++ b/src/safeds/ml/hyperparameters/_choice.py @@ -30,7 +30,6 @@ def __init__(self, *args: T) -> None: raise EmptyChoiceError() self.elements = list(dict.fromkeys(args)) - def __contains__(self, value: Any) -> bool: """ Check if a value is in this choice. diff --git a/tests/safeds/ml/hyperparameters/test_choice.py b/tests/safeds/ml/hyperparameters/test_choice.py index 8adcd5952..b515845e5 100644 --- a/tests/safeds/ml/hyperparameters/test_choice.py +++ b/tests/safeds/ml/hyperparameters/test_choice.py @@ -3,12 +3,20 @@ from typing import TYPE_CHECKING import pytest + +from safeds.exceptions import EmptyChoiceError from safeds.ml.hyperparameters import Choice if TYPE_CHECKING: from typing import Any +class TestInit: + def test_should_iterate_values(self) -> None: + with pytest.raises(EmptyChoiceError): + Choice() + + class TestContains: @pytest.mark.parametrize( ("choice", "value", "expected"), @@ -35,11 +43,9 @@ class TestIter: @pytest.mark.parametrize( ("choice", "expected"), [ - (Choice(), []), (Choice(1, 2, 3), [1, 2, 3]), ], ids=[ - "empty", "non-empty", ], ) @@ -51,11 +57,9 @@ class TestLen: @pytest.mark.parametrize( ("choice", "expected"), [ - (Choice(), 0), (Choice(1, 2, 3), 3), ], ids=[ - "empty", "non-empty", ], ) From 537414907836e75b871af3a1fca6975e99d85d21 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 15:08:19 +0200 Subject: [PATCH 79/94] 
linter fix --- src/safeds/ml/hyperparameters/_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/hyperparameters/_choice.py b/src/safeds/ml/hyperparameters/_choice.py index 7ef1376ee..e97c73e19 100644 --- a/src/safeds/ml/hyperparameters/_choice.py +++ b/src/safeds/ml/hyperparameters/_choice.py @@ -27,7 +27,7 @@ def __init__(self, *args: T) -> None: The values to choose from. """ if len(args) < 1: - raise EmptyChoiceError() + raise EmptyChoiceError self.elements = list(dict.fromkeys(args)) def __contains__(self, value: Any) -> bool: From 1daac9ffbc365f52cbed3a00b905e4d0a7d06ac4 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:11:28 +0000 Subject: [PATCH 80/94] style: apply automated linter fixes --- src/safeds/exceptions/_ml.py | 1 + src/safeds/ml/classical/classification/_classifier.py | 2 +- src/safeds/ml/classical/regression/_regressor.py | 2 +- tests/safeds/ml/classical/classification/test_classifier.py | 3 ++- tests/safeds/ml/classical/regression/test_regressor.py | 4 ++-- tests/safeds/ml/hyperparameters/test_choice.py | 1 - 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 2b9b536b0..4f0462188 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -48,6 +48,7 @@ class EmptyChoiceError(ValueError): def __init__(self) -> None: super().__init__("Please provide at least one Value in a Choice Parameter") + class FittingWithChoiceError(Exception): """Raised when a model is fitted with a choice object as a parameter.""" diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index cee51a82a..4ab89d6dd 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -275,7 +275,7 @@ def fit_by_exhaustive_search( list_of_models 
= self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("fork")) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 30268c51f..ff6968517 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -302,7 +302,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("fork")) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 3d964d2e4..0706f77a7 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -9,13 +9,14 @@ from safeds.exceptions import ( DatasetMissesDataError, DatasetMissesFeaturesError, + EmptyChoiceError, FittingWithChoiceError, FittingWithoutChoiceError, LearningError, MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, EmptyChoiceError, + PlainTableError, ) from safeds.ml.classical.classification import ( AdaBoostClassifier, diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index d39303e68..1b6bbc37c 100644 --- 
a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -10,13 +10,13 @@ ColumnLengthMismatchError, DatasetMissesDataError, DatasetMissesFeaturesError, + EmptyChoiceError, FittingWithChoiceError, FittingWithoutChoiceError, - LearningError, MissingValuesColumnError, ModelNotFittedError, NonNumericColumnError, - PlainTableError, EmptyChoiceError, + PlainTableError, ) from safeds.ml.classical.regression import ( AdaBoostRegressor, diff --git a/tests/safeds/ml/hyperparameters/test_choice.py b/tests/safeds/ml/hyperparameters/test_choice.py index b515845e5..ec19031cd 100644 --- a/tests/safeds/ml/hyperparameters/test_choice.py +++ b/tests/safeds/ml/hyperparameters/test_choice.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING import pytest - from safeds.exceptions import EmptyChoiceError from safeds.ml.hyperparameters import Choice From 2d0b293c12598820462e6f6470f05b435f2f8b02 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 8 Jul 2024 15:15:37 +0200 Subject: [PATCH 81/94] set context of Processpoolexecutor to spawn --- src/safeds/ml/classical/classification/_classifier.py | 2 +- src/safeds/ml/classical/regression/_regressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index cee51a82a..3998dbfe4 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -275,7 +275,7 @@ def fit_by_exhaustive_search( list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('spawn')) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) diff --git 
a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 30268c51f..e579929d3 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -302,7 +302,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('fork')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('spawn')) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) From f2a80ee8be1353f11eb555734295df6ffa135bcb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:17:57 +0000 Subject: [PATCH 82/94] style: apply automated linter fixes --- src/safeds/ml/classical/classification/_classifier.py | 2 +- src/safeds/ml/classical/regression/_regressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 3998dbfe4..8ebb1f4bc 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -275,7 +275,7 @@ def fit_by_exhaustive_search( list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('spawn')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("spawn")) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 
e579929d3..e893e2185 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -302,7 +302,7 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me list_of_models = self._get_models_for_all_choices() list_of_fitted_models = [] - with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context('spawn')) as executor: + with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("spawn")) as executor: futures = [] for model in list_of_models: futures.append(executor.submit(model.fit, train_data)) From d6e596ed29b71cfe9c554903796e5c08750521d2 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 10 Jul 2024 15:44:00 +0200 Subject: [PATCH 83/94] set context of Processpoolexecutor to spawn --- .../classical/regression/_linear_regressor.py | 2 +- .../regression/test_linear_regressor.py | 64 ++++++++++++++++++- .../ml/classical/regression/test_regressor.py | 1 + 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/regression/_linear_regressor.py b/src/safeds/ml/classical/regression/_linear_regressor.py index 47054a786..f6ada488e 100644 --- a/src/safeds/ml/classical/regression/_linear_regressor.py +++ b/src/safeds/ml/classical/regression/_linear_regressor.py @@ -314,7 +314,7 @@ def __init__(self, alpha: float | Choice[float] = 1.0, lasso_ratio: float | Choi self._lasso_ratio = lasso_ratio def __eq__(self, other: object) -> bool: - if not isinstance(other, _Lasso): + if not isinstance(other, _ElasticNet): return NotImplemented return True diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index 5da2d14bc..60e912d01 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -1,3 +1,5 @@ +import sys + import pytest from 
safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table @@ -6,7 +8,7 @@ from safeds.ml.hyperparameters import Choice -def kernels() -> list[LinearRegressor.Penalty]: +def penalties() -> list[LinearRegressor.Penalty]: """ Return the list of penalties to test. @@ -62,3 +64,63 @@ def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float | Ch def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) + + @pytest.mark.parametrize( + ("penalty1", "penalty2"), + ([(x, y) for x in penalties() for y in penalties() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_equal_penalties( + self, + penalty1: LinearRegressor.Penalty, + penalty2: LinearRegressor.Penalty, + ) -> None: + assert penalty1 == penalty2 + + @pytest.mark.parametrize( + ("penalty1", "penalty2"), + ([(x, y) for x in penalties() for y in penalties() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_unequal_penalties( + self, + penalty1: LinearRegressor.Penalty, + penalty2: LinearRegressor.Penalty, + ) -> None: + assert penalty1 != penalty2 + + @pytest.mark.parametrize( + ("penalty1", "penalty2"), + ([(x, y) for x in penalties() for y in penalties() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_same_hash_for_equal_penalties( + self, + penalty1: LinearRegressor.Penalty, + penalty2: LinearRegressor.Penalty, + ) -> None: + assert hash(penalty1) == hash(penalty2) + + @pytest.mark.parametrize( + ("penalty1", "penalty2"), + ([(x, y) for x in penalties() for y in penalties() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_different_hash_for_unequal_penalties( + self, + penalty1: LinearRegressor.Penalty, + 
penalty2: LinearRegressor.Penalty, + ) -> None: + assert hash(penalty1) != hash(penalty2) + + + @pytest.mark.parametrize( + "penalty", + ([LinearRegressor.Penalty.ridge(), LinearRegressor.Penalty.lasso(), LinearRegressor.Penalty.elastic_net()]), + ids=lambda x: x.__class__.__name__, + ) + def test_sizeof_kernel( + self, + penalty: LinearRegressor.Penalty, + ) -> None: + assert sys.getsizeof(penalty) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 1b6bbc37c..5dde11f2c 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -85,6 +85,7 @@ def regressors_with_choices() -> list[Regressor]: penalty=Choice( None, LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), + LinearRegressor.Penalty.ridge(), ), ), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), From 09b5b06e09347dbc602d890170d335b79e154e9d Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:48:21 +0000 Subject: [PATCH 84/94] style: apply automated linter fixes --- tests/safeds/ml/classical/regression/test_linear_regressor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index 60e912d01..d2bce8e1e 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -113,7 +113,6 @@ def test_should_return_different_hash_for_unequal_penalties( ) -> None: assert hash(penalty1) != hash(penalty2) - @pytest.mark.parametrize( "penalty", ([LinearRegressor.Penalty.ridge(), LinearRegressor.Penalty.lasso(), LinearRegressor.Penalty.elastic_net()]), From 
401de4cb0d68067d7e0c5ec816fdd7ac1ad60c5f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 14 Jul 2024 22:34:01 +0000 Subject: [PATCH 85/94] style: apply automated linter fixes --- .../ml/classical/classification/_decision_tree_classifier.py | 4 ++-- .../ml/classical/regression/_decision_tree_regressor.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/classical/classification/_decision_tree_classifier.py b/src/safeds/ml/classical/classification/_decision_tree_classifier.py index d8fb41144..e9a363db9 100644 --- a/src/safeds/ml/classical/classification/_decision_tree_classifier.py +++ b/src/safeds/ml/classical/classification/_decision_tree_classifier.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.data.image.containers import Image +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.exceptions._ml import ModelNotFittedError from safeds.ml.classical._bases import _DecisionTreeBase from safeds.ml.hyperparameters import Choice @@ -97,7 +97,7 @@ def _get_models_for_all_choices(self) -> list[DecisionTreeClassifier]: for msc in min_sample_count_choices: models.append(DecisionTreeClassifier(max_depth=md, min_sample_count_in_leaves=msc)) return models - + # ------------------------------------------------------------------------------------------------------------------ # Plot # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/regression/_decision_tree_regressor.py b/src/safeds/ml/classical/regression/_decision_tree_regressor.py index 959e33fca..37ee02030 100644 --- a/src/safeds/ml/classical/regression/_decision_tree_regressor.py +++ b/src/safeds/ml/classical/regression/_decision_tree_regressor.py @@ 
-3,8 +3,8 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.data.image.containers import Image +from safeds.exceptions import FittingWithChoiceError, FittingWithoutChoiceError from safeds.exceptions._ml import ModelNotFittedError from safeds.ml.classical._bases import _DecisionTreeBase from safeds.ml.hyperparameters import Choice @@ -135,4 +135,3 @@ def plot(self) -> Image: plt.close() return Image.from_bytes(image) - \ No newline at end of file From 319ce977b3dc837b3ab4cf08cb6b46ab37067656 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Jul 2024 01:08:30 +0200 Subject: [PATCH 86/94] add tests --- .../classical/classification/_classifier.py | 18 ++++---- .../ml/classical/regression/_regressor.py | 18 ++++---- .../classification/test_classifier.py | 43 ++++++++++++++++--- .../ml/classical/regression/test_regressor.py | 28 +++++++++--- 4 files changed, 76 insertions(+), 31 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 8ebb1f4bc..365a99427 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -251,8 +251,6 @@ def fit_by_exhaustive_search( LearningError If the training data contains invalid values or if the training failed. 
""" - if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): - raise PlainTableError if training_set.to_table().row_count == 0: raise DatasetMissesDataError if optimization_metric.value in {"precision", "recall", "f1score"} and positive_class is None: @@ -302,23 +300,23 @@ def fit_by_exhaustive_search( case "accuracy": accuracy_of_fitted_model = fitted_model.accuracy(test_data) if accuracy_of_fitted_model > best_metric_value: - best_model = fitted_model - best_metric_value = accuracy_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = accuracy_of_fitted_model # pragma: no cover case "precision": precision_of_fitted_model = fitted_model.precision(test_data, positive_class) if precision_of_fitted_model > best_metric_value: - best_model = fitted_model - best_metric_value = precision_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = precision_of_fitted_model # pragma: no cover case "recall": recall_of_fitted_model = fitted_model.recall(test_data, positive_class) if recall_of_fitted_model > best_metric_value: - best_model = fitted_model - best_metric_value = recall_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = recall_of_fitted_model # pragma: no cover case "f1score": f1score_of_fitted_model = fitted_model.f1score(test_data, positive_class) if f1score_of_fitted_model > best_metric_value: - best_model = fitted_model - best_metric_value = f1score_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = f1score_of_fitted_model # pragma: no cover assert best_model is not None return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index e893e2185..75a39a2a0 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -282,8 +282,6 @@ def fit_by_exhaustive_search(self, training_set: 
TabularDataset, optimization_me LearningError If the training data contains invalid values or if the training failed. """ - if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table): - raise PlainTableError if training_set.to_table().row_count == 0: raise DatasetMissesDataError @@ -330,23 +328,23 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me case "mean_squared_error": error_of_fitted_model = fitted_model.mean_squared_error(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model - best_metric_value = error_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover case "mean_absolute_error": error_of_fitted_model = fitted_model.mean_absolute_error(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model - best_metric_value = error_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover case "median_absolute_deviation": error_of_fitted_model = fitted_model.median_absolute_deviation(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model - best_metric_value = error_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover case "coefficient_of_determination": error_of_fitted_model = fitted_model.coefficient_of_determination(test_data) if error_of_fitted_model > best_metric_value: - best_model = fitted_model - best_metric_value = error_of_fitted_model + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover assert best_model is not None return best_model diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 0706f77a7..30e846928 100644 --- 
a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -142,16 +142,47 @@ def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( with pytest.raises(FittingWithoutChoiceError): classifier.fit_by_exhaustive_search(valid_data, optimization_metric=ClassifierMetric.ACCURACY) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( + @pytest.mark.parametrize( + ("metric", "positive_class"), + [ + ( + ClassifierMetric.ACCURACY, + None, + ), + ( + ClassifierMetric.PRECISION, + 0, + ), + ( + ClassifierMetric.RECALL, + 0, + ), + ( + ClassifierMetric.F1_SCORE, + 0, + ), + ], + ids=["accuracy", "precision", "recall", "f1_score"], + ) + def test_should_check_return_type_with_metric( self, valid_data: TabularDataset, + metric: ClassifierMetric, + positive_class: Any, ) -> None: - with pytest.raises(EmptyChoiceError): - AdaBoostClassifier(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( - valid_data, - optimization_metric=ClassifierMetric.ACCURACY, - ) + fitted_model = AdaBoostClassifier(max_learner_count=Choice(2, 3)).fit_by_exhaustive_search( + valid_data, + optimization_metric=metric, + positive_class=positive_class, + ) + assert isinstance(fitted_model, AdaBoostClassifier) + def test_should_raise_when_dataset_misses_data(self) -> None: + with pytest.raises(DatasetMissesDataError): + AdaBoostClassifier(max_learner_count=Choice(2, 3)).fit_by_exhaustive_search( + Table.from_dict({"a": [], "b": []}).to_tabular_dataset("a"), + ClassifierMetric.ACCURACY, + ) @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 5dde11f2c..5a4c1d2e5 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ 
b/tests/safeds/ml/classical/regression/test_regressor.py @@ -133,14 +133,32 @@ def test_should_raise_if_model_is_fitted_by_exhaustive_search_without_choice( with pytest.raises(FittingWithoutChoiceError): regressor.fit_by_exhaustive_search(valid_data, optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR) - def test_should_raise_if_model_is_fitted_by_exhaustive_search_with_empty_choice( + @pytest.mark.parametrize( + "metric", + [ + RegressorMetric.MEAN_SQUARED_ERROR, + RegressorMetric.MEAN_ABSOLUTE_ERROR, + RegressorMetric.MEDIAN_ABSOLUTE_DEVIATION, + RegressorMetric.COEFFICIENT_OF_DETERMINATION, + ], + ids=["mean_squared_error", "mean_absolute_error", "median_absolute_deviation", "coefficient_of_determination"], + ) + def test_should_check_return_type_with_metric( self, valid_data: TabularDataset, + metric: RegressorMetric, ) -> None: - with pytest.raises(EmptyChoiceError): - AdaBoostRegressor(max_learner_count=Choice(), learning_rate=Choice()).fit_by_exhaustive_search( - valid_data, - optimization_metric=RegressorMetric.MEAN_SQUARED_ERROR, + fitted_model = AdaBoostRegressor(max_learner_count=Choice(2, 3)).fit_by_exhaustive_search( + valid_data, + optimization_metric=metric, + ) + assert isinstance(fitted_model, AdaBoostRegressor) + + def test_should_raise_when_dataset_misses_data(self) -> None: + with pytest.raises(DatasetMissesDataError): + AdaBoostRegressor(max_learner_count=Choice(2, 3)).fit_by_exhaustive_search( + Table.from_dict({"a": [], "b": []}).to_tabular_dataset("a"), + RegressorMetric.MEAN_SQUARED_ERROR, ) From 7ef9d4582779d457f95799c9785d6e43d586a6c7 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 15 Jul 2024 01:13:01 +0200 Subject: [PATCH 87/94] linter --- src/safeds/ml/classical/classification/_classifier.py | 4 ++-- src/safeds/ml/classical/regression/_regressor.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 
365a99427..e231891c4 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -7,12 +7,12 @@ from joblib._multiprocessing_helpers import mp from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table -from safeds.exceptions import DatasetMissesDataError, LearningError, ModelNotFittedError, PlainTableError +from safeds.exceptions import DatasetMissesDataError, LearningError, ModelNotFittedError from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import ClassificationMetrics, ClassifierMetric if TYPE_CHECKING: + from safeds.data.tabular.containers import Table from typing import Any diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 75a39a2a0..180ad3911 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -7,18 +7,17 @@ from joblib._multiprocessing_helpers import mp from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Table from safeds.exceptions import ( ColumnLengthMismatchError, DatasetMissesDataError, ModelNotFittedError, - PlainTableError, ) from safeds.ml.classical import SupervisedModel from safeds.ml.metrics import RegressionMetrics, RegressorMetric if TYPE_CHECKING: from safeds.data.tabular.containers import Column + from safeds.data.tabular.containers import Table class Regressor(SupervisedModel, ABC): From f3def3d200fa05597e2fa656f16977890aa6b8ad Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 14 Jul 2024 23:14:59 +0000 Subject: [PATCH 88/94] style: apply automated linter fixes --- .../classical/classification/_classifier.py | 19 ++++++++++--------- .../ml/classical/regression/_regressor.py | 19 +++++++++---------- .../classification/test_classifier.py | 2 +- 
.../ml/classical/regression/test_regressor.py | 1 - 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index e231891c4..9d1d985a0 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -12,9 +12,10 @@ from safeds.ml.metrics import ClassificationMetrics, ClassifierMetric if TYPE_CHECKING: - from safeds.data.tabular.containers import Table from typing import Any + from safeds.data.tabular.containers import Table + class Classifier(SupervisedModel, ABC): """A model for classification tasks.""" @@ -300,23 +301,23 @@ def fit_by_exhaustive_search( case "accuracy": accuracy_of_fitted_model = fitted_model.accuracy(test_data) if accuracy_of_fitted_model > best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = accuracy_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = accuracy_of_fitted_model # pragma: no cover case "precision": precision_of_fitted_model = fitted_model.precision(test_data, positive_class) if precision_of_fitted_model > best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = precision_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = precision_of_fitted_model # pragma: no cover case "recall": recall_of_fitted_model = fitted_model.recall(test_data, positive_class) if recall_of_fitted_model > best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = recall_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = recall_of_fitted_model # pragma: no cover case "f1score": f1score_of_fitted_model = fitted_model.f1score(test_data, positive_class) if f1score_of_fitted_model > best_metric_value: - best_model = fitted_model # 
pragma: no cover - best_metric_value = f1score_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = f1score_of_fitted_model # pragma: no cover assert best_model is not None return best_model diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 180ad3911..b74795002 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -16,8 +16,7 @@ from safeds.ml.metrics import RegressionMetrics, RegressorMetric if TYPE_CHECKING: - from safeds.data.tabular.containers import Column - from safeds.data.tabular.containers import Table + from safeds.data.tabular.containers import Column, Table class Regressor(SupervisedModel, ABC): @@ -327,23 +326,23 @@ def fit_by_exhaustive_search(self, training_set: TabularDataset, optimization_me case "mean_squared_error": error_of_fitted_model = fitted_model.mean_squared_error(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = error_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover case "mean_absolute_error": error_of_fitted_model = fitted_model.mean_absolute_error(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = error_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover case "median_absolute_deviation": error_of_fitted_model = fitted_model.median_absolute_deviation(test_data) if error_of_fitted_model < best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = error_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover 
case "coefficient_of_determination": error_of_fitted_model = fitted_model.coefficient_of_determination(test_data) if error_of_fitted_model > best_metric_value: - best_model = fitted_model # pragma: no cover - best_metric_value = error_of_fitted_model # pragma: no cover + best_model = fitted_model # pragma: no cover + best_metric_value = error_of_fitted_model # pragma: no cover assert best_model is not None return best_model diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 30e846928..a5134f118 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -9,7 +9,6 @@ from safeds.exceptions import ( DatasetMissesDataError, DatasetMissesFeaturesError, - EmptyChoiceError, FittingWithChoiceError, FittingWithoutChoiceError, LearningError, @@ -184,6 +183,7 @@ def test_should_raise_when_dataset_misses_data(self) -> None: ClassifierMetric.ACCURACY, ) + @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__) class TestFit: def test_should_succeed_on_valid_data(self, classifier: Classifier, valid_data: TabularDataset) -> None: diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 5a4c1d2e5..90ff36bfb 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -10,7 +10,6 @@ ColumnLengthMismatchError, DatasetMissesDataError, DatasetMissesFeaturesError, - EmptyChoiceError, FittingWithChoiceError, FittingWithoutChoiceError, MissingValuesColumnError, From c16b4dfe9709dc4f738bfd5a2be303c360bc6e47 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 18 Jul 2024 18:25:48 +0200 Subject: [PATCH 89/94] add tests --- .../regression/test_linear_regressor.py | 69 ++++++++++++------- .../ml/classical/regression/test_regressor.py | 12 +++- 2 files 
changed, 55 insertions(+), 26 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index d2bce8e1e..144390c13 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -41,30 +41,6 @@ def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> assert isinstance(fitted_model.penalty, _Linear) assert fitted_model._wrapped_model is not None - @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) - def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=alpha)) - - @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) - def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=alpha)) - - @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_0_point_5", "invalid_choice"]) - def test_should_raise_if_alpha_out_of_bounds_elastic_net(self, alpha: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha)) - - @pytest.mark.parametrize( - "lasso_ratio", - [-0.5, 1.5, Choice(-0.5)], - ids=["minus_0_point_5", "one_point_five", "invalid_choice"], - ) - def test_should_raise_if_lasso_ratio_out_of_bounds_elastic_net(self, lasso_ratio: float | Choice[float]) -> None: - with pytest.raises(OutOfBoundsError): - LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) - @pytest.mark.parametrize( ("penalty1", "penalty2"), ([(x, y) for x in penalties() for y in penalties() if x.__class__ 
== y.__class__]), @@ -123,3 +99,48 @@ def test_sizeof_kernel( penalty: LinearRegressor.Penalty, ) -> None: assert sys.getsizeof(penalty) > sys.getsizeof(object()) + + class TestLinear: + def test_str(self) -> None: + linear_penalty = LinearRegressor.Penalty.linear() + assert linear_penalty.__str__() == "Linear" + + class TestRidge: + def test_str(self) -> None: + ridge_penalty = LinearRegressor.Penalty.ridge(0.5) + assert ridge_penalty.__str__() == f"Ridge(alpha={0.5})" + + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_zero_point_five", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[float]) -> None: + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=alpha)) + + class TestLasso: + def test_str(self) -> None: + lasso_penalty = LinearRegressor.Penalty.lasso(0.5) + assert lasso_penalty.__str__() == f"Lasso(alpha={0.5})" + + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_zero_point_five", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | Choice[float]) -> None: + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=alpha)) + + class TestElasticNet: + def test_str(self) -> None: + elastic_net_penalty = LinearRegressor.Penalty.elastic_net(0.5, 0.75) + assert elastic_net_penalty.__str__() == f"ElasticNet(alpha={0.5}, lasso_ratio={0.75})" + + @pytest.mark.parametrize("alpha", [-0.5, Choice(-0.5)], ids=["minus_zero_point_five", "invalid_choice"]) + def test_should_raise_if_alpha_out_of_bounds(self, alpha: float | Choice[float]) -> None: + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(alpha=alpha)) + + @pytest.mark.parametrize( + "lasso_ratio", + [-0.5, 1.5, Choice(-0.5)], + ids=["minus_zero_point_five", "one_point_five", "invalid_choice"], + ) + def 
test_should_raise_if_lasso_ratio_out_of_bounds(self, + lasso_ratio: float | Choice[float]) -> None: + with pytest.raises(OutOfBoundsError): + LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 5a4c1d2e5..acfe09071 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -84,10 +84,18 @@ def regressors_with_choices() -> list[Regressor]: LinearRegressor( penalty=Choice( None, - LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), - LinearRegressor.Penalty.ridge(), + LinearRegressor.Penalty.linear(), ), ), + LinearRegressor( + penalty=LinearRegressor.Penalty.lasso(alpha=Choice(0.25, 0.75)) + ), + LinearRegressor( + penalty=LinearRegressor.Penalty.ridge(alpha=Choice(0.25, 0.75)) + ), + LinearRegressor( + penalty=LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)) + ), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorRegressor(kernel=Choice(None, SupportVectorRegressor.Kernel.linear()), c=Choice(0.5, 1.0)), ] From 17ce5c24f0cebeb05c916448744587236d31a7ec Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 18 Jul 2024 16:30:01 +0000 Subject: [PATCH 90/94] style: apply automated linter fixes --- .../ml/classical/regression/test_linear_regressor.py | 3 +-- tests/safeds/ml/classical/regression/test_regressor.py | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index 144390c13..c1167a1b1 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ 
b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -140,7 +140,6 @@ def test_should_raise_if_alpha_out_of_bounds(self, alpha: float | Choice[float]) [-0.5, 1.5, Choice(-0.5)], ids=["minus_zero_point_five", "one_point_five", "invalid_choice"], ) - def test_should_raise_if_lasso_ratio_out_of_bounds(self, - lasso_ratio: float | Choice[float]) -> None: + def test_should_raise_if_lasso_ratio_out_of_bounds(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 7f28c99af..340fe8d7c 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -86,14 +86,10 @@ def regressors_with_choices() -> list[Regressor]: LinearRegressor.Penalty.linear(), ), ), + LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=Choice(0.25, 0.75))), + LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=Choice(0.25, 0.75))), LinearRegressor( - penalty=LinearRegressor.Penalty.lasso(alpha=Choice(0.25, 0.75)) - ), - LinearRegressor( - penalty=LinearRegressor.Penalty.ridge(alpha=Choice(0.25, 0.75)) - ), - LinearRegressor( - penalty=LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)) + penalty=LinearRegressor.Penalty.elastic_net(alpha=Choice(1.0, 2.0), lasso_ratio=Choice(0.1, 0.9)), ), RandomForestRegressor(tree_count=Choice(1, 2), max_depth=Choice(1, 2), min_sample_count_in_leaves=Choice(1, 2)), SupportVectorRegressor(kernel=Choice(None, SupportVectorRegressor.Kernel.linear()), c=Choice(0.5, 1.0)), From 9513f159a985ca76681189becf46ddcb71d2c20a Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 29 Aug 2024 21:26:00 +0200 Subject: [PATCH 91/94] fix codecov --- .../ml/classical/classification/_classifier.py | 4 ++-- 
.../regression/test_linear_regressor.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 9d1d985a0..7d0769312 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -294,7 +294,7 @@ def fit_by_exhaustive_search( best_metric_value = fitted_model.precision(test_data, positive_class) case "recall": best_metric_value = fitted_model.recall(test_data, positive_class) - case "f1score": + case "f1_score": best_metric_value = fitted_model.recall(test_data, positive_class) else: match optimization_metric.value: @@ -313,7 +313,7 @@ def fit_by_exhaustive_search( if recall_of_fitted_model > best_metric_value: best_model = fitted_model # pragma: no cover best_metric_value = recall_of_fitted_model # pragma: no cover - case "f1score": + case "f1_score": f1score_of_fitted_model = fitted_model.f1score(test_data, positive_class) if f1score_of_fitted_model > best_metric_value: best_model = fitted_model # pragma: no cover diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index c1167a1b1..9c7413cba 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -115,6 +115,10 @@ def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[f with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.ridge(alpha=alpha)) + def test_should_assert_alpha_is_set_correctly(self) -> None: + alpha = 0.69 + assert LinearRegressor.Penalty.ridge(alpha=alpha).alpha == alpha + class TestLasso: def test_str(self) -> None: lasso_penalty = LinearRegressor.Penalty.lasso(0.5) @@ -125,6 +129,10 @@ def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | 
Choice[f with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.lasso(alpha=alpha)) + def test_should_assert_alpha_is_set_correctly(self) -> None: + alpha = 0.69 + assert LinearRegressor.Penalty.lasso(alpha=alpha).alpha == alpha + class TestElasticNet: def test_str(self) -> None: elastic_net_penalty = LinearRegressor.Penalty.elastic_net(0.5, 0.75) @@ -143,3 +151,11 @@ def test_should_raise_if_alpha_out_of_bounds(self, alpha: float | Choice[float]) def test_should_raise_if_lasso_ratio_out_of_bounds(self, lasso_ratio: float | Choice[float]) -> None: with pytest.raises(OutOfBoundsError): LinearRegressor(penalty=LinearRegressor.Penalty.elastic_net(lasso_ratio=lasso_ratio)) + + def test_should_assert_alpha_is_set_correctly(self) -> None: + alpha = 0.69 + lasso_ratio = 0.96 + elastic_pen = LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso_ratio) + assert elastic_pen.alpha == alpha + assert elastic_pen.lasso_ratio == lasso_ratio + From 80c99b994923f90fa83a0aa344ecc83d2148c6cb Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 29 Aug 2024 21:30:26 +0200 Subject: [PATCH 92/94] fix linter --- .../ml/classical/regression/test_linear_regressor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index 9c7413cba..cc03a8cc7 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -117,7 +117,7 @@ def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[f def test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 - assert LinearRegressor.Penalty.ridge(alpha=alpha).alpha == alpha + assert LinearRegressor.Penalty.ridge(alpha=alpha).alpha == alpha # type: ignore[attr-defined] class TestLasso: def test_str(self) -> None: @@ -131,7 +131,7 @@ def 
test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | Choice[f def test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 - assert LinearRegressor.Penalty.lasso(alpha=alpha).alpha == alpha + assert LinearRegressor.Penalty.lasso(alpha=alpha).alpha == alpha # type: ignore[attr-defined] class TestElasticNet: def test_str(self) -> None: @@ -156,6 +156,6 @@ def test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 lasso_ratio = 0.96 elastic_pen = LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso_ratio) - assert elastic_pen.alpha == alpha - assert elastic_pen.lasso_ratio == lasso_ratio + assert elastic_pen.alpha == alpha # type: ignore[attr-defined] + assert elastic_pen.lasso_ratio == lasso_ratio # type: ignore[attr-defined] From 5c80f7f7fc5c7af01835fcc3721586a7135f7332 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 29 Aug 2024 19:32:19 +0000 Subject: [PATCH 93/94] style: apply automated linter fixes --- .../ml/classical/regression/test_linear_regressor.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_linear_regressor.py b/tests/safeds/ml/classical/regression/test_linear_regressor.py index cc03a8cc7..0334e58ea 100644 --- a/tests/safeds/ml/classical/regression/test_linear_regressor.py +++ b/tests/safeds/ml/classical/regression/test_linear_regressor.py @@ -117,7 +117,7 @@ def test_should_raise_if_alpha_out_of_bounds_ridge(self, alpha: float | Choice[f def test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 - assert LinearRegressor.Penalty.ridge(alpha=alpha).alpha == alpha # type: ignore[attr-defined] + assert LinearRegressor.Penalty.ridge(alpha=alpha).alpha == alpha # type: ignore[attr-defined] class TestLasso: def test_str(self) -> None: @@ -131,7 +131,7 @@ def test_should_raise_if_alpha_out_of_bounds_lasso(self, alpha: float | Choice[f def 
test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 - assert LinearRegressor.Penalty.lasso(alpha=alpha).alpha == alpha # type: ignore[attr-defined] + assert LinearRegressor.Penalty.lasso(alpha=alpha).alpha == alpha # type: ignore[attr-defined] class TestElasticNet: def test_str(self) -> None: @@ -156,6 +156,5 @@ def test_should_assert_alpha_is_set_correctly(self) -> None: alpha = 0.69 lasso_ratio = 0.96 elastic_pen = LinearRegressor.Penalty.elastic_net(alpha=alpha, lasso_ratio=lasso_ratio) - assert elastic_pen.alpha == alpha # type: ignore[attr-defined] - assert elastic_pen.lasso_ratio == lasso_ratio # type: ignore[attr-defined] - + assert elastic_pen.alpha == alpha # type: ignore[attr-defined] + assert elastic_pen.lasso_ratio == lasso_ratio # type: ignore[attr-defined] From 6c72375b3b5baf7ac68fac596419f5cdd527e187 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 29 Aug 2024 21:54:28 +0200 Subject: [PATCH 94/94] fix error --- src/safeds/ml/classical/classification/_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 7d0769312..b87c5a5af 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -314,7 +314,7 @@ def fit_by_exhaustive_search( best_model = fitted_model # pragma: no cover best_metric_value = recall_of_fitted_model # pragma: no cover case "f1_score": - f1score_of_fitted_model = fitted_model.f1score(test_data, positive_class) + f1score_of_fitted_model = fitted_model.f1_score(test_data, positive_class) if f1score_of_fitted_model > best_metric_value: best_model = fitted_model # pragma: no cover best_metric_value = f1score_of_fitted_model # pragma: no cover