Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions src/safeds/ml/classical/classification/_ada_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,47 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


class AdaBoost(Classifier):
"""Ada Boost classification.
"""
Ada Boost classification.

Parameters
----------
learner: Classifier
The learner from which the boosted ensemble is built.
maximum_number_of_learners: int
The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure
is stopped early. Has to be greater than 0.
learning_rate : float
Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution
of each classifier. Has to be greater than 0.

Raises
------
ValueError
If the learning rate is less than or equal to 0.
If `maximum_number_of_learners` or `learning_rate` are less than or equal to 0
"""

def __init__(self, learning_rate: float = 1.0) -> None:
def __init__(
self,
learner: Classifier | None = None,
maximum_number_of_learners: int = 50,
learning_rate: float = 1.0,
) -> None:
# Validation
if maximum_number_of_learners <= 0:
raise ValueError("The parameter 'maximum_number_of_learners' has to be greater than 0.")
if learning_rate <= 0:
raise ValueError("The learning rate has to be greater than 0.")
raise ValueError("The parameter 'learning_rate' has to be greater than 0.")

# Hyperparameters
self._learner = learner
self._maximum_number_of_learners = maximum_number_of_learners
self._learning_rate = learning_rate

# Internal state
Expand Down Expand Up @@ -61,10 +78,14 @@ def fit(self, training_set: TaggedTable) -> AdaBoost:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_AdaBoostClassifier(learning_rate=self._learning_rate)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = AdaBoost(learning_rate=self._learning_rate)
result = AdaBoost(
learner=self._learner,
maximum_number_of_learners=self._maximum_number_of_learners,
learning_rate=self._learning_rate,
)
result._wrapped_classifier = wrapped_classifier
result._feature_names = training_set.features.column_names
result._target_name = training_set.target.name
Expand Down Expand Up @@ -108,3 +129,19 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    The AdaBoost ensemble is configured with this model's hyperparameters;
    when a base learner was given, its own sklearn classifier is used as the
    ensemble's estimator.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    if self._learner is None:
        base_estimator = None
    else:
        base_estimator = self._learner._get_sklearn_classifier()
    return sk_AdaBoostClassifier(
        estimator=base_estimator,
        n_estimators=self._maximum_number_of_learners,
        learning_rate=self._learning_rate,
    )
13 changes: 13 additions & 0 deletions src/safeds/ml/classical/classification/_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
if TYPE_CHECKING:
from typing import Any

from sklearn.base import ClassifierMixin


class Classifier(ABC):
"""Abstract base class for all classifiers."""
Expand Down Expand Up @@ -76,6 +78,17 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""

@abstractmethod
def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Each concrete subclass builds a fresh, unfitted sklearn estimator that
    mirrors its own hyperparameters.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """

# noinspection PyProtectedMember
def accuracy(self, validation_or_test_set: TaggedTable) -> float:
"""
Expand Down
7 changes: 6 additions & 1 deletion src/safeds/ml/classical/classification/_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand Down Expand Up @@ -42,7 +44,7 @@ def fit(self, training_set: TaggedTable) -> DecisionTree:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_DecisionTreeClassifier()
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = DecisionTree()
Expand Down Expand Up @@ -89,3 +91,6 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    return sk_DecisionTreeClassifier()
24 changes: 17 additions & 7 deletions src/safeds/ml/classical/classification/_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -28,15 +30,15 @@ class GradientBoosting(Classifier):
Raises
------
ValueError
If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
If `number_of_trees` is less than or equal to 0 or `learning_rate` is non-positive.
"""

def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
# Validation
if number_of_trees <= 0:
raise ValueError("The number of boosting stages to perform has to be greater than 0.")
raise ValueError("The parameter 'number_of_trees' has to be greater than 0.")
if learning_rate <= 0:
raise ValueError("The learning rate has to be greater than 0.")
raise ValueError("The parameter 'learning_rate' has to be greater than 0.")

# Hyperparameters
self._number_of_trees = number_of_trees
Expand Down Expand Up @@ -68,10 +70,7 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_GradientBoostingClassifier(
n_estimators=self._number_of_trees,
learning_rate=self._learning_rate,
)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
Expand Down Expand Up @@ -118,3 +117,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # Forward this model's hyperparameters to the sklearn implementation.
    return sk_GradientBoostingClassifier(
        n_estimators=self._number_of_trees,
        learning_rate=self._learning_rate,
    )
26 changes: 19 additions & 7 deletions src/safeds/ml/classical/classification/_k_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -25,13 +27,13 @@ class KNearestNeighbors(Classifier):
Raises
------
ValueError
If the number of neighbors is less than or equal to 0.
If `number_of_neighbors` is less than or equal to 0.
"""

def __init__(self, number_of_neighbors: int) -> None:
# Validation
if number_of_neighbors <= 0:
raise ValueError("The number of neighbors has to be greater than 0.")
raise ValueError("The parameter 'number_of_neighbors' has to be greater than 0.")

# Hyperparameters
self._number_of_neighbors = number_of_neighbors
Expand Down Expand Up @@ -60,19 +62,18 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors:
Raises
------
ValueError
If the number of neighbors is greater than the sample size.
If `number_of_neighbors` is greater than the sample size.
LearningError
If the training data contains invalid values or if the training failed.
"""
if self._number_of_neighbors > training_set.number_of_rows:
raise ValueError(
(
f"The number of neighbors ({self._number_of_neighbors}) has to be less than or equal to the sample "
f"size ({training_set.number_of_rows})."
f"The parameter 'number_of_neighbors' ({self._number_of_neighbors}) has to be less than or equal to"
f" the sample size ({training_set.number_of_rows})."
),
)

wrapped_classifier = sk_KNeighborsClassifier(self._number_of_neighbors, n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = KNearestNeighbors(self._number_of_neighbors)
Expand Down Expand Up @@ -119,3 +120,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # n_jobs=-1 lets sklearn use all available cores for neighbor search.
    wrapped_classifier = sk_KNeighborsClassifier(self._number_of_neighbors, n_jobs=-1)
    return wrapped_classifier
15 changes: 14 additions & 1 deletion src/safeds/ml/classical/classification/_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand Down Expand Up @@ -42,7 +44,7 @@ def fit(self, training_set: TaggedTable) -> LogisticRegression:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_LogisticRegression(n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = LogisticRegression()
Expand Down Expand Up @@ -89,3 +91,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # n_jobs=-1 parallelizes fitting across all available cores.
    wrapped_classifier = sk_LogisticRegression(n_jobs=-1)
    return wrapped_classifier
19 changes: 16 additions & 3 deletions src/safeds/ml/classical/classification/_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -23,13 +25,13 @@ class RandomForest(Classifier):
Raises
------
ValueError
If the number of trees is less than or equal to 0.
If `number_of_trees` is less than or equal to 0.
"""

def __init__(self, number_of_trees: int = 100) -> None:
# Validation
if number_of_trees < 1:
raise ValueError("The number of trees has to be greater than 0.")
raise ValueError("The parameter 'number_of_trees' has to be greater than 0.")

# Hyperparameters
self._number_of_trees = number_of_trees
Expand Down Expand Up @@ -60,7 +62,7 @@ def fit(self, training_set: TaggedTable) -> RandomForest:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_RandomForestClassifier(self._number_of_trees, n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = RandomForest(self._number_of_trees)
Expand Down Expand Up @@ -107,3 +109,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # First positional argument is the forest size; n_jobs=-1 uses all cores.
    wrapped_classifier = sk_RandomForestClassifier(self._number_of_trees, n_jobs=-1)
    return wrapped_classifier
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -34,7 +36,7 @@ def __init__(self, c: float = 1.0) -> None:
self._target_name: str | None = None

if c <= 0:
raise ValueError("The strength of regularization given by the c parameter must be strictly positive.")
raise ValueError("The parameter 'c' has to be strictly positive.")
self._c = c

def fit(self, training_set: TaggedTable) -> SupportVectorMachine:
Expand All @@ -58,7 +60,7 @@ def fit(self, training_set: TaggedTable) -> SupportVectorMachine:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_SVC(C=self._c)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = SupportVectorMachine(self._c)
Expand Down Expand Up @@ -105,3 +107,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # C is the inverse regularization strength validated in __init__.
    wrapped_classifier = sk_SVC(C=self._c)
    return wrapped_classifier
Loading