Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 53 additions & 32 deletions cobra/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class ClassificationEvaluator():

Attributes
----------
y_true : np.ndarray
True binary target data labels.
y_pred : np.ndarray
Target scores of the model.
confusion_matrix : np.ndarray
Confusion matrix computed for a particular cut-off.
cumulative_gains : tuple
Expand All @@ -51,10 +55,14 @@ class ClassificationEvaluator():
(by default 10, so deciles).
"""

def __init__(self, probability_cutoff: float=None,
def __init__(self,
probability_cutoff: float=None,
lift_at: float=0.05,
n_bins: int = 10):

self.y_true = None
self.y_pred = None

self.lift_at = lift_at
self.probability_cutoff = probability_cutoff
self.n_bins = n_bins
Expand Down Expand Up @@ -90,23 +98,26 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
for pred in y_pred])

# Compute the various evaluation metrics
self.scalar_metrics = ClassificationEvaluator.compute_scalar_metrics(
self.scalar_metrics = ClassificationEvaluator._compute_scalar_metrics(
y_true,
y_pred,
y_pred_b,
self.lift_at
)

self.y_true = y_true
self.y_pred = y_pred

self.roc_curve = {"fpr": fpr, "tpr": tpr, "thresholds": thresholds}
self.confusion_matrix = confusion_matrix(y_true, y_pred_b)
self.lift_curve = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, self.n_bins)
self.cumulative_gains = ClassificationEvaluator._compute_cumulative_gains(y_true, y_pred)

@staticmethod
def compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray,
y_pred_b: np.ndarray,
lift_at: float) -> pd.Series:
def _compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray,
y_pred_b: np.ndarray,
lift_at: float) -> pd.Series:
"""Convenient function to compute various scalar performance measures
and return them in a pd.Series

Expand All @@ -133,11 +144,10 @@ def compute_scalar_metrics(y_true: np.ndarray,
"recall": recall_score(y_true, y_pred_b),
"F1": f1_score(y_true, y_pred_b, average=None)[1],
"matthews_corrcoef": matthews_corrcoef(y_true, y_pred_b),
"lift at {}".format(lift_at): np.round(Evaluator
._compute_lift(
y_true=y_true,
y_pred=y_pred,
lift_at=lift_at), 2)
"lift at {}".format(lift_at): np.round(ClassificationEvaluator
._compute_lift(y_true=y_true,
y_pred=y_pred,
lift_at=lift_at), 2)
})

def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
Expand Down Expand Up @@ -351,8 +361,8 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
plt.show()

@staticmethod
def find_optimal_cutoff(y_true: np.ndarray,
y_pred: np.ndarray) -> float:
def _find_optimal_cutoff(y_true: np.ndarray,
y_pred: np.ndarray) -> float:
"""Find the optimal probability cut off point for a
classification model. Wrapper around _compute_optimal_cutoff

Expand Down Expand Up @@ -524,19 +534,27 @@ def _compute_lift(y_true: np.ndarray, y_pred: np.ndarray,
return lift



class RegressionEvaluator():

"""Summary

Attributes
----------
evaluation_metrics : dict
y_true : np.ndarray
True target values.
y_pred : np.ndarray
Predicted values of the model.
scalar_metrics : dict
Map containing various scalar evaluation metrics (R-squared, MAE, MSE, RMSE)
qq : pd.Series
Theoretical quantiles and associated actual residuals.
"""

def __init__(self):

self.y_true = None
self.y_pred = None

# Placeholder to store fitted output
self.scalar_metrics = None
self.qq = None
Expand All @@ -552,16 +570,18 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
y_pred : np.ndarray
Model scores.
"""

# Compute the various evaluation metrics
self.scalar_metrics = RegressionEvaluator.compute_scalar_metrics(y_true, y_pred)
self.scalar_metrics = RegressionEvaluator._compute_scalar_metrics(y_true, y_pred)

self.y_true = y_true
self.y_pred = y_pred

# Compute qq info
self.qq = RegressionEvaluator.compute_qq_residuals(y_true, y_pred)
self.qq = RegressionEvaluator._compute_qq_residuals(y_true, y_pred)

@staticmethod
def compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
def _compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenient function to compute various scalar performance measures
and return them in a pd.Series

Expand Down Expand Up @@ -589,9 +609,9 @@ def compute_scalar_metrics(y_true: np.ndarray,
})

@staticmethod
def compute_qq_residuals(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenient function to compute various scalar performance measures
def _compute_qq_residuals(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenience function to compute various scalar performance measures
and return them in a pd.Series

Parameters
Expand All @@ -604,7 +624,7 @@ def compute_qq_residuals(y_true: np.ndarray,
Returns
-------
pd.Series
...
Theoretical quantiles and associated standardized residuals.
"""
## also possible directly via statsmodels.api.qqplot()

Expand All @@ -623,21 +643,22 @@ def compute_qq_residuals(y_true: np.ndarray,
"residuals": df["z_res"].values,
})

def plot_predictions(self, y_true: np.ndarray, y_pred: np.ndarray,
path: str=None, dim: tuple=(12, 8)):
def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
"""Plot predictions from the model against actual values

Parameters
----------
y_true : np.ndarray
True binary target data labels.
y_pred : np.ndarray
Target scores of the model.
path : str, optional
Path to store the figure.
dim : tuple, optional
Tuple with width and length of the plot.
"""
if self.y_true is None and self.y_pred is None:
msg = ("This {} instance is not fitted yet. Call 'fit' with "
"appropriate arguments before using this method.")

y_true = self.y_true
y_pred = self.y_pred

with plt.style.context("seaborn-whitegrid"):

Expand Down Expand Up @@ -682,8 +703,8 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
x = self.qq["quantiles"]
y = self.qq["residuals"]

ax.plot(x, y, "o--.", color="cornflowerblue", linewidth=3)
ax.plot(x, x, "r--.", color="cornflowerblue", linewidth=3)
ax.plot(x, x, "r--", color="darkorange", linewidth=3)
ax.plot(x, y, "o--", color="cornflowerblue", linewidth=3)

ax.set_xlabel("Theoretical quantiles", fontsize=15)
ax.set_ylabel("Standardized residuals", fontsize=15)
Expand Down
18 changes: 9 additions & 9 deletions cobra/evaluation/pigs_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def compute_pig_table(basetable: pd.DataFrame,
Parameters
----------
basetable : pd.DataFrame
input data from which to compute the pig table
Input data from which to compute the pig table.
predictor_column_name : str
predictor name of which to compute the pig table
Predictor name of which to compute the pig table.
target_column_name : str
name of the target variable
Name of the target variable.
id_column_name : str
name of the id column (used to count population size)
Name of the id column (used to count population size).

Returns
-------
Expand Down Expand Up @@ -109,17 +109,17 @@ def plot_incidence(pig_tables: pd.DataFrame,
Parameters
----------
pig_tables: pd.DataFrame
dataframe with cleaned, binned, partitioned and prepared data,
Dataframe with cleaned, binned, partitioned and prepared data,
as created by generate_pig_tables() from this module.
variable: str
name of the predictor variable for which the PIG will be plotted.
Name of the predictor variable for which the PIG will be plotted.
model_type: str
type of model (either "classification" or "regression").
Type of model (either "classification" or "regression").
column_order: list, default=None
explicit order of the value bins of the predictor variable to be used
Explicit order of the value bins of the predictor variable to be used
on the PIG.
dim: tuple, default=(12, 8)
optional tuple to configure the width and length of the plot.
Optional tuple to configure the width and length of the plot.
"""
if model_type not in ["classification", "regression"]:
raise ValueError("An unexpected value was set for the model_type "
Expand Down
57 changes: 47 additions & 10 deletions tests/evaluation/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,42 +38,50 @@ def test_plot_incidence_with_different_column_orders(self):
# different bins than in the data variable:
column_order=['1st-4th', '5th-6th', '7th-8th'])

# Stubs for later: requires exposing df_plot and testing matplotlib's
# plot object internals:
# Stubs for later (requires exposing df_plot and testing matplotlib's
# plot object fig and ax internals):
"""
def test_plot_incidence_without_column_order(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=None)
# Can't assert: df_plot is not exposed by the function

def test_plot_incidence_with_column_order(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: df_plot is not exposed by the function

def test_plot_incidence_visual_result_for_classification(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: would need to check matplotlib's fig and ax
# internals.

def test_plot_incidence_visual_result_for_regression(self):
data = mock_data() # change into regression target though.
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
model_type="regression",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: would need to check matplotlib's fig and ax
# internals.

def test_plot_predictions_regression(self):
y_true, y_pred = mock_preds(50, seed=123)

evaluator = RegressionEvaluator()
evaluator.fit(y_true, y_pred)
evaluator.plot_predictions()

def test_plot_qq(self):
y_true, y_pred = mock_preds(50, seed=631993)

evaluator = RegressionEvaluator()
evaluator.fit(y_true, y_pred)
evaluator.plot_qq()
"""

def test_lift_curve_n_bins(self):
Expand All @@ -83,9 +91,38 @@ def test_lift_curve_n_bins(self):

n_bins_out = []
for n_bins in n_bins_test:
e = ClassificationEvaluator(n_bins = n_bins)
e = ClassificationEvaluator(n_bins=n_bins)
out = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, e.n_bins)
lifts = out[1]
n_bins_out.append(len(lifts))

assert n_bins_test == n_bins_out

def test_fit_classification(self):
    """ClassificationEvaluator.fit should populate every evaluation artefact."""
    y_true, y_pred = mock_preds(50)
    # Binarize the mocked scores so y_true is a valid 0/1 label vector.
    y_true = (y_true > 0.5).astype(int)

    evaluator = ClassificationEvaluator(n_bins=5)
    evaluator.fit(y_true, y_pred)

    assert (evaluator.y_true == y_true).all()
    assert (evaluator.y_pred == y_pred).all()

    expected_metrics = ["accuracy", "AUC", "precision", "recall", "F1",
                        "matthews_corrcoef",
                        "lift at {}".format(evaluator.lift_at)]
    for metric_name in expected_metrics:
        assert evaluator.scalar_metrics[metric_name] is not None

    for fitted_artefact in (evaluator.roc_curve,
                            evaluator.confusion_matrix,
                            evaluator.lift_curve,
                            evaluator.cumulative_gains):
        assert fitted_artefact is not None

def test_fit_regression(self):
    """RegressionEvaluator.fit should populate every evaluation artefact."""
    y_true, y_pred = mock_preds(50, seed=789)
    # Scale the mocked scores up so the targets resemble a regression problem.
    y_true, y_pred = y_true * 10, y_pred * 10

    evaluator = RegressionEvaluator()
    evaluator.fit(y_true, y_pred)

    assert (evaluator.y_true == y_true).all()
    assert (evaluator.y_pred == y_pred).all()
    for metric_name in ("R2", "MAE", "MSE", "RMSE"):
        assert evaluator.scalar_metrics[metric_name] is not None
    assert evaluator.qq is not None