diff --git a/cobra/evaluation/evaluator.py b/cobra/evaluation/evaluator.py index b395711..c671f8c 100644 --- a/cobra/evaluation/evaluator.py +++ b/cobra/evaluation/evaluator.py @@ -30,6 +30,10 @@ class ClassificationEvaluator(): Attributes ---------- + y_true : np.ndarray + True binary target data labels. + y_pred : np.ndarray + Target scores of the model. confusion_matrix : np.ndarray Confusion matrix computed for a particular cut-off. cumulative_gains : tuple @@ -51,10 +55,14 @@ class ClassificationEvaluator(): (by default 10, so deciles). """ - def __init__(self, probability_cutoff: float=None, + def __init__(self, + probability_cutoff: float=None, lift_at: float=0.05, n_bins: int = 10): + self.y_true = None + self.y_pred = None + self.lift_at = lift_at self.probability_cutoff = probability_cutoff self.n_bins = n_bins @@ -90,23 +98,26 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray): for pred in y_pred]) # Compute the various evaluation metrics - self.scalar_metrics = ClassificationEvaluator.compute_scalar_metrics( + self.scalar_metrics = ClassificationEvaluator._compute_scalar_metrics( y_true, y_pred, y_pred_b, self.lift_at ) + self.y_true = y_true + self.y_pred = y_pred + self.roc_curve = {"fpr": fpr, "tpr": tpr, "thresholds": thresholds} self.confusion_matrix = confusion_matrix(y_true, y_pred_b) self.lift_curve = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, self.n_bins) self.cumulative_gains = ClassificationEvaluator._compute_cumulative_gains(y_true, y_pred) @staticmethod - def compute_scalar_metrics(y_true: np.ndarray, - y_pred: np.ndarray, - y_pred_b: np.ndarray, - lift_at: float) -> pd.Series: + def _compute_scalar_metrics(y_true: np.ndarray, + y_pred: np.ndarray, + y_pred_b: np.ndarray, + lift_at: float) -> pd.Series: """Convenient function to compute various scalar performance measures and return them in a pd.Series @@ -133,11 +144,10 @@ def compute_scalar_metrics(y_true: np.ndarray, "recall": recall_score(y_true, y_pred_b), 
"F1": f1_score(y_true, y_pred_b, average=None)[1], "matthews_corrcoef": matthews_corrcoef(y_true, y_pred_b), - "lift at {}".format(lift_at): np.round(Evaluator - ._compute_lift( - y_true=y_true, - y_pred=y_pred, - lift_at=lift_at), 2) + "lift at {}".format(lift_at): np.round(ClassificationEvaluator + ._compute_lift(y_true=y_true, + y_pred=y_pred, + lift_at=lift_at), 2) }) def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)): @@ -351,8 +361,8 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)): plt.show() @staticmethod - def find_optimal_cutoff(y_true: np.ndarray, - y_pred: np.ndarray) -> float: + def _find_optimal_cutoff(y_true: np.ndarray, + y_pred: np.ndarray) -> float: """Find the optimal probability cut off point for a classification model. Wrapper around _compute_optimal_cutoff @@ -524,19 +534,27 @@ def _compute_lift(y_true: np.ndarray, y_pred: np.ndarray, return lift - class RegressionEvaluator(): """Summary Attributes ---------- - evaluation_metrics : dict + y_true : np.ndarray + True target values. + y_pred : np.ndarray + Target scores of the model. + scalar_metrics : dict Map containing various scalar evaluation metrics (R-squared, MAE, MSE, RMSE) + qq : pd.Series + Theoretical quantiles and associated actual residuals. """ def __init__(self): + self.y_true = None + self.y_pred = None + # Placeholder to store fitted output self.scalar_metrics = None self.qq = None @@ -552,16 +570,18 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray): y_pred : np.ndarray Model scores. 
""" - # Compute the various evaluation metrics - self.scalar_metrics = RegressionEvaluator.compute_scalar_metrics(y_true, y_pred) + self.scalar_metrics = RegressionEvaluator._compute_scalar_metrics(y_true, y_pred) + + self.y_true = y_true + self.y_pred = y_pred # Compute qq info - self.qq = RegressionEvaluator.compute_qq_residuals(y_true, y_pred) + self.qq = RegressionEvaluator._compute_qq_residuals(y_true, y_pred) @staticmethod - def compute_scalar_metrics(y_true: np.ndarray, - y_pred: np.ndarray) -> pd.Series: + def _compute_scalar_metrics(y_true: np.ndarray, + y_pred: np.ndarray) -> pd.Series: """Convenient function to compute various scalar performance measures and return them in a pd.Series @@ -589,9 +609,9 @@ def compute_scalar_metrics(y_true: np.ndarray, }) @staticmethod - def compute_qq_residuals(y_true: np.ndarray, - y_pred: np.ndarray) -> pd.Series: - """Convenient function to compute various scalar performance measures + def _compute_qq_residuals(y_true: np.ndarray, + y_pred: np.ndarray) -> pd.Series: + """Convenience function to compute theoretical quantiles and residuals and return them in a pd.Series Parameters @@ -604,7 +624,7 @@ def compute_qq_residuals(y_true: np.ndarray, Returns ------- pd.Series - ... + theoretical quantiles and associated actual residuals """ ## also possible directly via statsmodels.api.qqplot() @@ -623,21 +643,22 @@ def compute_qq_residuals(y_true: np.ndarray, "residuals": df["z_res"].values, }) - def plot_predictions(self, y_true: np.ndarray, y_pred: np.ndarray, - path: str=None, dim: tuple=(12, 8)): + def plot_predictions(self, path: str=None, dim: tuple=(12, 8)): """Plot predictions from the model against actual values Parameters ---------- - y_true : np.ndarray - True binary target data labels. - y_pred : np.ndarray - Target scores of the model. path : str, optional Path to store the figure. dim : tuple, optional Tuple with width and length of the plot. 
""" + if self.y_true is None and self.y_pred is None: + msg = ("This {} instance is not fitted yet. Call 'fit' with " + "appropriate arguments before using this method.") + + y_true = self.y_true + y_pred = self.y_pred with plt.style.context("seaborn-whitegrid"): @@ -682,8 +703,8 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)): x = self.qq["quantiles"] y = self.qq["residuals"] - ax.plot(x, y, "o--.", color="cornflowerblue", linewidth=3) - ax.plot(x, x, "r--.", color="cornflowerblue", linewidth=3) + ax.plot(x, x, "r--", color="darkorange", linewidth=3) + ax.plot(x, y, "o--", color="cornflowerblue", linewidth=3) ax.set_xlabel("Theoretical quantiles", fontsize=15) ax.set_ylabel("Standardized residuals", fontsize=15) diff --git a/cobra/evaluation/pigs_tables.py b/cobra/evaluation/pigs_tables.py index d22ad17..4cf8aa2 100644 --- a/cobra/evaluation/pigs_tables.py +++ b/cobra/evaluation/pigs_tables.py @@ -54,13 +54,13 @@ def compute_pig_table(basetable: pd.DataFrame, Parameters ---------- basetable : pd.DataFrame - input data from which to compute the pig table + Input data from which to compute the pig table. predictor_column_name : str - predictor name of which to compute the pig table + Predictor name of which to compute the pig table. target_column_name : str - name of the target variable + Name of the target variable. id_column_name : str - name of the id column (used to count population size) + Name of the id column (used to count population size). Returns ------- @@ -109,17 +109,17 @@ def plot_incidence(pig_tables: pd.DataFrame, Parameters ---------- pig_tables: pd.DataFrame - dataframe with cleaned, binned, partitioned and prepared data, + Dataframe with cleaned, binned, partitioned and prepared data, as created by generate_pig_tables() from this module. variable: str - name of the predictor variable for which the PIG will be plotted. + Name of the predictor variable for which the PIG will be plotted. 
model_type: str - type of model (either "classification" or "regression"). + Type of model (either "classification" or "regression"). column_order: list, default=None - explicit order of the value bins of the predictor variable to be used + Explicit order of the value bins of the predictor variable to be used on the PIG. dim: tuple, default=(12, 8) - optional tuple to configure the width and length of the plot. + Optional tuple to configure the width and length of the plot. """ if model_type not in ["classification", "regression"]: raise ValueError("An unexpected value was set for the model_type " diff --git a/tests/evaluation/test_evaluation.py b/tests/evaluation/test_evaluation.py index d08d0dd..07296e6 100644 --- a/tests/evaluation/test_evaluation.py +++ b/tests/evaluation/test_evaluation.py @@ -38,8 +38,8 @@ def test_plot_incidence_with_different_column_orders(self): # different bins than in the data variable: column_order=['1st-4th', '5th-6th', '7th-8th']) - # Stubs for later: requires exposing df_plot and testing matplotlib's - # plot object internals: + # Stubs for later (requires exposing df_plot and testing matplotlib's + # plot object fig and ax internals): """ def test_plot_incidence_without_column_order(self): data = mock_data() @@ -47,7 +47,6 @@ def test_plot_incidence_without_column_order(self): variable='education', model_type="classification", column_order=None) - # Can't assert: df_plot is not exposed by the function def test_plot_incidence_with_column_order(self): data = mock_data() @@ -55,7 +54,6 @@ def test_plot_incidence_with_column_order(self): variable='education', model_type="classification", column_order=['1st-4th', '5th-6th', '7th-8th', '9th']) - # Can't assert: df_plot is not exposed by the function def test_plot_incidence_visual_result_for_classification(self): data = mock_data() @@ -63,17 +61,27 @@ def test_plot_incidence_visual_result_for_classification(self): variable='education', model_type="classification", column_order=['1st-4th', 
'5th-6th', '7th-8th', '9th']) - # Can't assert: would need to check matplotlib's fig and ax - # internals. def test_plot_incidence_visual_result_for_regression(self): data = mock_data() # change into regression target though. plot_incidence(pig_tables=data, variable='education', - model_type="classification", + model_type="regression", column_order=['1st-4th', '5th-6th', '7th-8th', '9th']) - # Can't assert: would need to check matplotlib's fig and ax - # internals. + + def test_plot_predictions_regression(self): + y_true, y_pred = mock_preds(50, seed=123) + + evaluator = RegressionEvaluator() + evaluator.fit(y_true, y_pred) + evaluator.plot_predictions() + + def test_plot_qq(self): + y_true, y_pred = mock_preds(50, seed=631993) + + evaluator = RegressionEvaluator() + evaluator.fit(y_true, y_pred) + evaluator.plot_qq() """ def test_lift_curve_n_bins(self): @@ -83,9 +91,38 @@ def test_lift_curve_n_bins(self): n_bins_out = [] for n_bins in n_bins_test: - e = ClassificationEvaluator(n_bins = n_bins) + e = ClassificationEvaluator(n_bins=n_bins) out = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, e.n_bins) lifts = out[1] n_bins_out.append(len(lifts)) assert n_bins_test == n_bins_out + + def test_fit_classification(self): + y_true, y_pred = mock_preds(50) + y_true = (y_true > 0.5).astype(int) # convert to 0-1 labels + + evaluator = ClassificationEvaluator(n_bins=5) + evaluator.fit(y_true, y_pred) + + assert (evaluator.y_true == y_true).all() + assert (evaluator.y_pred == y_pred).all() + for metric in ["accuracy", "AUC", "precision", "recall", + "F1", "matthews_corrcoef", "lift at {}".format(evaluator.lift_at)]: + assert evaluator.scalar_metrics[metric] is not None + assert evaluator.roc_curve is not None + assert evaluator.confusion_matrix is not None + assert evaluator.lift_curve is not None + assert evaluator.cumulative_gains is not None + + def test_fit_regression(self): + y_true, y_pred = mock_preds(50, seed=789) + y_true, y_pred = y_true*10, y_pred*10 
# rescale so it looks more regression-like + evaluator = RegressionEvaluator() + evaluator.fit(y_true, y_pred) + + assert (evaluator.y_true == y_true).all() + assert (evaluator.y_pred == y_pred).all() + for metric in ["R2", "MAE", "MSE", "RMSE"]: + assert evaluator.scalar_metrics[metric] is not None + assert evaluator.qq is not None