Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 53 additions & 32 deletions cobra/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class ClassificationEvaluator():

Attributes
----------
y_true : np.ndarray
True binary target data labels.
y_pred : np.ndarray
Target scores of the model.
confusion_matrix : np.ndarray
Confusion matrix computed for a particular cut-off.
cumulative_gains : tuple
Expand All @@ -51,10 +55,14 @@ class ClassificationEvaluator():
(by default 10, so deciles).
"""

def __init__(self, probability_cutoff: float=None,
def __init__(self,
probability_cutoff: float=None,
lift_at: float=0.05,
n_bins: int = 10):

self.y_true = None
self.y_pred = None

self.lift_at = lift_at
self.probability_cutoff = probability_cutoff
self.n_bins = n_bins
Expand Down Expand Up @@ -90,23 +98,26 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
for pred in y_pred])

# Compute the various evaluation metrics
self.scalar_metrics = ClassificationEvaluator.compute_scalar_metrics(
self.scalar_metrics = ClassificationEvaluator._compute_scalar_metrics(
y_true,
y_pred,
y_pred_b,
self.lift_at
)

self.y_true = y_true
self.y_pred = y_pred

self.roc_curve = {"fpr": fpr, "tpr": tpr, "thresholds": thresholds}
self.confusion_matrix = confusion_matrix(y_true, y_pred_b)
self.lift_curve = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, self.n_bins)
self.cumulative_gains = ClassificationEvaluator._compute_cumulative_gains(y_true, y_pred)

@staticmethod
def compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray,
y_pred_b: np.ndarray,
lift_at: float) -> pd.Series:
def _compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray,
y_pred_b: np.ndarray,
lift_at: float) -> pd.Series:
"""Convenient function to compute various scalar performance measures
and return them in a pd.Series

Expand All @@ -133,11 +144,10 @@ def compute_scalar_metrics(y_true: np.ndarray,
"recall": recall_score(y_true, y_pred_b),
"F1": f1_score(y_true, y_pred_b, average=None)[1],
"matthews_corrcoef": matthews_corrcoef(y_true, y_pred_b),
"lift at {}".format(lift_at): np.round(Evaluator
._compute_lift(
y_true=y_true,
y_pred=y_pred,
lift_at=lift_at), 2)
"lift at {}".format(lift_at): np.round(ClassificationEvaluator
._compute_lift(y_true=y_true,
y_pred=y_pred,
lift_at=lift_at), 2)
})

def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
Expand Down Expand Up @@ -351,8 +361,8 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
plt.show()

@staticmethod
def find_optimal_cutoff(y_true: np.ndarray,
y_pred: np.ndarray) -> float:
def _find_optimal_cutoff(y_true: np.ndarray,
y_pred: np.ndarray) -> float:
"""Find the optimal probability cut off point for a
classification model. Wrapper around _compute_optimal_cutoff

Expand Down Expand Up @@ -524,19 +534,27 @@ def _compute_lift(y_true: np.ndarray, y_pred: np.ndarray,
return lift



class RegressionEvaluator():

"""Summary

Attributes
----------
evaluation_metrics : dict
y_true : np.ndarray
True target values.
y_pred : np.ndarray
Predicted values of the model.
scalar_metrics : dict
Map containing various scalar evaluation metrics (R-squared, MAE, MSE, RMSE)
qq : pd.Series
Theoretical quantiles and associated actual residuals.
"""

def __init__(self):

self.y_true = None
self.y_pred = None

# Placeholder to store fitted output
self.scalar_metrics = None
self.qq = None
Expand All @@ -552,16 +570,18 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
y_pred : np.ndarray
Model scores.
"""

# Compute the various evaluation metrics
self.scalar_metrics = RegressionEvaluator.compute_scalar_metrics(y_true, y_pred)
self.scalar_metrics = RegressionEvaluator._compute_scalar_metrics(y_true, y_pred)

self.y_true = y_true
self.y_pred = y_pred

# Compute qq info
self.qq = RegressionEvaluator.compute_qq_residuals(y_true, y_pred)
self.qq = RegressionEvaluator._compute_qq_residuals(y_true, y_pred)

@staticmethod
def compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
def _compute_scalar_metrics(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenient function to compute various scalar performance measures
and return them in a pd.Series

Expand Down Expand Up @@ -589,9 +609,9 @@ def compute_scalar_metrics(y_true: np.ndarray,
})

@staticmethod
def compute_qq_residuals(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenient function to compute various scalar performance measures
def _compute_qq_residuals(y_true: np.ndarray,
y_pred: np.ndarray) -> pd.Series:
"""Convenience function to compute various scalar performance measures
and return them in a pd.Series

Parameters
Expand All @@ -604,7 +624,7 @@ def compute_qq_residuals(y_true: np.ndarray,
Returns
-------
pd.Series
...
Theoretical quantiles and associated standardized residuals.
"""
## also possible directly via statsmodels.api.qqplot()

Expand All @@ -623,21 +643,22 @@ def compute_qq_residuals(y_true: np.ndarray,
"residuals": df["z_res"].values,
})

def plot_predictions(self, y_true: np.ndarray, y_pred: np.ndarray,
path: str=None, dim: tuple=(12, 8)):
def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
"""Plot predictions from the model against actual values

Parameters
----------
y_true : np.ndarray
True binary target data labels.
y_pred : np.ndarray
Target scores of the model.
path : str, optional
Path to store the figure.
dim : tuple, optional
Tuple with width and length of the plot.
"""
if self.y_true is None and self.y_pred is None:
msg = ("This {} instance is not fitted yet. Call 'fit' with "
"appropriate arguments before using this method.")

y_true = self.y_true
y_pred = self.y_pred

with plt.style.context("seaborn-whitegrid"):

Expand Down Expand Up @@ -682,8 +703,8 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
x = self.qq["quantiles"]
y = self.qq["residuals"]

ax.plot(x, y, "o--.", color="cornflowerblue", linewidth=3)
ax.plot(x, x, "r--.", color="cornflowerblue", linewidth=3)
ax.plot(x, x, "r--", color="darkorange", linewidth=3)
ax.plot(x, y, "o--", color="cornflowerblue", linewidth=3)

ax.set_xlabel("Theoretical quantiles", fontsize=15)
ax.set_ylabel("Standardized residuals", fontsize=15)
Expand Down
18 changes: 9 additions & 9 deletions cobra/evaluation/pigs_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def compute_pig_table(basetable: pd.DataFrame,
Parameters
----------
basetable : pd.DataFrame
input data from which to compute the pig table
Input data from which to compute the pig table.
predictor_column_name : str
predictor name of which to compute the pig table
Predictor name of which to compute the pig table.
target_column_name : str
name of the target variable
Name of the target variable.
id_column_name : str
name of the id column (used to count population size)
Name of the id column (used to count population size).

Returns
-------
Expand Down Expand Up @@ -109,17 +109,17 @@ def plot_incidence(pig_tables: pd.DataFrame,
Parameters
----------
pig_tables: pd.DataFrame
dataframe with cleaned, binned, partitioned and prepared data,
Dataframe with cleaned, binned, partitioned and prepared data,
as created by generate_pig_tables() from this module.
variable: str
name of the predictor variable for which the PIG will be plotted.
Name of the predictor variable for which the PIG will be plotted.
model_type: str
type of model (either "classification" or "regression").
Type of model (either "classification" or "regression").
column_order: list, default=None
explicit order of the value bins of the predictor variable to be used
Explicit order of the value bins of the predictor variable to be used
on the PIG.
dim: tuple, default=(12, 8)
optional tuple to configure the width and length of the plot.
Optional tuple to configure the width and length of the plot.
"""
if model_type not in ["classification", "regression"]:
raise ValueError("An unexpected value was set for the model_type "
Expand Down
57 changes: 47 additions & 10 deletions tests/evaluation/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,42 +38,50 @@ def test_plot_incidence_with_different_column_orders(self):
# different bins than in the data variable:
column_order=['1st-4th', '5th-6th', '7th-8th'])

# Stubs for later: requires exposing df_plot and testing matplotlib's
# plot object internals:
# Stubs for later (requires exposing df_plot and testing matplotlib's
# plot object fig and ax internals):
"""
def test_plot_incidence_without_column_order(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=None)
# Can't assert: df_plot is not exposed by the function

def test_plot_incidence_with_column_order(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: df_plot is not exposed by the function

def test_plot_incidence_visual_result_for_classification(self):
data = mock_data()
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: would need to check matplotlib's fig and ax
# internals.

def test_plot_incidence_visual_result_for_regression(self):
data = mock_data() # change into regression target though.
plot_incidence(pig_tables=data,
variable='education',
model_type="classification",
model_type="regression",
column_order=['1st-4th', '5th-6th', '7th-8th', '9th'])
# Can't assert: would need to check matplotlib's fig and ax
# internals.

def test_plot_predictions_regression(self):
y_true, y_pred = mock_preds(50, seed=123)

evaluator = RegressionEvaluator()
evaluator.fit(y_true, y_pred)
evaluator.plot_predictions()

def test_plot_qq(self):
y_true, y_pred = mock_preds(50, seed=631993)

evaluator = RegressionEvaluator()
evaluator.fit(y_true, y_pred)
evaluator.plot_qq()
"""

def test_lift_curve_n_bins(self):
Expand All @@ -83,9 +91,38 @@ def test_lift_curve_n_bins(self):

n_bins_out = []
for n_bins in n_bins_test:
e = ClassificationEvaluator(n_bins = n_bins)
e = ClassificationEvaluator(n_bins=n_bins)
out = ClassificationEvaluator._compute_lift_per_bin(y_true, y_pred, e.n_bins)
lifts = out[1]
n_bins_out.append(len(lifts))

assert n_bins_test == n_bins_out

def test_fit_classification(self):
    """ClassificationEvaluator.fit should populate every evaluation artefact."""
    y_true, y_pred = mock_preds(50)
    # Binarize the mocked scores so y_true is a valid 0/1 label vector.
    y_true = (y_true > 0.5).astype(int)

    evaluator = ClassificationEvaluator(n_bins=5)
    evaluator.fit(y_true, y_pred)

    assert (evaluator.y_true == y_true).all()
    assert (evaluator.y_pred == y_pred).all()

    expected_metrics = ["accuracy", "AUC", "precision", "recall", "F1",
                        "matthews_corrcoef",
                        "lift at {}".format(evaluator.lift_at)]
    for metric_name in expected_metrics:
        assert evaluator.scalar_metrics[metric_name] is not None

    for fitted_artefact in (evaluator.roc_curve,
                            evaluator.confusion_matrix,
                            evaluator.lift_curve,
                            evaluator.cumulative_gains):
        assert fitted_artefact is not None

def test_fit_regression(self):
    """RegressionEvaluator.fit should populate every evaluation artefact."""
    y_true, y_pred = mock_preds(50, seed=789)
    # Scale the mocked scores up so the targets resemble a regression problem.
    y_true, y_pred = y_true * 10, y_pred * 10

    evaluator = RegressionEvaluator()
    evaluator.fit(y_true, y_pred)

    assert (evaluator.y_true == y_true).all()
    assert (evaluator.y_pred == y_pred).all()
    for metric_name in ("R2", "MAE", "MSE", "RMSE"):
        assert evaluator.scalar_metrics[metric_name] is not None
    assert evaluator.qq is not None