def pseudo_r2(self, privileged=None):
    r"""McFadden's Pseudo R² for a group.

    .. math::

       R^2_{McFadden} = 1 - \frac{\ln L_{model}}{\ln L_{null}}

    where :math:`\ln L_{model} = \sum_i [y_i \ln(\hat{p}_i)
    + (1 - y_i)\ln(1 - \hat{p}_i)]` and :math:`\ln L_{null}
    = \sum_i [y_i \ln(\bar{p}) + (1 - y_i)\ln(1 - \bar{p})]`,
    with :math:`\bar{p}` being the base rate (mean of the true binary
    labels) for the group.

    Suitable for binary classification where ``scores`` are predicted
    probabilities in [0, 1] and ``labels`` are binary (0 or 1).

    Args:
        privileged (bool, optional): Boolean prescribing whether to
            condition this metric on the `privileged_groups`, if `True`,
            or the `unprivileged_groups`, if `False`. Defaults to `None`
            meaning this metric is computed over the entire dataset.

    Returns:
        numpy.float64: McFadden's Pseudo R². Returns 0.0 for a
        degenerate group — empty selection, or all labels favorable /
        all unfavorable — where :math:`\ln L_{null} \approx 0` and the
        ratio is meaningless.
    """
    condition = self._to_condition(privileged)
    cond_vec = utils.compute_boolean_conditioning_vector(
        self.dataset.protected_attributes,
        self.dataset.protected_attribute_names,
        condition)

    y_true = np.ravel(self.dataset.labels)[cond_vec]
    y_pred = np.ravel(self.classified_dataset.scores)[cond_vec]

    # Map true labels onto {0, 1} with 1 meaning the favorable outcome.
    y_true = (y_true == self.dataset.favorable_label).astype(np.float64)

    # Degenerate groups: with no samples, or a single-class base rate,
    # the null model is already perfect (ln L_null ~ 0).  Checking
    # `ll_null == 0` AFTER clipping (as a naive implementation would)
    # never fires for non-empty groups — clipping makes ll_null a tiny
    # nonzero value and the division explodes.  Detect the condition on
    # the raw base rate instead and return the documented 0.0.
    if y_true.size == 0:
        return np.float64(0.0)
    p_bar = np.mean(y_true)
    if p_bar == 0.0 or p_bar == 1.0:
        return np.float64(0.0)

    # Clip predicted probabilities away from {0, 1} so logs stay finite.
    y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)

    ll_model = np.sum(y_true * np.log(y_pred_clipped)
                      + (1 - y_true) * np.log(1 - y_pred_clipped))
    # p_bar is strictly inside (0, 1) here, so no clipping is needed.
    ll_null = np.sum(y_true * np.log(p_bar)
                     + (1 - y_true) * np.log(1 - p_bar))

    return np.float64(1.0 - ll_model / ll_null)

def pseudo_r2_parity(self):
    r"""Difference in McFadden's Pseudo R² between unprivileged and
    privileged groups.

    .. math::

       \Delta R^2 = R^2_{\text{unprivileged}} - R^2_{\text{privileged}}

    A value of 0 indicates parity. Positive values indicate the model
    fits better for the unprivileged group; negative values indicate
    better fit for the privileged group.

    Returns:
        numpy.float64: Difference in McFadden's Pseudo R²
        (unprivileged − privileged).
    """
    return self.difference(self.pseudo_r2)
def test_pseudo_r2_unprivileged():
    # The unprivileged slice must still produce a numeric score.
    value = _m_pseudo.pseudo_r2(privileged=False)
    assert isinstance(value, (float, np.floating))


def test_pseudo_r2_parity():
    # Parity must equal the explicit unprivileged-minus-privileged gap.
    gap = (_m_pseudo.pseudo_r2(privileged=False)
           - _m_pseudo.pseudo_r2(privileged=True))
    assert abs(_m_pseudo.pseudo_r2_parity() - gap) < 1e-9