From 96959ab8a0e22d278b1a92f73db37b1474f014d2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 2 Apr 2026 08:42:18 +0000
Subject: [PATCH 1/8] Initial plan


From dd60789643dfe2df15b1e0e1a6407d310288bce5 Mon Sep 17 00:00:00 2001
From: chengpeter88 <peter88620@gmail.com>
Date: Thu, 2 Apr 2026 17:06:48 +0800
Subject: [PATCH 2/8] Add pseudo R2 parity metric to RegressionDatasetMetric

---
 aif360/metrics/regression_metric.py | 57 ++++++++++++++++++++++++++++-
 tests/test_regression_metric.py     | 57 ++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py
index 0ccf2185..93022353 100644
--- a/aif360/metrics/regression_metric.py
+++ b/aif360/metrics/regression_metric.py
@@ -1,5 +1,5 @@
 import numpy as np
-from aif360.metrics import DatasetMetric
+from aif360.metrics import DatasetMetric, utils
 from aif360.datasets import RegressionDataset
 
 
@@ -98,3 +98,58 @@ def _dcg(self, scores):
         logs = np.log2(np.arange(2, len(scores)+2))
         z = np.sum(scores/logs)
         return z
+
+    def pseudo_r2(self, privileged=None):
+        """Compute the Pseudo R² (coefficient of determination) for a group.
+
+        .. math::
+
+           R^2 = 1 - \\frac{SS_{res}}{SS_{tot}}
+
+        where :math:`SS_{res} = \\sum_i (y_i - \\hat{y}_i)^2` and
+        :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`.
+
+        Args:
+            privileged (bool, optional): Boolean prescribing whether to
+                condition this metric on the `privileged_groups`, if `True`, or
+                the `unprivileged_groups`, if `False`. Defaults to `None`
+                meaning this metric is computed over the entire dataset.
+
+        Returns:
+            numpy.float64: Pseudo R² value. Returns 0.0 if
+            :math:`SS_{tot} = 0` (all labels are identical).
+        """
+        condition = self._to_condition(privileged)
+        cond_vec = utils.compute_boolean_conditioning_vector(
+            self.dataset.protected_attributes,
+            self.dataset.protected_attribute_names,
+            condition)
+
+        y_true = np.ravel(self.dataset.labels)[cond_vec]
+        y_pred = np.ravel(self.dataset.scores)[cond_vec]
+
+        ss_res = np.sum((y_true - y_pred) ** 2)
+        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+        if ss_tot == 0:
+            return np.float64(0.0)
+
+        return np.float64(1.0 - ss_res / ss_tot)
+
+    def pseudo_r2_parity(self):
+        """Compute the difference in Pseudo R² between unprivileged and
+        privileged groups.
+
+        .. math::
+
+           \\Delta R^2 = R^2_{\\text{unprivileged}} - R^2_{\\text{privileged}}
+
+        A value of 0 indicates perfect fairness; a positive value indicates
+        the model explains more variance for the unprivileged group; a negative
+        value indicates the model explains more variance for the privileged
+        group.
+
+        Returns:
+            numpy.float64: Difference in Pseudo R² (unprivileged − privileged).
+        """
+        return self.difference(self.pseudo_r2)
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
index 7829e660..dafca17d 100644
--- a/tests/test_regression_metric.py
+++ b/tests/test_regression_metric.py
@@ -15,6 +15,7 @@
     ['r', 20],
     ['b', 10],
 ], columns=['s', 'score'])
+df['s'] = df['s'].astype(object)
 
 dataset = RegressionDataset(df, dep_var_name='score', protected_attribute_names=['s'], privileged_classes=[['r']])
 # sorted_dataset = RegressionDataset(df, dep_var_name='score', protected_attribute_names=['s'], privileged_classes=[['r']])
@@ -36,4 +37,58 @@ def test_dcg():
 
 def test_ndcg():
     actual = m.discounted_cum_gain(normalized=True, full_dataset=dataset)
-    expected = 0.9205433036318259
\ No newline at end of file
+    expected = 0.9205433036318259
+
+
+# --- Pseudo R² tests ---
+# Build a small synthetic dataset with known labels and scores (predictions).
+# privileged group: s == 'r' (mapped to 1), unprivileged group: s == 'b' (mapped to 0)
+_df_r2 = pd.DataFrame({
+    's':      ['r', 'r', 'r', 'b', 'b', 'b'],
+    'label':  [10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
+})
+_df_r2['s'] = _df_r2['s'].astype(object)
+_dataset_r2 = RegressionDataset(
+    _df_r2, dep_var_name='label',
+    protected_attribute_names=['s'],
+    privileged_classes=[['r']]
+)
+# Overwrite scores with imperfect predictions (in the normalized [0,1] space)
+# privileged group (s==1): predictions close to truth → high R²
+# unprivileged group (s==0): predictions less accurate → lower R²
+_preds = np.array([[0.0], [0.1], [0.2], [0.6], [0.9], [1.0]])  # shape (6,1)
+_dataset_r2.scores = _preds
+
+_m_r2 = RegressionDatasetMetric(
+    dataset=_dataset_r2,
+    privileged_groups=[{'s': 1}],
+    unprivileged_groups=[{'s': 0}],
+)
+
+
+def test_pseudo_r2_overall():
+    r2 = _m_r2.pseudo_r2()
+    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
+    assert r2 <= 1.0, f"R² should be <= 1, got {r2}"
+    assert abs(r2 - 0.9142857142857143) < 1e-9, f"Unexpected overall R², got {r2}"
+
+
+def test_pseudo_r2_privileged():
+    r2 = _m_r2.pseudo_r2(privileged=True)
+    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
+    assert abs(r2 - 0.375) < 1e-9, f"Unexpected privileged R², got {r2}"
+
+
+def test_pseudo_r2_unprivileged():
+    r2 = _m_r2.pseudo_r2(privileged=False)
+    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
+    assert abs(r2 - 0.875) < 1e-9, f"Unexpected unprivileged R², got {r2}"
+
+
+def test_pseudo_r2_parity():
+    parity = _m_r2.pseudo_r2_parity()
+    expected = _m_r2.pseudo_r2(privileged=False) - _m_r2.pseudo_r2(privileged=True)
+    assert abs(parity - expected) < 1e-9, (
+        f"pseudo_r2_parity() = {parity}, "
+        f"pseudo_r2(False) - pseudo_r2(True) = {expected}"
+    )
\ No newline at end of file

From 4472a17714a53c84b49f308eb2c1a40a500f2371 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:14:12 +0000
Subject: [PATCH 3/8] Initial plan


From 3830fa1a022ac061c04b33d33bf53d6876611bfa Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:19:14 +0000
Subject: [PATCH 4/8] =?UTF-8?q?Fix=20pseudo=5Fr2()=20to=20use=20McFadden's?=
 =?UTF-8?q?=20Pseudo=20R=C2=B2=20for=20binary=20classification?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agent-Logs-Url: https://github.com/Zheng-ZhongHeng/AIF360/sessions/b2ffc1d9-9448-4d04-8046-a6a0a58fb375

Co-authored-by: Zheng-ZhongHeng <148734589+Zheng-ZhongHeng@users.noreply.github.com>
---
 aif360/metrics/regression_metric.py | 43 ++++++++++++++++++++++-------
 tests/test_regression_metric.py     | 26 +++++++++--------
 2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py
index 93022353..ab31bd7b 100644
--- a/aif360/metrics/regression_metric.py
+++ b/aif360/metrics/regression_metric.py
@@ -2,6 +2,8 @@
 from aif360.metrics import DatasetMetric, utils
 from aif360.datasets import RegressionDataset
 
+_LOG_CLIP_EPS = 1e-10
+
 
 class RegressionDatasetMetric(DatasetMetric):
     """Class for computing metrics based on a single
@@ -100,14 +102,31 @@ def _dcg(self, scores):
         return z
 
     def pseudo_r2(self, privileged=None):
-        """Compute the Pseudo R² (coefficient of determination) for a group.
+        """Compute McFadden's Pseudo R² for a group in a binary classification
+        setting.
+
+        .. math::
+
+           R^2_{McFadden} = 1 - \\frac{\\ln L_{model}}{\\ln L_{null}}
+
+        where
 
         .. math::
 
-           R^2 = 1 - \\frac{SS_{res}}{SS_{tot}}
+           \\ln L_{model} = \\sum_i \\left[ y_i \\ln(\\hat{p}_i) +
+               (1 - y_i) \\ln(1 - \\hat{p}_i) \\right]
 
-        where :math:`SS_{res} = \\sum_i (y_i - \\hat{y}_i)^2` and
-        :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`.
+        and
+
+        .. math::
+
+           \\ln L_{null} = \\sum_i \\left[ y_i \\ln(\\bar{p}) +
+               (1 - y_i) \\ln(1 - \\bar{p}) \\right]
+
+        :math:`\\hat{p}_i` are the predicted probabilities from
+        ``dataset.scores`` (values in ``[0, 1]``), :math:`y_i` are the binary
+        true labels (``0`` or ``1``) from ``dataset.labels``, and
+        :math:`\\bar{p}` is the base rate (mean of the labels) for the group.
 
         Args:
             privileged (bool, optional): Boolean prescribing whether to
@@ -116,8 +135,8 @@ def pseudo_r2(self, privileged=None):
                 meaning this metric is computed over the entire dataset.
 
         Returns:
-            numpy.float64: Pseudo R² value. Returns 0.0 if
-            :math:`SS_{tot} = 0` (all labels are identical).
+            numpy.float64: McFadden's Pseudo R² value. Returns ``0.0`` if
+            :math:`\\ln L_{null} = 0` (all labels are identical).
         """
         condition = self._to_condition(privileged)
         cond_vec = utils.compute_boolean_conditioning_vector(
@@ -128,13 +147,17 @@ def pseudo_r2(self, privileged=None):
         y_true = np.ravel(self.dataset.labels)[cond_vec]
         y_pred = np.ravel(self.dataset.scores)[cond_vec]
 
-        ss_res = np.sum((y_true - y_pred) ** 2)
-        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_model = np.sum(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped))
+
+        p_bar = np.mean(y_true)
+        p_bar_clipped = np.clip(p_bar, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_null = np.sum(y_true * np.log(p_bar_clipped) + (1 - y_true) * np.log(1 - p_bar_clipped))
 
-        if ss_tot == 0:
+        if ll_null == 0:
             return np.float64(0.0)
 
-        return np.float64(1.0 - ss_res / ss_tot)
+        return np.float64(1.0 - ll_model / ll_null)
 
     def pseudo_r2_parity(self):
         """Compute the difference in Pseudo R² between unprivileged and
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
index dafca17d..9a0bbb6a 100644
--- a/tests/test_regression_metric.py
+++ b/tests/test_regression_metric.py
@@ -40,12 +40,13 @@ def test_ndcg():
     expected = 0.9205433036318259
 
 
-# --- Pseudo R² tests ---
-# Build a small synthetic dataset with known labels and scores (predictions).
+# --- Pseudo R² tests (McFadden's) ---
+# Build a small synthetic dataset with binary labels and probability scores.
 # privileged group: s == 'r' (mapped to 1), unprivileged group: s == 'b' (mapped to 0)
+# labels: binary (0 or 1); scores: predicted probabilities (0~1)
 _df_r2 = pd.DataFrame({
-    's':      ['r', 'r', 'r', 'b', 'b', 'b'],
-    'label':  [10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
+    's':      ['r', 'r', 'r', 'r', 'b', 'b', 'b', 'b'],
+    'label':  [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
 })
 _df_r2['s'] = _df_r2['s'].astype(object)
 _dataset_r2 = RegressionDataset(
@@ -53,10 +54,10 @@ def test_ndcg():
     protected_attribute_names=['s'],
     privileged_classes=[['r']]
 )
-# Overwrite scores with imperfect predictions (in the normalized [0,1] space)
-# privileged group (s==1): predictions close to truth → high R²
-# unprivileged group (s==0): predictions less accurate → lower R²
-_preds = np.array([[0.0], [0.1], [0.2], [0.6], [0.9], [1.0]])  # shape (6,1)
+# privileged group: predictions close to true labels → higher McFadden R²
+# unprivileged group: predictions less accurate → lower McFadden R²
+_preds = np.array([[0.9], [0.8], [0.2], [0.1],   # privileged: good predictions
+                   [0.6], [0.4], [0.6], [0.4]])   # unprivileged: poor predictions
 _dataset_r2.scores = _preds
 
 _m_r2 = RegressionDatasetMetric(
@@ -69,20 +70,21 @@ def test_ndcg():
 def test_pseudo_r2_overall():
     r2 = _m_r2.pseudo_r2()
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 <= 1.0, f"R² should be <= 1, got {r2}"
-    assert abs(r2 - 0.9142857142857143) < 1e-9, f"Unexpected overall R², got {r2}"
+    assert r2 <= 1.0, f"McFadden R² should be <= 1, got {r2}"
+    assert abs(r2 - 0.3667937806535049) < 1e-9, f"Unexpected overall McFadden R², got {r2}"
 
 
 def test_pseudo_r2_privileged():
     r2 = _m_r2.pseudo_r2(privileged=True)
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - 0.375) < 1e-9, f"Unexpected privileged R², got {r2}"
+    assert r2 > 0, f"Privileged McFadden R² should be > 0, got {r2}"
+    assert abs(r2 - 0.7630344058337939) < 1e-9, f"Unexpected privileged McFadden R², got {r2}"
 
 
 def test_pseudo_r2_unprivileged():
     r2 = _m_r2.pseudo_r2(privileged=False)
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - 0.875) < 1e-9, f"Unexpected unprivileged R², got {r2}"
+    assert abs(r2 - (-0.029446844526784144)) < 1e-9, f"Unexpected unprivileged McFadden R², got {r2}"
 
 
 def test_pseudo_r2_parity():

From b6591d4fac7e9d25737dfba31424813c1a8de852 Mon Sep 17 00:00:00 2001
From: Peter <s711161120@gm.ntpu.edu.tw>
Date: Thu, 2 Apr 2026 09:19:14 +0000
Subject: [PATCH 5/8] =?UTF-8?q?Fix=20pseudo=5Fr2()=20to=20use=20McFadden's?=
 =?UTF-8?q?=20Pseudo=20R=C2=B2=20for=20binary=20classification?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agent-Logs-Url: https://github.com/Zheng-ZhongHeng/AIF360/sessions/b2ffc1d9-9448-4d04-8046-a6a0a58fb375

Co-authored-by: Zheng-ZhongHeng <148734589+Zheng-ZhongHeng@users.noreply.github.com>
---
 aif360/metrics/regression_metric.py | 43 ++++++++++++++++++++++-------
 tests/test_regression_metric.py     | 26 +++++++++--------
 2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py
index 93022353..ab31bd7b 100644
--- a/aif360/metrics/regression_metric.py
+++ b/aif360/metrics/regression_metric.py
@@ -2,6 +2,8 @@
 from aif360.metrics import DatasetMetric, utils
 from aif360.datasets import RegressionDataset
 
+_LOG_CLIP_EPS = 1e-10
+
 
 class RegressionDatasetMetric(DatasetMetric):
     """Class for computing metrics based on a single
@@ -100,14 +102,31 @@ def _dcg(self, scores):
         return z
 
     def pseudo_r2(self, privileged=None):
-        """Compute the Pseudo R² (coefficient of determination) for a group.
+        """Compute McFadden's Pseudo R² for a group in a binary classification
+        setting.
+
+        .. math::
+
+           R^2_{McFadden} = 1 - \\frac{\\ln L_{model}}{\\ln L_{null}}
+
+        where
 
         .. math::
 
-           R^2 = 1 - \\frac{SS_{res}}{SS_{tot}}
+           \\ln L_{model} = \\sum_i \\left[ y_i \\ln(\\hat{p}_i) +
+               (1 - y_i) \\ln(1 - \\hat{p}_i) \\right]
 
-        where :math:`SS_{res} = \\sum_i (y_i - \\hat{y}_i)^2` and
-        :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`.
+        and
+
+        .. math::
+
+           \\ln L_{null} = \\sum_i \\left[ y_i \\ln(\\bar{p}) +
+               (1 - y_i) \\ln(1 - \\bar{p}) \\right]
+
+        :math:`\\hat{p}_i` are the predicted probabilities from
+        ``dataset.scores`` (values in ``[0, 1]``), :math:`y_i` are the binary
+        true labels (``0`` or ``1``) from ``dataset.labels``, and
+        :math:`\\bar{p}` is the base rate (mean of the labels) for the group.
 
         Args:
             privileged (bool, optional): Boolean prescribing whether to
@@ -116,8 +135,8 @@ def pseudo_r2(self, privileged=None):
                 meaning this metric is computed over the entire dataset.
 
         Returns:
-            numpy.float64: Pseudo R² value. Returns 0.0 if
-            :math:`SS_{tot} = 0` (all labels are identical).
+            numpy.float64: McFadden's Pseudo R² value. Returns ``0.0`` if
+            :math:`\\ln L_{null} = 0` (all labels are identical).
         """
         condition = self._to_condition(privileged)
         cond_vec = utils.compute_boolean_conditioning_vector(
@@ -128,13 +147,17 @@ def pseudo_r2(self, privileged=None):
         y_true = np.ravel(self.dataset.labels)[cond_vec]
         y_pred = np.ravel(self.dataset.scores)[cond_vec]
 
-        ss_res = np.sum((y_true - y_pred) ** 2)
-        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_model = np.sum(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped))
+
+        p_bar = np.mean(y_true)
+        p_bar_clipped = np.clip(p_bar, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_null = np.sum(y_true * np.log(p_bar_clipped) + (1 - y_true) * np.log(1 - p_bar_clipped))
 
-        if ss_tot == 0:
+        if ll_null == 0:
             return np.float64(0.0)
 
-        return np.float64(1.0 - ss_res / ss_tot)
+        return np.float64(1.0 - ll_model / ll_null)
 
     def pseudo_r2_parity(self):
         """Compute the difference in Pseudo R² between unprivileged and
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
index dafca17d..9a0bbb6a 100644
--- a/tests/test_regression_metric.py
+++ b/tests/test_regression_metric.py
@@ -40,12 +40,13 @@ def test_ndcg():
     expected = 0.9205433036318259
 
 
-# --- Pseudo R² tests ---
-# Build a small synthetic dataset with known labels and scores (predictions).
+# --- Pseudo R² tests (McFadden's) ---
+# Build a small synthetic dataset with binary labels and probability scores.
 # privileged group: s == 'r' (mapped to 1), unprivileged group: s == 'b' (mapped to 0)
+# labels: binary (0 or 1); scores: predicted probabilities (0~1)
 _df_r2 = pd.DataFrame({
-    's':      ['r', 'r', 'r', 'b', 'b', 'b'],
-    'label':  [10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
+    's':      ['r', 'r', 'r', 'r', 'b', 'b', 'b', 'b'],
+    'label':  [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
 })
 _df_r2['s'] = _df_r2['s'].astype(object)
 _dataset_r2 = RegressionDataset(
@@ -53,10 +54,10 @@ def test_ndcg():
     protected_attribute_names=['s'],
     privileged_classes=[['r']]
 )
-# Overwrite scores with imperfect predictions (in the normalized [0,1] space)
-# privileged group (s==1): predictions close to truth → high R²
-# unprivileged group (s==0): predictions less accurate → lower R²
-_preds = np.array([[0.0], [0.1], [0.2], [0.6], [0.9], [1.0]])  # shape (6,1)
+# privileged group: predictions close to true labels → higher McFadden R²
+# unprivileged group: predictions less accurate → lower McFadden R²
+_preds = np.array([[0.9], [0.8], [0.2], [0.1],   # privileged: good predictions
+                   [0.6], [0.4], [0.6], [0.4]])   # unprivileged: poor predictions
 _dataset_r2.scores = _preds
 
 _m_r2 = RegressionDatasetMetric(
@@ -69,20 +70,21 @@ def test_ndcg():
 def test_pseudo_r2_overall():
     r2 = _m_r2.pseudo_r2()
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 <= 1.0, f"R² should be <= 1, got {r2}"
-    assert abs(r2 - 0.9142857142857143) < 1e-9, f"Unexpected overall R², got {r2}"
+    assert r2 <= 1.0, f"McFadden R² should be <= 1, got {r2}"
+    assert abs(r2 - 0.3667937806535049) < 1e-9, f"Unexpected overall McFadden R², got {r2}"
 
 
 def test_pseudo_r2_privileged():
     r2 = _m_r2.pseudo_r2(privileged=True)
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - 0.375) < 1e-9, f"Unexpected privileged R², got {r2}"
+    assert r2 > 0, f"Privileged McFadden R² should be > 0, got {r2}"
+    assert abs(r2 - 0.7630344058337939) < 1e-9, f"Unexpected privileged McFadden R², got {r2}"
 
 
 def test_pseudo_r2_unprivileged():
     r2 = _m_r2.pseudo_r2(privileged=False)
     assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - 0.875) < 1e-9, f"Unexpected unprivileged R², got {r2}"
+    assert abs(r2 - (-0.029446844526784144)) < 1e-9, f"Unexpected unprivileged McFadden R², got {r2}"
 
 
 def test_pseudo_r2_parity():

From fa8232cca7a2acbddf08131c57832db4b6d88eb0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:36:25 +0000
Subject: [PATCH 6/8] Initial plan


From f5e54be2d635071c385f813690591cf3ea4ad29e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 2 Apr 2026 09:41:37 +0000
Subject: [PATCH 7/8] Move pseudo_r2 and pseudo_r2_parity from
 RegressionDatasetMetric to ClassificationMetric

Agent-Logs-Url: https://github.com/Zheng-ZhongHeng/AIF360/sessions/45a103f5-09f8-443e-8507-2fc4cf5b2f1e

Co-authored-by: Zheng-ZhongHeng <148734589+Zheng-ZhongHeng@users.noreply.github.com>
---
 aif360/metrics/classification_metric.py | 70 ++++++++++++++++++++++
 aif360/metrics/regression_metric.py     | 79 +------------------------
 tests/test_classification_metric.py     | 50 ++++++++++++++++
 tests/test_regression_metric.py         | 56 ------------------
 4 files changed, 121 insertions(+), 134 deletions(-)

diff --git a/aif360/metrics/classification_metric.py b/aif360/metrics/classification_metric.py
index b2b3b782..731fc351 100644
--- a/aif360/metrics/classification_metric.py
+++ b/aif360/metrics/classification_metric.py
@@ -6,6 +6,7 @@
 from aif360.datasets import BinaryLabelDataset
 from aif360.datasets.multiclass_label_dataset import MulticlassLabelDataset
 
+_LOG_CLIP_EPS = 1e-10
 
 class ClassificationMetric(BinaryLabelDatasetMetric):
     """Class for computing metrics based on two BinaryLabelDatasets.
@@ -862,6 +863,75 @@ def neg_ratio(i, j):
 
         return edf_clf - edf_data
 
+    def pseudo_r2(self, privileged=None):
+        r"""McFadden's Pseudo R² for a group.
+
+        .. math::
+
+           R^2_{McFadden} = 1 - \frac{\ln L_{model}}{\ln L_{null}}
+
+        where :math:`\ln L_{model} = \sum_i [y_i \ln(\hat{p}_i) + (1-y_i)\ln(1-\hat{p}_i)]`
+        and :math:`\ln L_{null} = \sum_i [y_i \ln(\bar{p}) + (1-y_i)\ln(1-\bar{p})]`,
+        with :math:`\bar{p}` being the base rate (mean of true labels) for the group.
+
+        True labels come from ``dataset`` (1 where equal to ``favorable_label``,
+        else 0); :math:`\hat{p}_i` come from ``classified_dataset.scores`` and
+        are clipped to ``[_LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS]`` before the log.
+
+        Args:
+            privileged (bool, optional): Boolean prescribing whether to
+                condition this metric on the `privileged_groups`, if `True`, or
+                the `unprivileged_groups`, if `False`. Defaults to `None`
+                meaning this metric is computed over the entire dataset.
+
+        Returns:
+            numpy.float64: McFadden's Pseudo R². Returns 0.0 if the group is
+            empty or all of its true labels are identical.
+        """
+        condition = self._to_condition(privileged)
+        cond_vec = utils.compute_boolean_conditioning_vector(
+            self.dataset.protected_attributes,
+            self.dataset.protected_attribute_names,
+            condition)
+
+        # binary 0/1 truth (1 == favorable_label) and predicted probabilities
+        y_true = np.ravel(self.dataset.labels)[cond_vec]
+        y_true = (y_true == self.dataset.favorable_label).astype(np.float64)
+        y_pred = np.ravel(self.classified_dataset.scores)[cond_vec]
+
+        # Degenerate group: ln L_null is 0 and the ratio is undefined. Test
+        # the labels directly -- a clipped p_bar leaves ll_null at roughly
+        # -n * 1e-10, so comparing ll_null with 0 never catches this case.
+        if y_true.size == 0 or y_true.min() == y_true.max():
+            return np.float64(0.0)
+
+        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_model = np.sum(y_true * np.log(y_pred_clipped) +
+                          (1 - y_true) * np.log(1 - y_pred_clipped))
+
+        p_bar = np.mean(y_true)  # strictly inside (0, 1) after the guard
+        ll_null = np.sum(y_true * np.log(p_bar) +
+                         (1 - y_true) * np.log(1 - p_bar))
+        return np.float64(1.0 - ll_model / ll_null)
+
+    def pseudo_r2_parity(self):
+        r"""Difference in McFadden's Pseudo R² between unprivileged and
+        privileged groups.
+
+        .. math::
+
+           \Delta R^2 = R^2_{\text{unprivileged}} - R^2_{\text{privileged}}
+
+        Delegates to ``self.difference`` with :meth:`pseudo_r2` as the metric.
+        A value of 0 indicates parity; positive values indicate a better fit
+        for the unprivileged group, negative values for the privileged group.
+
+        Returns:
+            numpy.float64: Difference in McFadden's Pseudo R²
+            (unprivileged − privileged).
+        """
+        return self.difference(self.pseudo_r2)
+
     # ============================== ALIASES ===================================
     def equal_opportunity_difference(self):
         """Alias of :meth:`true_positive_rate_difference`."""
diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py
index ab31bd7b..649af11d 100644
--- a/aif360/metrics/regression_metric.py
+++ b/aif360/metrics/regression_metric.py
@@ -1,9 +1,7 @@
 import numpy as np
-from aif360.metrics import DatasetMetric, utils
+from aif360.metrics import DatasetMetric
 from aif360.datasets import RegressionDataset
 
-_LOG_CLIP_EPS = 1e-10
-
 
 class RegressionDatasetMetric(DatasetMetric):
     """Class for computing metrics based on a single
@@ -101,78 +99,3 @@ def _dcg(self, scores):
         z = np.sum(scores/logs)
         return z
 
-    def pseudo_r2(self, privileged=None):
-        """Compute McFadden's Pseudo R² for a group in a binary classification
-        setting.
-
-        .. math::
-
-           R^2_{McFadden} = 1 - \\frac{\\ln L_{model}}{\\ln L_{null}}
-
-        where
-
-        .. math::
-
-           \\ln L_{model} = \\sum_i \\left[ y_i \\ln(\\hat{p}_i) +
-               (1 - y_i) \\ln(1 - \\hat{p}_i) \\right]
-
-        and
-
-        .. math::
-
-           \\ln L_{null} = \\sum_i \\left[ y_i \\ln(\\bar{p}) +
-               (1 - y_i) \\ln(1 - \\bar{p}) \\right]
-
-        :math:`\\hat{p}_i` are the predicted probabilities from
-        ``dataset.scores`` (values in ``[0, 1]``), :math:`y_i` are the binary
-        true labels (``0`` or ``1``) from ``dataset.labels``, and
-        :math:`\\bar{p}` is the base rate (mean of the labels) for the group.
-
-        Args:
-            privileged (bool, optional): Boolean prescribing whether to
-                condition this metric on the `privileged_groups`, if `True`, or
-                the `unprivileged_groups`, if `False`. Defaults to `None`
-                meaning this metric is computed over the entire dataset.
-
-        Returns:
-            numpy.float64: McFadden's Pseudo R² value. Returns ``0.0`` if
-            :math:`\\ln L_{null} = 0` (all labels are identical).
-        """
-        condition = self._to_condition(privileged)
-        cond_vec = utils.compute_boolean_conditioning_vector(
-            self.dataset.protected_attributes,
-            self.dataset.protected_attribute_names,
-            condition)
-
-        y_true = np.ravel(self.dataset.labels)[cond_vec]
-        y_pred = np.ravel(self.dataset.scores)[cond_vec]
-
-        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
-        ll_model = np.sum(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped))
-
-        p_bar = np.mean(y_true)
-        p_bar_clipped = np.clip(p_bar, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
-        ll_null = np.sum(y_true * np.log(p_bar_clipped) + (1 - y_true) * np.log(1 - p_bar_clipped))
-
-        if ll_null == 0:
-            return np.float64(0.0)
-
-        return np.float64(1.0 - ll_model / ll_null)
-
-    def pseudo_r2_parity(self):
-        """Compute the difference in Pseudo R² between unprivileged and
-        privileged groups.
-
-        .. math::
-
-           \\Delta R^2 = R^2_{\\text{unprivileged}} - R^2_{\\text{privileged}}
-
-        A value of 0 indicates perfect fairness; a positive value indicates
-        the model explains more variance for the unprivileged group; a negative
-        value indicates the model explains more variance for the privileged
-        group.
-
-        Returns:
-            numpy.float64: Difference in Pseudo R² (unprivileged − privileged).
-        """
-        return self.difference(self.pseudo_r2)
diff --git a/tests/test_classification_metric.py b/tests/test_classification_metric.py
index 3a32bcb0..fb65d078 100644
--- a/tests/test_classification_metric.py
+++ b/tests/test_classification_metric.py
@@ -193,5 +193,55 @@ def test_generalized_binary_confusion_matrix():
     assert round(gtp,2) == 5.31
     gfp = cm.num_generalized_false_positives()
     assert gfp == 1.09
+
+
+# --- McFadden's Pseudo R² tests: 4 privileged + 4 unprivileged samples ---
+_df_clf = pd.DataFrame({
+    'sex':   [1, 1, 1, 1, 0, 0, 0, 0],
+    'label': [1, 1, 0, 0, 1, 1, 0, 0],
+})
+
+_ds_true = BinaryLabelDataset(
+    df=_df_clf,
+    label_names=['label'],
+    protected_attribute_names=['sex'],
+    favorable_label=1,
+    unfavorable_label=0,
+    privileged_protected_attributes=[[1]]
+)
+
+# privileged (sex==1): scores close to the true labels → higher R²
+# unprivileged (sex==0): scores near 0.5, two on the wrong side → lower R²
+_ds_pred = _ds_true.copy()
+_ds_pred.scores = np.array([[0.9], [0.8], [0.2], [0.1],
+                             [0.6], [0.4], [0.6], [0.4]])
+_ds_pred.labels = np.array([[1], [1], [0], [0],
+                             [1], [0], [1], [0]], dtype=np.float64)
+
+_m_pseudo = ClassificationMetric(
+    _ds_true, _ds_pred,
+    privileged_groups=[{'sex': 1}],
+    unprivileged_groups=[{'sex': 0}]
+)
+
+def test_pseudo_r2_overall():
+    r2 = _m_pseudo.pseudo_r2()
+    assert isinstance(r2, (float, np.floating))
+    assert abs(r2 - 0.3667937806535049) < 1e-9
+
+def test_pseudo_r2_privileged():
+    r2 = _m_pseudo.pseudo_r2(privileged=True)
+    assert isinstance(r2, (float, np.floating))
+    assert abs(r2 - 0.7630344058337939) < 1e-9  # good predictions
+
+def test_pseudo_r2_unprivileged():
+    r2 = _m_pseudo.pseudo_r2(privileged=False)
+    assert abs(r2 - (-0.029446844526784144)) < 1e-9  # worse than base rate
+
+def test_pseudo_r2_parity():
+    parity = _m_pseudo.pseudo_r2_parity()
+    expected = _m_pseudo.pseudo_r2(privileged=False) - _m_pseudo.pseudo_r2(privileged=True)
+    assert abs(parity - expected) < 1e-9
+
    
    
\ No newline at end of file
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
index 9a0bbb6a..aca243a1 100644
--- a/tests/test_regression_metric.py
+++ b/tests/test_regression_metric.py
@@ -38,59 +38,3 @@ def test_dcg():
 def test_ndcg():
     actual = m.discounted_cum_gain(normalized=True, full_dataset=dataset)
     expected = 0.9205433036318259
-
-
-# --- Pseudo R² tests (McFadden's) ---
-# Build a small synthetic dataset with binary labels and probability scores.
-# privileged group: s == 'r' (mapped to 1), unprivileged group: s == 'b' (mapped to 0)
-# labels: binary (0 or 1); scores: predicted probabilities (0~1)
-_df_r2 = pd.DataFrame({
-    's':      ['r', 'r', 'r', 'r', 'b', 'b', 'b', 'b'],
-    'label':  [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
-})
-_df_r2['s'] = _df_r2['s'].astype(object)
-_dataset_r2 = RegressionDataset(
-    _df_r2, dep_var_name='label',
-    protected_attribute_names=['s'],
-    privileged_classes=[['r']]
-)
-# privileged group: predictions close to true labels → higher McFadden R²
-# unprivileged group: predictions less accurate → lower McFadden R²
-_preds = np.array([[0.9], [0.8], [0.2], [0.1],   # privileged: good predictions
-                   [0.6], [0.4], [0.6], [0.4]])   # unprivileged: poor predictions
-_dataset_r2.scores = _preds
-
-_m_r2 = RegressionDatasetMetric(
-    dataset=_dataset_r2,
-    privileged_groups=[{'s': 1}],
-    unprivileged_groups=[{'s': 0}],
-)
-
-
-def test_pseudo_r2_overall():
-    r2 = _m_r2.pseudo_r2()
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 <= 1.0, f"McFadden R² should be <= 1, got {r2}"
-    assert abs(r2 - 0.3667937806535049) < 1e-9, f"Unexpected overall McFadden R², got {r2}"
-
-
-def test_pseudo_r2_privileged():
-    r2 = _m_r2.pseudo_r2(privileged=True)
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 > 0, f"Privileged McFadden R² should be > 0, got {r2}"
-    assert abs(r2 - 0.7630344058337939) < 1e-9, f"Unexpected privileged McFadden R², got {r2}"
-
-
-def test_pseudo_r2_unprivileged():
-    r2 = _m_r2.pseudo_r2(privileged=False)
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - (-0.029446844526784144)) < 1e-9, f"Unexpected unprivileged McFadden R², got {r2}"
-
-
-def test_pseudo_r2_parity():
-    parity = _m_r2.pseudo_r2_parity()
-    expected = _m_r2.pseudo_r2(privileged=False) - _m_r2.pseudo_r2(privileged=True)
-    assert abs(parity - expected) < 1e-9, (
-        f"pseudo_r2_parity() = {parity}, "
-        f"pseudo_r2(False) - pseudo_r2(True) = {expected}"
-    )
\ No newline at end of file

From 8d1d1c47474f739a9534620e8d5bb564281020c3 Mon Sep 17 00:00:00 2001
From: Peter <s711161120@gm.ntpu.edu.tw>
Date: Thu, 2 Apr 2026 09:41:37 +0000
Subject: [PATCH 8/8] Move pseudo_r2 and pseudo_r2_parity from
 RegressionDatasetMetric to ClassificationMetric

Agent-Logs-Url: https://github.com/Zheng-ZhongHeng/AIF360/sessions/45a103f5-09f8-443e-8507-2fc4cf5b2f1e

Co-authored-by: Zheng-ZhongHeng <148734589+Zheng-ZhongHeng@users.noreply.github.com>
---
 aif360/metrics/classification_metric.py | 70 ++++++++++++++++++++++
 aif360/metrics/regression_metric.py     | 79 +------------------------
 tests/test_classification_metric.py     | 50 ++++++++++++++++
 tests/test_regression_metric.py         | 56 ------------------
 4 files changed, 121 insertions(+), 134 deletions(-)

diff --git a/aif360/metrics/classification_metric.py b/aif360/metrics/classification_metric.py
index b2b3b782..731fc351 100644
--- a/aif360/metrics/classification_metric.py
+++ b/aif360/metrics/classification_metric.py
@@ -6,6 +6,7 @@
 from aif360.datasets import BinaryLabelDataset
 from aif360.datasets.multiclass_label_dataset import MulticlassLabelDataset
 
+_LOG_CLIP_EPS = 1e-10  # clip bound keeping np.log() arguments strictly inside (0, 1)
 
 class ClassificationMetric(BinaryLabelDatasetMetric):
     """Class for computing metrics based on two BinaryLabelDatasets.
@@ -862,6 +863,75 @@ def neg_ratio(i, j):
 
         return edf_clf - edf_data
 
+    def pseudo_r2(self, privileged=None):
+        r"""McFadden's Pseudo R² for a group.
+
+        .. math::
+
+           R^2_{McFadden} = 1 - \frac{\ln L_{model}}{\ln L_{null}}
+
+        where :math:`\ln L_{model} = \sum_i [y_i \ln(\hat{p}_i) + (1-y_i)\ln(1-\hat{p}_i)]`
+        and :math:`\ln L_{null} = \sum_i [y_i \ln(\bar{p}) + (1-y_i)\ln(1-\bar{p})]`,
+        with :math:`\bar{p}` being the base rate (mean of true labels) for the group.
+
+        ``classified_dataset.scores`` are read as probabilities (clipped away
+        from 0 and 1); true labels are binarized via ``favorable_label``.
+
+        Args:
+            privileged (bool, optional): Boolean prescribing whether to
+                condition this metric on the `privileged_groups`, if `True`, or
+                the `unprivileged_groups`, if `False`. Defaults to `None`
+                meaning this metric is computed over the entire dataset.
+
+        Returns:
+            numpy.float64: McFadden's Pseudo R². Returns 0.0 when the group
+            is empty or contains a single class (:math:`\ln L_{null} = 0`).
+        """
+        condition = self._to_condition(privileged)
+        cond_vec = utils.compute_boolean_conditioning_vector(
+            self.dataset.protected_attributes,
+            self.dataset.protected_attribute_names,
+            condition)
+
+        y_true = np.ravel(self.dataset.labels)[cond_vec]
+        y_pred = np.ravel(self.classified_dataset.scores)[cond_vec]
+
+        # convert labels to binary 0/1 based on favorable_label
+        y_true = (y_true == self.dataset.favorable_label).astype(np.float64)
+
+        # Empty or single-class group: the null model is degenerate, so R² is
+        # undefined; return 0.0 rather than a huge clipping artifact.
+        if y_true.size == 0 or y_true.min() == y_true.max():
+            return np.float64(0.0)
+
+        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
+        ll_model = np.sum(y_true * np.log(y_pred_clipped) +
+                          (1 - y_true) * np.log(1 - y_pred_clipped))
+
+        p_bar = np.mean(y_true)  # strictly inside (0, 1) after the guard above
+        ll_null = np.sum(y_true * np.log(p_bar) +
+                         (1 - y_true) * np.log(1 - p_bar))
+
+        return np.float64(1.0 - ll_model / ll_null)
+
+    def pseudo_r2_parity(self):
+        r"""Difference in McFadden's Pseudo R² between unprivileged and
+        privileged groups.
+
+        .. math::
+
+           \Delta R^2 = R^2_{\text{unprivileged}} - R^2_{\text{privileged}}
+
+        Computed by passing :meth:`pseudo_r2` to ``self.difference``. A value
+        of 0 indicates parity; positive values mean a better fit for the
+        unprivileged group, negative values a better fit for the privileged.
+
+        Returns:
+            numpy.float64: Difference in McFadden's Pseudo R²
+            (unprivileged − privileged).
+        """
+        return self.difference(self.pseudo_r2)
+
     # ============================== ALIASES ===================================
     def equal_opportunity_difference(self):
         """Alias of :meth:`true_positive_rate_difference`."""
diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py
index ab31bd7b..649af11d 100644
--- a/aif360/metrics/regression_metric.py
+++ b/aif360/metrics/regression_metric.py
@@ -1,9 +1,7 @@
 import numpy as np
-from aif360.metrics import DatasetMetric, utils
+from aif360.metrics import DatasetMetric
 from aif360.datasets import RegressionDataset
 
-_LOG_CLIP_EPS = 1e-10
-
 
 class RegressionDatasetMetric(DatasetMetric):
     """Class for computing metrics based on a single
@@ -101,78 +99,3 @@ def _dcg(self, scores):
         z = np.sum(scores/logs)
         return z
 
-    def pseudo_r2(self, privileged=None):
-        """Compute McFadden's Pseudo R² for a group in a binary classification
-        setting.
-
-        .. math::
-
-           R^2_{McFadden} = 1 - \\frac{\\ln L_{model}}{\\ln L_{null}}
-
-        where
-
-        .. math::
-
-           \\ln L_{model} = \\sum_i \\left[ y_i \\ln(\\hat{p}_i) +
-               (1 - y_i) \\ln(1 - \\hat{p}_i) \\right]
-
-        and
-
-        .. math::
-
-           \\ln L_{null} = \\sum_i \\left[ y_i \\ln(\\bar{p}) +
-               (1 - y_i) \\ln(1 - \\bar{p}) \\right]
-
-        :math:`\\hat{p}_i` are the predicted probabilities from
-        ``dataset.scores`` (values in ``[0, 1]``), :math:`y_i` are the binary
-        true labels (``0`` or ``1``) from ``dataset.labels``, and
-        :math:`\\bar{p}` is the base rate (mean of the labels) for the group.
-
-        Args:
-            privileged (bool, optional): Boolean prescribing whether to
-                condition this metric on the `privileged_groups`, if `True`, or
-                the `unprivileged_groups`, if `False`. Defaults to `None`
-                meaning this metric is computed over the entire dataset.
-
-        Returns:
-            numpy.float64: McFadden's Pseudo R² value. Returns ``0.0`` if
-            :math:`\\ln L_{null} = 0` (all labels are identical).
-        """
-        condition = self._to_condition(privileged)
-        cond_vec = utils.compute_boolean_conditioning_vector(
-            self.dataset.protected_attributes,
-            self.dataset.protected_attribute_names,
-            condition)
-
-        y_true = np.ravel(self.dataset.labels)[cond_vec]
-        y_pred = np.ravel(self.dataset.scores)[cond_vec]
-
-        y_pred_clipped = np.clip(y_pred, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
-        ll_model = np.sum(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped))
-
-        p_bar = np.mean(y_true)
-        p_bar_clipped = np.clip(p_bar, _LOG_CLIP_EPS, 1 - _LOG_CLIP_EPS)
-        ll_null = np.sum(y_true * np.log(p_bar_clipped) + (1 - y_true) * np.log(1 - p_bar_clipped))
-
-        if ll_null == 0:
-            return np.float64(0.0)
-
-        return np.float64(1.0 - ll_model / ll_null)
-
-    def pseudo_r2_parity(self):
-        """Compute the difference in Pseudo R² between unprivileged and
-        privileged groups.
-
-        .. math::
-
-           \\Delta R^2 = R^2_{\\text{unprivileged}} - R^2_{\\text{privileged}}
-
-        A value of 0 indicates perfect fairness; a positive value indicates
-        the model explains more variance for the unprivileged group; a negative
-        value indicates the model explains more variance for the privileged
-        group.
-
-        Returns:
-            numpy.float64: Difference in Pseudo R² (unprivileged − privileged).
-        """
-        return self.difference(self.pseudo_r2)
diff --git a/tests/test_classification_metric.py b/tests/test_classification_metric.py
index 3a32bcb0..fb65d078 100644
--- a/tests/test_classification_metric.py
+++ b/tests/test_classification_metric.py
@@ -193,5 +193,55 @@ def test_generalized_binary_confusion_matrix():
     assert round(gtp,2) == 5.31
     gfp = cm.num_generalized_false_positives()
     assert gfp == 1.09
+
+
+# --- McFadden's Pseudo R² tests: fixtures shared by the test_pseudo_r2_* tests ---
+_df_clf = pd.DataFrame({
+    'sex':   [1, 1, 1, 1, 0, 0, 0, 0],
+    'label': [1, 1, 0, 0, 1, 1, 0, 0],
+})
+
+_ds_true = BinaryLabelDataset(
+    df=_df_clf,
+    label_names=['label'],
+    protected_attribute_names=['sex'],
+    favorable_label=1,
+    unfavorable_label=0,
+    privileged_protected_attributes=[[1]]
+)
+
+# privileged (sex==1): scores 0.9/0.8/0.2/0.1 track the true labels → higher R²
+# unprivileged (sex==0): scores stay near 0.5 (0.6/0.4) → R² close to 0
+_ds_pred = _ds_true.copy()
+_ds_pred.scores = np.array([[0.9], [0.8], [0.2], [0.1],
+                             [0.6], [0.4], [0.6], [0.4]])
+_ds_pred.labels = np.array([[1], [1], [0], [0],
+                             [1], [0], [1], [0]], dtype=np.float64)  # scores thresholded at 0.5
+
+_m_pseudo = ClassificationMetric(
+    _ds_true, _ds_pred,
+    privileged_groups=[{'sex': 1}],
+    unprivileged_groups=[{'sex': 0}]
+)
+
+def test_pseudo_r2_overall():
+    r2 = _m_pseudo.pseudo_r2()
+    assert isinstance(r2, (float, np.floating))
+    assert abs(r2 - 0.3667937806535049) < 1e-9  # pinned value; implies r2 <= 1
+
+def test_pseudo_r2_privileged():
+    r2 = _m_pseudo.pseudo_r2(privileged=True)
+    assert isinstance(r2, (float, np.floating))
+    assert abs(r2 - 0.7630344058337939) < 1e-9  # pinned value; implies r2 > 0
+
+def test_pseudo_r2_unprivileged():
+    r2 = _m_pseudo.pseudo_r2(privileged=False)
+    assert isinstance(r2, (float, np.floating)) and abs(r2 + 0.029446844526784144) < 1e-9
+
+def test_pseudo_r2_parity():
+    unpriv_r2 = _m_pseudo.pseudo_r2(privileged=False)
+    priv_r2 = _m_pseudo.pseudo_r2(privileged=True)
+    assert abs(_m_pseudo.pseudo_r2_parity() - (unpriv_r2 - priv_r2)) < 1e-9
+
    
    
\ No newline at end of file
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
index 9a0bbb6a..aca243a1 100644
--- a/tests/test_regression_metric.py
+++ b/tests/test_regression_metric.py
@@ -38,59 +38,3 @@ def test_dcg():
 def test_ndcg():
     actual = m.discounted_cum_gain(normalized=True, full_dataset=dataset)
     expected = 0.9205433036318259
-
-
-# --- Pseudo R² tests (McFadden's) ---
-# Build a small synthetic dataset with binary labels and probability scores.
-# privileged group: s == 'r' (mapped to 1), unprivileged group: s == 'b' (mapped to 0)
-# labels: binary (0 or 1); scores: predicted probabilities (0~1)
-_df_r2 = pd.DataFrame({
-    's':      ['r', 'r', 'r', 'r', 'b', 'b', 'b', 'b'],
-    'label':  [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
-})
-_df_r2['s'] = _df_r2['s'].astype(object)
-_dataset_r2 = RegressionDataset(
-    _df_r2, dep_var_name='label',
-    protected_attribute_names=['s'],
-    privileged_classes=[['r']]
-)
-# privileged group: predictions close to true labels → higher McFadden R²
-# unprivileged group: predictions less accurate → lower McFadden R²
-_preds = np.array([[0.9], [0.8], [0.2], [0.1],   # privileged: good predictions
-                   [0.6], [0.4], [0.6], [0.4]])   # unprivileged: poor predictions
-_dataset_r2.scores = _preds
-
-_m_r2 = RegressionDatasetMetric(
-    dataset=_dataset_r2,
-    privileged_groups=[{'s': 1}],
-    unprivileged_groups=[{'s': 0}],
-)
-
-
-def test_pseudo_r2_overall():
-    r2 = _m_r2.pseudo_r2()
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 <= 1.0, f"McFadden R² should be <= 1, got {r2}"
-    assert abs(r2 - 0.3667937806535049) < 1e-9, f"Unexpected overall McFadden R², got {r2}"
-
-
-def test_pseudo_r2_privileged():
-    r2 = _m_r2.pseudo_r2(privileged=True)
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert r2 > 0, f"Privileged McFadden R² should be > 0, got {r2}"
-    assert abs(r2 - 0.7630344058337939) < 1e-9, f"Unexpected privileged McFadden R², got {r2}"
-
-
-def test_pseudo_r2_unprivileged():
-    r2 = _m_r2.pseudo_r2(privileged=False)
-    assert isinstance(r2, (float, np.floating)), f"Expected float, got {type(r2)}"
-    assert abs(r2 - (-0.029446844526784144)) < 1e-9, f"Unexpected unprivileged McFadden R², got {r2}"
-
-
-def test_pseudo_r2_parity():
-    parity = _m_r2.pseudo_r2_parity()
-    expected = _m_r2.pseudo_r2(privileged=False) - _m_r2.pseudo_r2(privileged=True)
-    assert abs(parity - expected) < 1e-9, (
-        f"pseudo_r2_parity() = {parity}, "
-        f"pseudo_r2(False) - pseudo_r2(True) = {expected}"
-    )
\ No newline at end of file