From f93613f2c737839e9426d7a88a1cfe81c3d5c22d Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 25 Jul 2023 13:26:50 +0200 Subject: [PATCH 01/14] Started implementing improved goodness of fit implementation --- cebra/integrations/sklearn/metrics.py | 77 +++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index ccecaa11..9ef2efb9 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -108,6 +108,83 @@ def infonce_loss( return avg_loss +def goodness_of_fit_score( + cebra_model: cebra_sklearn_cebra.CEBRA, + X: Union[npt.NDArray, torch.Tensor], + *y, + session_id: Optional[int] = None, + num_batches: int = 500, + correct_by_batchsize: bool = False, +) -> float: + """Compute the InfoNCE loss on a *single session* dataset on the model. + + Args: + cebra_model: The model to use to compute the InfoNCE loss on the samples. + X: A 2D data matrix, corresponding to a *single session* recording. + y: An arbitrary amount of continuous indices passed as 2D matrices, and up to one + discrete index passed as a 1D array. Each index has to match the length of ``X``. + session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`cebra.CEBRA.num_sessions` + for multisession, set to ``None`` for single session. + num_batches: The number of iterations to consider to evaluate the model on the new data. + Higher values will give a more accurate estimate. Set it to at least 500 iterations. + """ + loss = infonce_loss(cebra_model=cebra_model, + X=X, + *y, + session_id=session_id, + num_batches=500, + correct_by_batchsize=False) + return infonce_to_goodness_of_fit(loss, cebra_model) + + +def goodness_of_fit_score(model): + infonce = np.array(model.state_dict_["log"]["total"]) + return infonce_to_goodness_of_fit(infonce, model) + + +def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], + model: cebra.CEBRA) -> np.ndarray: + """Given a trained CEBRA model, return goodness of fit metric + + The goodness of fit ranges from 0 (lowest meaningful value) + to a positive number with the unit "bits", the higher the + better. + + Values lower than 0 bits are possible, but these only occur + due to numerical effects. A perfectly collapsed embedding + (e.g., because the data cannot be fit with the provided + auxiliary variables) will have a goodness of fit of 0. + + The conversion between the generalized InfoNCE metric that + CEBRA is trained with and the goodness of fit computed with this + function is + + .. math:: + + S = \log N - \text{InfoNCE} + + Args: + model: The trained CEBRA model + + Returns: + Numpy array containing the goodness of fit + values, measured in bits + + Raises: + ``RuntimeError``, if provided model is not + fit to data. 
+ """ + if not hasattr(model, "state_dict_"): + raise RuntimeError("Fit the CEBRA model first.") + + nats_to_bits = np.log2(np.e) + num_sessions = model.num_sessions_ + if num_sessions is None: + num_sessions = 1 + chance_level = np.log(model.batch_size * (model.num_sessions_ or 1)) + return (chance_level - infonce) * nats_to_bits + + def _consistency_scores( embeddings: List[Union[npt.NDArray, torch.Tensor]], datasets: List[Union[int, str]], From d87153521c3afda13f1541d5e0fcf52649e8e842 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 27 Oct 2024 18:50:33 +0100 Subject: [PATCH 02/14] add tests and improve implementation --- cebra/integrations/sklearn/metrics.py | 70 ++++++++++++++++++++------- tests/test_sklearn_metrics.py | 64 ++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 18 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 9ef2efb9..41dc67ff 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -108,16 +108,15 @@ def infonce_loss( return avg_loss -def goodness_of_fit_score( - cebra_model: cebra_sklearn_cebra.CEBRA, - X: Union[npt.NDArray, torch.Tensor], - *y, - session_id: Optional[int] = None, - num_batches: int = 500, - correct_by_batchsize: bool = False, -) -> float: +def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, + X: Union[npt.NDArray, torch.Tensor], + *y, + session_id: Optional[int] = None, + num_batches: int = 500) -> float: """Compute the InfoNCE loss on a *single session* dataset on the model. + This function uses the :func:`infonce_loss` function to compute the InfoNCE loss. + Args: cebra_model: The model to use to compute the InfoNCE loss on the samples. X: A 2D data matrix, corresponding to a *single session* recording. @@ -127,23 +126,60 @@ def goodness_of_fit_score( for multisession, set to ``None`` for single session. num_batches: The number of iterations to consider to evaluate the model on the new data. Higher values will give a more accurate estimate. Set it to at least 500 iterations. + + Returns: + The average GoF score estimated over ``num_batches`` batches from the data distribution. + + Related: + :func:`infonce_to_goodness_of_fit` + + Example: + + >>> import cebra + >>> import numpy as np + >>> neural_data = np.random.uniform(0, 1, (1000, 20)) + >>> cebra_model = cebra.CEBRA(max_iterations=10) + >>> cebra_model.fit(neural_data) + CEBRA(max_iterations=10) + >>> gof = cebra.goodness_of_fit_score(cebra_model, neural_data) """ - loss = infonce_loss(cebra_model=cebra_model, - X=X, + loss = infonce_loss(cebra_model, + X, *y, session_id=session_id, - num_batches=500, + num_batches=num_batches, correct_by_batchsize=False) return infonce_to_goodness_of_fit(loss, cebra_model) -def goodness_of_fit_score(model): +def goodness_of_fit_history(model): + """Return the history of the goodness of fit score. + + Args: + model: A trained CEBRA model. + + Returns: + A numpy array containing the goodness of fit values, measured in bits. 
+ + Related: + :func:`infonce_to_goodness_of_fit` + + Example: + + >>> import cebra + >>> import numpy as np + >>> neural_data = np.random.uniform(0, 1, (1000, 20)) + >>> cebra_model = cebra.CEBRA(max_iterations=10) + >>> cebra_model.fit(neural_data) + CEBRA(max_iterations=10) + >>> gof_history = cebra.goodness_of_fit_history(cebra_model) + """ infonce = np.array(model.state_dict_["log"]["total"]) return infonce_to_goodness_of_fit(infonce, model) def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], - model: cebra.CEBRA) -> np.ndarray: + model: cebra_sklearn_cebra.CEBRA) -> np.ndarray: """Given a trained CEBRA model, return goodness of fit metric The goodness of fit ranges from 0 (lowest meaningful value) @@ -161,18 +197,16 @@ def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], .. math:: - S = \log N - \text{InfoNCE} + S = \\log N - \\text{InfoNCE} Args: model: The trained CEBRA model Returns: - Numpy array containing the goodness of fit - values, measured in bits + Numpy array containing the goodness of fit values, measured in bits Raises: - ``RuntimeError``, if provided model is not - fit to data. + ``RuntimeError``, if provided model is not fit to data. """ if not hasattr(model, "state_dict_"): raise RuntimeError("Fit the CEBRA model first.") diff --git a/tests/test_sklearn_metrics.py b/tests/test_sklearn_metrics.py index 58e12010..eb4d8420 100644 --- a/tests/test_sklearn_metrics.py +++ b/tests/test_sklearn_metrics.py @@ -383,3 +383,67 @@ def test_sklearn_runs_consistency(): with pytest.raises(ValueError, match="Invalid.*embeddings"): _, _, _ = cebra_sklearn_metrics.consistency_score( invalid_embeddings_runs, between="runs") + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_goodness_of_fit_score(seed): + """ + Ensure that the GoF score is close to 0 for a model fit on random data. + """ + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset1-model", + max_iterations=5, + batch_size=512, + ) + X = torch.tensor(np.random.uniform(0, 1, (5000, 50))) + y = torch.tensor(np.random.uniform(0, 1, (5000, 5))) + cebra_model.fit(X, y) + score = cebra_sklearn_metrics.goodness_of_fit_score(cebra_model, + X, + y, + session_id=0, + num_batches=500) + assert isinstance(score, float) + assert np.isclose(score, 0, atol=0.01) + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_goodness_of_fit_history(seed): + """ + Ensure that the GoF score is higher for a model fit on data with underlying + structure than for a model fit on random data. + """ + + # Generate data + generator = torch.Generator().manual_seed(seed) + X = torch.rand(1000, 50, dtype=torch.float32, generator=generator) + y_random = torch.rand(len(X), 5, dtype=torch.float32, generator=generator) + linear_map = torch.randn(50, 5, dtype=torch.float32, generator=generator) + y_linear = X @ linear_map + + def _fit_and_get_history(X, y): + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset1-model", + max_iterations=150, + batch_size=512, + device="cpu") + cebra_model.fit(X, y) + history = cebra_sklearn_metrics.goodness_of_fit_history(cebra_model) + # NOTE(stes): Ignore the first 5 iterations, they can have nonsensical values + # due to numerical issues. 
+ return history[5:] + + history_random = _fit_and_get_history(X, y_random) + history_linear = _fit_and_get_history(X, y_linear) + + assert isinstance(history_random, np.ndarray) + assert history_random.shape[0] > 0 + # NOTE(stes): Ignore the first 5 iterations, they can have nonsensical values + # due to numerical issues. + history_random_non_negative = history_random[history_random >= 0] + np.testing.assert_allclose(history_random_non_negative, 0, atol=0.05) + + assert isinstance(history_linear, np.ndarray) + assert history_linear.shape[0] > 0 + + assert np.all(history_linear[-20:] > history_random[-20:]) From 17d31e5ace235fd3070103d168ed3fd047058194 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 27 Oct 2024 19:00:40 +0100 Subject: [PATCH 03/14] Fix examples --- cebra/integrations/sklearn/metrics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 41dc67ff..4b3c08ea 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -141,7 +141,7 @@ def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, >>> cebra_model = cebra.CEBRA(max_iterations=10) >>> cebra_model.fit(neural_data) CEBRA(max_iterations=10) - >>> gof = cebra.goodness_of_fit_score(cebra_model, neural_data) + >>> gof = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, neural_data) """ loss = infonce_loss(cebra_model, X, @@ -172,7 +172,7 @@ def goodness_of_fit_history(model): >>> cebra_model = cebra.CEBRA(max_iterations=10) >>> cebra_model.fit(neural_data) CEBRA(max_iterations=10) - >>> gof_history = cebra.goodness_of_fit_history(cebra_model) + >>> gof_history = cebra.sklearn.metrics.goodness_of_fit_history(cebra_model) """ infonce = np.array(model.state_dict_["log"]["total"]) return infonce_to_goodness_of_fit(infonce, model) @@ -215,7 +215,7 @@ def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], num_sessions = model.num_sessions_ if num_sessions is None: num_sessions = 1 - chance_level = np.log(model.batch_size * (model.num_sessions_ or 1)) + chance_level = np.log(model.batch_size * num_sessions) return (chance_level - infonce) * nats_to_bits From 4f155d83b9e86e157ee3ebbd6075cbecd148a19f Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 27 Oct 2024 19:02:10 +0100 Subject: [PATCH 04/14] Fix docstring error --- cebra/integrations/sklearn/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 4b3c08ea..29f7715b 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -206,7 +206,7 @@ def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], Numpy array containing the goodness of fit values, measured in bits Raises: - ``RuntimeError``, if provided model is not fit to data. + RuntimeError: If the provided model is not fit to data. 
""" if not hasattr(model, "state_dict_"): raise RuntimeError("Fit the CEBRA model first.") From afe25e68b11d9f7eafa667c73e9057bb41f21086 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Mon, 16 Dec 2024 12:12:51 -0500 Subject: [PATCH 05/14] Handle batch size = None for goodness of fit computation --- cebra/integrations/sklearn/metrics.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 29f7715b..9a1dd5a6 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -138,7 +138,7 @@ def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, >>> import cebra >>> import numpy as np >>> neural_data = np.random.uniform(0, 1, (1000, 20)) - >>> cebra_model = cebra.CEBRA(max_iterations=10) + >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) >>> cebra_model.fit(neural_data) CEBRA(max_iterations=10) >>> gof = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, neural_data) @@ -169,7 +169,7 @@ def goodness_of_fit_history(model): >>> import cebra >>> import numpy as np >>> neural_data = np.random.uniform(0, 1, (1000, 20)) - >>> cebra_model = cebra.CEBRA(max_iterations=10) + >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) >>> cebra_model.fit(neural_data) CEBRA(max_iterations=10) >>> gof_history = cebra.sklearn.metrics.goodness_of_fit_history(cebra_model) @@ -210,6 +210,11 @@ def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], """ if not hasattr(model, "state_dict_"): raise RuntimeError("Fit the CEBRA model first.") + if model.batch_size is None: + raise ValueError( + "Computing the goodness of fit is not yet supported for " + "models trained on the full dataset (batchsize = None). " + ) nats_to_bits = np.log2(np.e) num_sessions = model.num_sessions_ From caba8c50d1f3a099dc4d6a09ff72f8f9968df2a2 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Mon, 16 Dec 2024 09:28:52 -0800 Subject: [PATCH 06/14] adapt GoF implementation --- cebra/integrations/sklearn/metrics.py | 42 ++++++++++++++++++--------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 9a1dd5a6..46e3b8ca 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -178,9 +178,11 @@ def goodness_of_fit_history(model): return infonce_to_goodness_of_fit(infonce, model) -def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], - model: cebra_sklearn_cebra.CEBRA) -> np.ndarray: - """Given a trained CEBRA model, return goodness of fit metric +def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], + model: Optional[cebra_sklearn_cebra.CEBRA] = None, + batch_size: Optional[int] = None, + num_sessions: Optional[int] = None) -> Union[float, np.ndarray]: + """Given a trained CEBRA model, return goodness of fit metric. The goodness of fit ranges from 0 (lowest meaningful value) to a positive number with the unit "bits", the higher the @@ -199,27 +201,41 @@ def infonce_to_goodness_of_fit(infonce: Union[float, Iterable[float]], S = \\log N - \\text{InfoNCE} + To use this function, either provide a trained CEBRA model or the + batch size and number of sessions. + Args: + infonce: The InfoNCE loss, either a single value or an iterable of values. model: The trained CEBRA model + batch_size: The batch size used to train the model. 
+ num_sessions: The number of sessions used to train the model. Returns: Numpy array containing the goodness of fit values, measured in bits Raises: RuntimeError: If the provided model is not fit to data. + ValueError: If both ``model`` and ``(batch_size, num_sessions)`` are provided. """ - if not hasattr(model, "state_dict_"): - raise RuntimeError("Fit the CEBRA model first.") - if model.batch_size is None: - raise ValueError( - "Computing the goodness of fit is not yet supported for " - "models trained on the full dataset (batchsize = None). " - ) + if model is not None: + if batch_size is not None or num_sessions is not None: + raise ValueError("batch_size and num_sessions should not be provided if model is provided.") + if not hasattr(model, "state_dict_"): + raise RuntimeError("Fit the CEBRA model first.") + if model.batch_size is None: + raise ValueError( + "Computing the goodness of fit is not yet supported for " + "models trained on the full dataset (batchsize = None). " + ) + batch_size = model.batch_size + num_sessions = model.num_sessions_ + if num_sessions is None: + num_sessions = 1 + else: + if batch_size is None or num_sessions is None: + raise ValueError("batch_size should be provided if model is not provided.") nats_to_bits = np.log2(np.e) - num_sessions = model.num_sessions_ - if num_sessions is None: - num_sessions = 1 chance_level = np.log(model.batch_size * num_sessions) return (chance_level - infonce) * nats_to_bits From 3d05a1862c8570ed8999d0a20da1964eedc039f2 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 17 Dec 2024 04:51:10 -0500 Subject: [PATCH 07/14] Fix docstring tests --- cebra/integrations/sklearn/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 46e3b8ca..bf1be3df 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -140,7 +140,7 @@ def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, >>> neural_data = np.random.uniform(0, 1, (1000, 20)) >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) >>> cebra_model.fit(neural_data) - CEBRA(max_iterations=10) + CEBRA(batch_size=512, max_iterations=10) >>> gof = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, neural_data) """ loss = infonce_loss(cebra_model, @@ -171,7 +171,7 @@ def goodness_of_fit_history(model): >>> neural_data = np.random.uniform(0, 1, (1000, 20)) >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) >>> cebra_model.fit(neural_data) - CEBRA(max_iterations=10) + CEBRA(batch_size=512, max_iterations=10) >>> gof_history = cebra.sklearn.metrics.goodness_of_fit_history(cebra_model) """ infonce = np.array(model.state_dict_["log"]["total"]) From e577b5ae0ed72b767faf718f21c4e0ac73050391 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 21 Jan 2025 22:59:32 +0100 Subject: [PATCH 08/14] Update docstring for goodness_of_fit_score MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> --- cebra/integrations/sklearn/metrics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index bf1be3df..93ad983f 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -113,9 +113,11 @@ def goodness_of_fit_score(cebra_model: 
cebra_sklearn_cebra.CEBRA, *y, session_id: Optional[int] = None, num_batches: int = 500) -> float: - """Compute the InfoNCE loss on a *single session* dataset on the model. + """Compute the goodness of fit score on a *single session* dataset on the model. - This function uses the :func:`infonce_loss` function to compute the InfoNCE loss. + This function uses the :func:`infonce_loss` function to compute the InfoNCE loss + for a given `cebra_model` and the :func:`infonce_to_goodness_of_fit` function + to derive the goodness of fit from the InfoNCE loss. Args: cebra_model: The model to use to compute the InfoNCE loss on the samples. From cab2b2b4d85559d0def9b4e93eb62abd4821ccc3 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 21 Jan 2025 23:00:02 +0100 Subject: [PATCH 09/14] add annotations to goodness_of_fit_history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> --- cebra/integrations/sklearn/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 93ad983f..4f30ba01 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -154,7 +154,7 @@ def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, return infonce_to_goodness_of_fit(loss, cebra_model) -def goodness_of_fit_history(model): +def goodness_of_fit_history(model: cebra_sklearn_cebra.CEBRA) -> np.ndarray: """Return the history of the goodness of fit score. Args: From 1d4276918488602833a5fb464f478c6940603433 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 21 Jan 2025 23:00:16 +0100 Subject: [PATCH 10/14] fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> --- cebra/integrations/sklearn/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 4f30ba01..d5f9e2c2 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -208,7 +208,7 @@ def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], Args: infonce: The InfoNCE loss, either a single value or an iterable of values. - model: The trained CEBRA model + model: The trained CEBRA model. batch_size: The batch size used to train the model. num_sessions: The number of sessions used to train the model. 
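As a worked example of the conversion documented in the docstring above — an illustration only, not part of the patch series — the following minimal NumPy sketch writes out S = log N − InfoNCE and its nats-to-bits scaling. The helper name `infonce_to_bits` and the example values (batch_size=512, a single session, an InfoNCE loss of 5.5 nats) are assumptions chosen for illustration.

# Illustrative sketch (not from the patches): the nats-to-bits conversion
# described in the infonce_to_goodness_of_fit docstring, in plain NumPy.
import numpy as np

def infonce_to_bits(infonce, batch_size=512, num_sessions=1):
    # S = log N - InfoNCE, where N = batch_size * num_sessions,
    # then converted from nats to bits via log2(e).
    chance_level = np.log(batch_size * num_sessions)
    return (chance_level - infonce) * np.log2(np.e)

print(infonce_to_bits(5.5))          # ~1.07 bits above chance
print(infonce_to_bits(np.log(512)))  # 0.0 bits: loss exactly at chance level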
From d6f70e49197d629f4fe0f61e904a039c6a2a68c4 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Tue, 21 Jan 2025 23:00:33 +0100 Subject: [PATCH 11/14] improve err message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> --- cebra/integrations/sklearn/metrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index d5f9e2c2..a49dd32a 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -235,7 +235,10 @@ def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], num_sessions = 1 else: if batch_size is None or num_sessions is None: - raise ValueError("batch_size should be provided if model is not provided.") + raise ValueError( + f"batch_size ({batch_size}) and num_sessions ({num_sessions})" + f"should be provided if model is not provided." + ) nats_to_bits = np.log2(np.e) chance_level = np.log(model.batch_size * num_sessions) From bf8694436906fed31bf16a213086787f7f86e5f5 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sat, 25 Jan 2025 00:14:19 +0100 Subject: [PATCH 12/14] make numerical test less conversative --- tests/test_sklearn_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sklearn_metrics.py b/tests/test_sklearn_metrics.py index eb4d8420..ab37e291 100644 --- a/tests/test_sklearn_metrics.py +++ b/tests/test_sklearn_metrics.py @@ -441,7 +441,7 @@ def _fit_and_get_history(X, y): # NOTE(stes): Ignore the first 5 iterations, they can have nonsensical values # due to numerical issues. history_random_non_negative = history_random[history_random >= 0] - np.testing.assert_allclose(history_random_non_negative, 0, atol=0.05) + np.testing.assert_allclose(history_random_non_negative, 0, atol=0.075) assert isinstance(history_linear, np.ndarray) assert history_linear.shape[0] > 0 From fd8e7cdaedf2ab692e1e2d225c6ae1aabae414e1 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sat, 25 Jan 2025 17:26:40 +0100 Subject: [PATCH 13/14] Add tests for exception handling --- tests/test_sklearn_metrics.py | 68 +++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/test_sklearn_metrics.py b/tests/test_sklearn_metrics.py index ab37e291..3a3de159 100644 --- a/tests/test_sklearn_metrics.py +++ b/tests/test_sklearn_metrics.py @@ -395,8 +395,9 @@ def test_goodness_of_fit_score(seed): max_iterations=5, batch_size=512, ) - X = torch.tensor(np.random.uniform(0, 1, (5000, 50))) - y = torch.tensor(np.random.uniform(0, 1, (5000, 5))) + generator = torch.Generator().manual_seed(seed) + X = torch.rand(5000, 50, dtype=torch.float32, generator=generator) + y = torch.rand(5000, 5, dtype=torch.float32, generator=generator) cebra_model.fit(X, y) score = cebra_sklearn_metrics.goodness_of_fit_score(cebra_model, X, @@ -447,3 +448,66 @@ def _fit_and_get_history(X, y): assert history_linear.shape[0] > 0 assert np.all(history_linear[-20:] > history_random[-20:]) + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_infonce_to_goodness_of_fit(seed): + """Test the conversion from InfoNCE loss to goodness of fit metric.""" + # Test with model + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset10-model", + max_iterations=5, + batch_size=128, + ) + generator = torch.Generator().manual_seed(seed) + X = torch.rand(1000, 50, dtype=torch.float32, 
generator=generator) + cebra_model.fit(X) + + # Test single value + gof = cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model) + assert isinstance(gof, float) + + # Test array of values + infonce_values = np.array([1.0, 2.0, 3.0]) + gof_array = cebra_sklearn_metrics.infonce_to_goodness_of_fit( + infonce_values, model=cebra_model) + assert isinstance(gof_array, np.ndarray) + assert gof_array.shape == infonce_values.shape + + # Test with explicit batch_size and num_sessions + gof = cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + batch_size=128, + num_sessions=1) + assert isinstance(gof, float) + + # Test error cases + with pytest.raises(ValueError, match="batch_size.*should not be provided"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model, + batch_size=128) + + with pytest.raises(ValueError, match="batch_size.*should not be provided"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model, + num_sessions=1) + + # Test with unfitted model + unfitted_model = cebra_sklearn_cebra.CEBRA() + with pytest.raises(RuntimeError, match="Fit the CEBRA model first"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=unfitted_model) + + # Test with model having batch_size=None + none_batch_model = cebra_sklearn_cebra.CEBRA(batch_size=None) + none_batch_model.fit(X) + with pytest.raises(ValueError, match="Computing the goodness of fit"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=none_batch_model) + + # Test missing batch_size or num_sessions when model is None + with pytest.raises(ValueError, match="batch_size.*and num_sessions"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, batch_size=128) + + with pytest.raises(ValueError, match="batch_size.*and num_sessions"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, num_sessions=1) From 3771990f3e9345fa523dac1d478051a73965be67 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 2 Feb 2025 16:50:41 +0100 Subject: [PATCH 14/14] fix tests --- cebra/integrations/sklearn/metrics.py | 32 ++++++++++++++++----------- tests/test_sklearn_metrics.py | 5 +++-- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index a49dd32a..0af44ecb 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -116,7 +116,7 @@ def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, """Compute the goodness of fit score on a *single session* dataset on the model. This function uses the :func:`infonce_loss` function to compute the InfoNCE loss - for a given `cebra_model` and the :func:`infonce_to_goodness_of_fit` function + for a given `cebra_model` and the :func:`infonce_to_goodness_of_fit` function to derive the goodness of fit from the InfoNCE loss. Args: @@ -180,10 +180,11 @@ def goodness_of_fit_history(model: cebra_sklearn_cebra.CEBRA) -> np.ndarray: return infonce_to_goodness_of_fit(infonce, model) -def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], - model: Optional[cebra_sklearn_cebra.CEBRA] = None, - batch_size: Optional[int] = None, - num_sessions: Optional[int] = None) -> Union[float, np.ndarray]: +def infonce_to_goodness_of_fit( + infonce: Union[float, np.ndarray], + model: Optional[cebra_sklearn_cebra.CEBRA] = None, + batch_size: Optional[int] = None, + num_sessions: Optional[int] = None) -> Union[float, np.ndarray]: """Given a trained CEBRA model, return goodness of fit metric. 
The goodness of fit ranges from 0 (lowest meaningful value) @@ -208,7 +209,7 @@ def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], Args: infonce: The InfoNCE loss, either a single value or an iterable of values. - model: The trained CEBRA model. + model: The trained CEBRA model. batch_size: The batch size used to train the model. num_sessions: The number of sessions used to train the model. @@ -221,27 +222,32 @@ def infonce_to_goodness_of_fit(infonce: Union[float, np.ndarray], """ if model is not None: if batch_size is not None or num_sessions is not None: - raise ValueError("batch_size and num_sessions should not be provided if model is provided.") + raise ValueError( + "batch_size and num_sessions should not be provided if model is provided." + ) if not hasattr(model, "state_dict_"): raise RuntimeError("Fit the CEBRA model first.") if model.batch_size is None: raise ValueError( "Computing the goodness of fit is not yet supported for " - "models trained on the full dataset (batchsize = None). " - ) + "models trained on the full dataset (batchsize = None). ") batch_size = model.batch_size num_sessions = model.num_sessions_ if num_sessions is None: num_sessions = 1 + + if model.batch_size is None: + raise ValueError( + "Computing the goodness of fit is not yet supported for " + "models trained on the full dataset (batchsize = None). ") else: if batch_size is None or num_sessions is None: raise ValueError( - f"batch_size ({batch_size}) and num_sessions ({num_sessions})" - f"should be provided if model is not provided." - ) + f"batch_size ({batch_size}) and num_sessions ({num_sessions})" + f"should be provided if model is not provided.") nats_to_bits = np.log2(np.e) - chance_level = np.log(model.batch_size * num_sessions) + chance_level = np.log(batch_size * num_sessions) return (chance_level - infonce) * nats_to_bits diff --git a/tests/test_sklearn_metrics.py b/tests/test_sklearn_metrics.py index 3a3de159..4e765ba7 100644 --- a/tests/test_sklearn_metrics.py +++ b/tests/test_sklearn_metrics.py @@ -493,13 +493,14 @@ def test_infonce_to_goodness_of_fit(seed): num_sessions=1) # Test with unfitted model - unfitted_model = cebra_sklearn_cebra.CEBRA() + unfitted_model = cebra_sklearn_cebra.CEBRA(max_iterations=5) with pytest.raises(RuntimeError, match="Fit the CEBRA model first"): cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, model=unfitted_model) # Test with model having batch_size=None - none_batch_model = cebra_sklearn_cebra.CEBRA(batch_size=None) + none_batch_model = cebra_sklearn_cebra.CEBRA(batch_size=None, + max_iterations=5) none_batch_model.fit(X) with pytest.raises(ValueError, match="Computing the goodness of fit"): cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0,
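Taken together, the series adds three functions to cebra/integrations/sklearn/metrics.py: goodness_of_fit_score, goodness_of_fit_history, and infonce_to_goodness_of_fit. The sketch below shows how they fit together once all fourteen patches are applied; it mirrors the docstring examples and tests above, while the import alias and the example InfoNCE value of 5.5 nats are illustrative assumptions.

# Usage sketch assuming the patched cebra package is installed.
import numpy as np
import cebra
from cebra.integrations.sklearn import metrics as cebra_sklearn_metrics

neural_data = np.random.uniform(0, 1, (1000, 20))
cebra_model = cebra.CEBRA(max_iterations=10, batch_size=512)
cebra_model.fit(neural_data)

# Goodness of fit of the fitted model on a dataset, in bits
# (averages InfoNCE over num_batches batches, then converts).
gof = cebra_sklearn_metrics.goodness_of_fit_score(
    cebra_model, neural_data, num_batches=500)

# Goodness of fit over the course of training, derived from the
# InfoNCE values logged in model.state_dict_["log"]["total"].
gof_history = cebra_sklearn_metrics.goodness_of_fit_history(cebra_model)

# Direct conversion of a recorded InfoNCE loss without a model object,
# via the explicit batch_size/num_sessions path added in patch 06.
gof_from_loss = cebra_sklearn_metrics.infonce_to_goodness_of_fit(
    5.5, batch_size=512, num_sessions=1)

A score near 0 bits indicates an embedding no better than chance, as in the random-data test above, while data with recoverable structure should yield positive values.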