From c96615b8f18b6da5f8d8e2c92f6819f04fdea77a Mon Sep 17 00:00:00 2001
From: noemibuehrer
Date: Mon, 2 Mar 2026 11:15:43 +0100
Subject: [PATCH] fix: correct TI evaluation and leave out uncertainty reporting

---
 src/lyscripts/compute/evidence.py | 25 +++++++++----------------
 src/lyscripts/evaluate.py         | 25 +++++++++----------------
 2 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py
index 1cd51e2..ffce481 100644
--- a/src/lyscripts/compute/evidence.py
+++ b/src/lyscripts/compute/evidence.py
@@ -48,23 +48,17 @@ def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
 def compute_evidence(
     temp_schedule: np.ndarray,
     log_probs: np.ndarray,
-    num: int = 1000,
-) -> tuple[float, float]:
-    """Compute the evidence and its standard deviation.
+) -> float:
+    """Compute the evidence.
 
     Given a ``temp_schedule`` of inverse temperatures and corresponding sets of
-    ``log_probs``, draw ``num`` "paths" of log-probabilities and compute the evidence
-    for each using trapezoidal integration.
-
-    The evidence is then the mean of those ``num`` integrations, while the error is
-    their standard deviation.
+    ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate
+    the expectation value under the corresponding power posterior for each step in the
+    ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the
+    expectation values over the ``temp_schedule``.
     """
-    integrals = np.zeros(shape=num)
-    for i in range(num):
-        rand_idx = RNG.choice(log_probs.shape[1], size=log_probs.shape[0])
-        drawn_accuracy = log_probs[np.arange(log_probs.shape[0]), rand_idx].copy()
-        integrals[i] = trapezoid(y=drawn_accuracy, x=temp_schedule)
-    return np.mean(integrals), np.std(integrals)
+    a_mc = np.mean(log_probs, axis=1)
+    return trapezoid(y=a_mc, x=temp_schedule)
 
 
 def compute_ti_results(
@@ -95,9 +89,8 @@ def compute_ti_results(
         )
         ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"]
 
-    evidence, evidence_std = compute_evidence(temp_schedule, ti_log_probs)
+    evidence = compute_evidence(temp_schedule, ti_log_probs)
     metrics["evidence"] = evidence
-    metrics["evidence_std"] = evidence_std
 
     return temp_schedule, ti_log_probs
 
diff --git a/src/lyscripts/evaluate.py b/src/lyscripts/evaluate.py
index d09c5dd..5f9dbd0 100644
--- a/src/lyscripts/evaluate.py
+++ b/src/lyscripts/evaluate.py
@@ -90,23 +90,17 @@ def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
 def compute_evidence(
     temp_schedule: np.ndarray,
     log_probs: np.ndarray,
-    num: int = 1000,
-) -> tuple[float, float]:
-    """Compute the evidence and its standard deviation.
+) -> float:
+    """Compute the evidence.
 
     Given a ``temp_schedule`` of inverse temperatures and corresponding sets of
-    ``log_probs``, draw ``num`` "paths" of log-probabilities and compute the evidence
-    for each using trapezoidal integration.
-
-    The evidence is then the mean of those ``num`` integrations, while the error is
-    their standard deviation.
+    ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate
+    the expectation value under the corresponding power posterior for each step in the
+    ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the
+    expectation values over the ``temp_schedule``.
     """
-    integrals = np.zeros(shape=num)
-    for i in range(num):
-        rand_idx = RNG.choice(log_probs.shape[1], size=log_probs.shape[0])
-        drawn_accuracy = log_probs[np.arange(log_probs.shape[0]), rand_idx].copy()
-        integrals[i] = trapezoid(y=drawn_accuracy, x=temp_schedule)
-    return np.mean(integrals), np.std(integrals)
+    a_mc = np.mean(log_probs, axis=1)
+    return trapezoid(y=a_mc, x=temp_schedule)
 
 
 def compute_ti_results(
@@ -134,9 +128,8 @@ def compute_ti_results(
         reader = emcee.backends.HDFBackend(model, name=f"ti/{run}", read_only=True)
         ti_log_probs[i] = reader.get_blobs(flat=True)
 
-    evidence, evidence_std = compute_evidence(temp_schedule, ti_log_probs)
+    evidence = compute_evidence(temp_schedule, ti_log_probs)
     metrics["evidence"] = evidence
-    metrics["evidence_std"] = evidence_std
 
     return temp_schedule, ti_log_probs
 