IUCompPath · sarthakpati · Jul 31, 2025 · Jun 10, 2025
@@ -136,45 +136,47 @@ def perform_permutation_test(self) -> None:
         )
 
         for i in tqdm(range(n_methods), desc="Permutation test"):
-            # calculate for unique pairs (i < j)
             for j in range(i + 1, n_methods):
                 # get the ranks for the two methods
-                method_i = ranks_per_metric_sanitized.iloc[i]
-                method_j = ranks_per_metric_sanitized.iloc[j]
-                arr_i = method_i.to_numpy()
-                arr_j = method_j.to_numpy()
-                # # calculate the difference in ranks
-                diff_ranks = arr_i.sum() - arr_j.sum()
-                # initialize an array to store the differences
-                diff_greater = np.zeros(self.n_iterations)
+                arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy()
+                arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy()
+
+                # BUG FIX: Use the absolute difference for a two-sided test
+                observed_diff = abs(arr_i.sum() - arr_j.sum())
+
+                count_extreme = 0
 
                 # perform the permutation test
                 for it in range(self.n_iterations):
-                    # generate a random permutation
-                    r = np.random.randint(0, 2, arr_i.shape)
+                    # generate a random permutation mask
+                    r = np.random.randint(0, 2, size=arr_i.shape, dtype=bool)
 
                     # create a copy of the ranks
                     arr1_rand = arr_i.copy()
                     arr2_rand = arr_j.copy()
 
                     # swap the ranks based on the random permutation
-                    arr1_rand[r == 1], arr2_rand[r == 1] = (
-                        arr_j[r == 1],
-                        arr_i[r == 1],
-                    )
+                    # Note: Using boolean indexing is cleaner and often faster
+                    arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r]
 
                     # calculate the difference in ranks for the random permutation
-                    diff_ranks_rand = arr1_rand.sum() - arr2_rand.sum()
-                    # store the difference if it is greater than the actual difference
-                    if diff_ranks_rand < diff_ranks:
-                        diff_greater[it] = 1
+                    permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum())
+
+                    # BUG FIX: count permutations at least as extreme as observed
+                    if permuted_diff >= observed_diff:
+                        count_extreme += 1
 
                 # calculate the p-value
-                self.pvals[i, j] = diff_greater.sum() / self.n_iterations
+                pval = count_extreme / self.n_iterations
+                self.pvals[i, j] = pval
+                # The two-sided test is symmetric in (i, j), so mirror the p-value
+                self.pvals[j, i] = pval
 
         # create a dataframe from the pvals
         self.pvals_df = pd.DataFrame(
-            self.pvals, columns=self.ranks_per_metric["method"]
+            self.pvals,
+            columns=self.ranks_per_metric["method"],
+            index=self.ranks_per_metric["method"],
         )
         self.pvals_df["method"] = self.ranks_per_metric["method"].tolist()
         self.pvals_df = self.pvals_df.set_index("method")