Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions pyranker/ranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,45 +136,47 @@ def perform_permutation_test(self) -> None:
)

for i in tqdm(range(n_methods), desc="Permutation test"):
# calculate for unique pairs (i < j)
for j in range(i + 1, n_methods):
# get the ranks for the two methods
method_i = ranks_per_metric_sanitized.iloc[i]
method_j = ranks_per_metric_sanitized.iloc[j]
arr_i = method_i.to_numpy()
arr_j = method_j.to_numpy()
# calculate the difference in ranks
diff_ranks = arr_i.sum() - arr_j.sum()
# initialize an array to store the differences
diff_greater = np.zeros(self.n_iterations)
arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy()
arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy()

# BUG FIX: Use the absolute difference for a two-sided test
observed_diff = abs(arr_i.sum() - arr_j.sum())

count_extreme = 0

# perform the permutation test
for it in range(self.n_iterations):
# generate a random permutation
r = np.random.randint(0, 2, arr_i.shape)
# generate a random permutation mask
r = np.random.randint(0, 2, size=arr_i.shape, dtype=bool)

# create a copy of the ranks
arr1_rand = arr_i.copy()
arr2_rand = arr_j.copy()

# swap the ranks based on the random permutation
arr1_rand[r == 1], arr2_rand[r == 1] = (
arr_j[r == 1],
arr_i[r == 1],
)
# Note: Using boolean indexing is cleaner and often faster
arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r]

# calculate the difference in ranks for the random permutation
diff_ranks_rand = arr1_rand.sum() - arr2_rand.sum()
# store the difference if it is greater than the actual difference
if diff_ranks_rand < diff_ranks:
diff_greater[it] = 1
permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum())

# BUG FIX: Count the permutation if its difference is at least as extreme as the observed one
if permuted_diff >= observed_diff:
count_extreme += 1

# calculate the p-value
self.pvals[i, j] = diff_greater.sum() / self.n_iterations
pval = count_extreme / self.n_iterations
self.pvals[i, j] = pval
# The p-value is symmetric
self.pvals[j, i] = pval

# create a dataframe from the pvals
self.pvals_df = pd.DataFrame(
self.pvals, columns=self.ranks_per_metric["method"]
self.pvals,
columns=self.ranks_per_metric["method"],
index=self.ranks_per_metric["method"],
)
self.pvals_df["method"] = self.ranks_per_metric["method"].tolist()
self.pvals_df = self.pvals_df.set_index("method")
Expand Down