From 230c67be0fd5a6781ec4f8d71f8374dd37e3868c Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Tue, 10 Jun 2025 13:51:12 -0400 Subject: [PATCH 01/11] Update ranker.py fixing bug for extremes --- pyranker/ranker.py | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 355ce4d..7b28354 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -136,45 +136,47 @@ def perform_permutation_test(self) -> None: ) for i in tqdm(range(n_methods), desc="Permutation test"): - # calculate for unique pairs (i < j) for j in range(i + 1, n_methods): # get the ranks for the two methods - method_i = ranks_per_metric_sanitized.iloc[i] - method_j = ranks_per_metric_sanitized.iloc[j] - arr_i = method_i.to_numpy() - arr_j = method_j.to_numpy() - # # calculate the difference in ranks - diff_ranks = arr_i.sum() - arr_j.sum() - # initialize an array to store the differences - diff_greater = np.zeros(self.n_iterations) + arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() + arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() + + # BUG FIX: Use the absolute difference for a two-sided test + observed_diff = abs(arr_i.sum() - arr_j.sum()) + + count_extreme = 0 # perform the permutation test for it in range(self.n_iterations): - # generate a random permutation - r = np.random.randint(0, 2, arr_i.shape) + # generate a random permutation mask + r = np.random.randint(0, 2, size=arr_i.shape, dtype=bool) # create a copy of the ranks arr1_rand = arr_i.copy() arr2_rand = arr_j.copy() # swap the ranks based on the random permutation - arr1_rand[r == 1], arr2_rand[r == 1] = ( - arr_j[r == 1], - arr_i[r == 1], - ) + # Note: Using boolean indexing is cleaner and often faster + arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r] # calculate the difference in ranks for the random permutation - diff_ranks_rand = arr1_rand.sum() - arr2_rand.sum() - # store the difference if it is greater than the 
actual difference - if diff_ranks_rand < diff_ranks: - diff_greater[it] = 1 + permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum()) + + # BUG FIX: Check if the permuted difference is as or more extreme + if permuted_diff >= observed_diff: + count_extreme += 1 # calculate the p-value - self.pvals[i, j] = diff_greater.sum() / self.n_iterations + pval = count_extreme / self.n_iterations + self.pvals[i, j] = pval + # The p-value is symmetric + self.pvals[j, i] = pval # create a dataframe from the pvals self.pvals_df = pd.DataFrame( - self.pvals, columns=self.ranks_per_metric["method"] + self.pvals, + columns=self.ranks_per_metric["method"], + index=self.ranks_per_metric["method"], ) self.pvals_df["method"] = self.ranks_per_metric["method"].tolist() self.pvals_df = self.pvals_df.set_index("method") From 8daf191eeb9f9c99830c73fa3600849d47269b8e Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Fri, 15 Aug 2025 17:02:58 -0400 Subject: [PATCH 02/11] added final ranking save to csv first --- pyranker/ranker.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 6bfc27e..7cca551 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -1,6 +1,7 @@ from typing import Dict, List, Tuple -import pandas as pd + import numpy as np +import pandas as pd from tqdm import tqdm @@ -28,6 +29,7 @@ def __init__( # dict of lists with the key being ${metric}_${subjectid} and value being the list of scores self.combined_scores_per_subject = {} + self.participant_ranks = {} self.combine_csvs_and_scores() @@ -55,6 +57,12 @@ def combine_csvs_and_scores(self) -> None: metrics_columns.sort() current_df = current_df[["subjectid"] + metrics_columns] + # calculate ranks for each metric + for metric in metrics_columns: + current_df[metric] = current_df[metric].rank( + method=self.ranking_method, ascending=False + ) + # convert to a single row df with unique column names based on subjectid column 
current_df_flattened = {"method": method} for _, row in current_df.iterrows(): @@ -74,11 +82,11 @@ def combine_csvs_and_scores(self) -> None: def rank_methods(self) -> None: """ - Rank the methods based on the metrics. + Rank the methods based on each metric and calculate the combined final rank. """ # calculate rank per metric self.ranks_per_metric = self.metrics_per_subject.rank( - method=self.ranking_method, ascending=False, numeric_only=True + method=self.ranking_method, ascending=True, numeric_only=True ) # ensure all metrics are lowercase to avoid case sensitivity @@ -94,10 +102,8 @@ def rank_methods(self) -> None: - self.ranks_per_metric[column] ) - # calculate cumulative rank by summing the ranks of all metrics and dividing by the number of metrics - cumulative_rank_column = self.ranks_per_metric.sum(axis=1) / len( - self.ranks_per_metric.columns - ) + # calculate cumulative rank by summing the ranks of all metrics + cumulative_rank_column = self.ranks_per_metric.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True ) @@ -145,10 +151,10 @@ def perform_permutation_test(self) -> None: # get the ranks for the two methods arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() - + # BUG FIX: Use the absolute difference for a two-sided test observed_diff = abs(arr_i.sum() - arr_j.sum()) - + count_extreme = 0 # perform the permutation test @@ -166,14 +172,14 @@ def perform_permutation_test(self) -> None: # calculate the difference in ranks for the random permutation permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum()) - + # BUG FIX: Check if the permuted difference is as or more extreme if permuted_diff >= observed_diff: count_extreme += 1 # Check if count_extreme is still zero, which would create pval=0. # A p-value of 0 implies absolute certainty, which is unrealistic given the finite - # number of permutations. 
To avoid this, we adjust count_extreme to ensure a + # number of permutations. To avoid this, we adjust count_extreme to ensure a # conservative estimate of the p-value, aligning with standard statistical practices. if count_extreme == 0: count_extreme += 1 @@ -199,4 +205,11 @@ def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: Returns: Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the rankings and p-values dataframes. """ - return self.ranks_per_metric, self.pvals_df + # sort ranks by final_rank + ranks_df = self.ranks_per_metric.sort_values(by="final_rank").reset_index( + drop=True + ) + # sort pvals by the same order as ranks + pvals_df = self.pvals_df.reindex(index=ranks_df["method"], columns=ranks_df["method"]) + + return ranks_df, pvals_df From 0fec488adbc49c352a222e3e366e64eb9b2b85ef Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Mon, 18 Aug 2025 13:41:50 -0400 Subject: [PATCH 03/11] formatting --- pyranker/ranker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 7cca551..5fa4ad1 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -210,6 +210,8 @@ def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: drop=True ) # sort pvals by the same order as ranks - pvals_df = self.pvals_df.reindex(index=ranks_df["method"], columns=ranks_df["method"]) + pvals_df = self.pvals_df.reindex( + index=ranks_df["method"], columns=ranks_df["method"] + ) return ranks_df, pvals_df From 46d788ec14e1de8ec35293c93a6115296a0f9eed Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Mon, 18 Aug 2025 15:02:50 -0400 Subject: [PATCH 04/11] might or might not work --- pyranker/cli/run.py | 23 +++++- pyranker/ranker.py | 176 +++++++++++++++++++++++++------------------- 2 files changed, 121 insertions(+), 78 deletions(-) diff --git a/pyranker/cli/run.py b/pyranker/cli/run.py index 5d11c58..244f92a 100644 --- a/pyranker/cli/run.py +++ b/pyranker/cli/run.py @@ -1,3 +1,4 
@@ +import os from pathlib import Path from typing import Optional @@ -168,7 +169,7 @@ def main( "--iterations", help="The number of iterations to perform for the permutation test.", ), - ] = 1000, + ] = 100000, ranking_method: Annotated[ str, typer.Option( @@ -177,6 +178,14 @@ def main( help="The method to use for ranking the methods; one of 'average', 'min', 'max', 'first', 'dense'.", ), ] = "average", + n_jobs: Annotated[ + int, + typer.Option( + "-j", + "--n-jobs", + help="The number of CPU cores to use for parallel processing.", + ), + ] = 4, version: Annotated[ Optional[bool], typer.Option( @@ -195,9 +204,9 @@ def main( csvs_to_compare_with_full_path = get_csv_paths(input) # basic sanity checks - assert ( - len(csvs_to_compare_with_full_path) > 1 - ), "At least two methods are required for comparison" + assert len(csvs_to_compare_with_full_path) > 1, ( + "At least two methods are required for comparison" + ) ranking_method = ranking_method.lower() assert ranking_method in [ "average", @@ -208,6 +217,11 @@ def main( ], "Invalid ranking method" assert iterations > 0, "Number of iterations must be greater than 0" + # Assert that the number of jobs is not greater than the number of cores + assert n_jobs <= os.cpu_count(), ( + "Number of jobs cannot be greater than the number of cores" + ) + # convert the metrics_for_reversal to a list metrics_for_reversal_list = ( metrics_for_reversal.split(",") if metrics_for_reversal else [] @@ -227,6 +241,7 @@ def main( metrics_for_reversal=metrics_for_reversal_list, n_iterations=iterations, ranking_method=ranking_method, + n_jobs=n_jobs, ) ranks, pvals = ranker.get_rankings_and_pvals() Path(outputdir).mkdir(parents=True, exist_ok=True) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 5fa4ad1..bf1a58f 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -1,3 +1,5 @@ +import os +from concurrent.futures import ProcessPoolExecutor, as_completed from typing import Dict, List, Tuple import numpy as np @@ -5,6 +7,57 
@@ from tqdm import tqdm +# This worker function is defined at the top level so it can be pickled +# and sent to other processes by the ProcessPoolExecutor. +def _calculate_pval_for_pair( + arr_i: np.ndarray, arr_j: np.ndarray, n_iterations: int +) -> float: + """ + Performs the permutation test for a single pair of rank arrays. + + Args: + arr_i (np.ndarray): Rank array for the first method. + arr_j (np.ndarray): Rank array for the second method. + n_iterations (int): The number of permutation iterations. + + Returns: + float: The calculated p-value. + """ + # Use the absolute difference for a two-sided test + observed_diff = abs(arr_i.sum() - arr_j.sum()) + count_extreme = 0 + + # Create a local random number generator for thread-safety + rng = np.random.default_rng() + + # Perform the permutation test + for _ in range(n_iterations): + # Generate a random permutation mask + r = rng.integers(0, 2, size=arr_i.shape, dtype=bool) + + # Create a copy of the ranks + arr1_rand = arr_i.copy() + arr2_rand = arr_j.copy() + + # Swap the ranks based on the random permutation + arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r] + + # Calculate the difference in ranks for the random permutation + permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum()) + + # Check if the permuted difference is as or more extreme + if permuted_diff >= observed_diff: + count_extreme += 1 + + # To avoid p-values of 0, which implies absolute certainty, we add 1 + # if no permuted difference was more extreme than the observed one. + if count_extreme == 0: + count_extreme = 1 + + pval = count_extreme / n_iterations + return pval + + class Ranker: def __init__( self, @@ -12,6 +65,7 @@ def __init__( metrics_for_reversal: List[str], n_iterations: int = 1000, ranking_method: str = "average", + n_jobs: int = 4, # New argument for number of CPU cores ) -> None: """ Ranker class to compare the scores of different methods. 
@@ -21,13 +75,19 @@ def __init__( metrics_for_reversal (List[str]): The metrics for which the reversal should be calculated. n_iterations (int): The number of iterations to perform for the permutation test. ranking_method (str): The method to use for ranking the methods. + n_jobs (int): The number of CPU cores to use for parallel processing. Defaults to 4. + Set to -1 to use all available cores. """ self.input_csvs_to_compare = input_csvs_to_compare self.metrics_for_reversal = metrics_for_reversal self.n_iterations = n_iterations self.ranking_method = ranking_method - # dict of lists with the key being ${metric}_${subjectid} and value being the list of scores + if n_jobs == -1: + self.n_jobs = os.cpu_count() + else: + self.n_jobs = n_jobs + self.combined_scores_per_subject = {} self.participant_ranks = {} @@ -38,32 +98,24 @@ def combine_csvs_and_scores(self) -> None: Combine the CSVs and scores of the methods. """ self.combined_scores_per_subject["method"] = [] - - # create a dataframe to store the metrics per subject self.metrics_per_subject = pd.DataFrame() for method in self.input_csvs_to_compare: self.combined_scores_per_subject["method"].append(method) current_df = pd.read_csv(self.input_csvs_to_compare[method]) - # ensure all columns are lowercase to avoid case sensitivity current_df.columns = current_df.columns.str.lower() - - # sort along subjectid column to ensure that metrics are in the same order current_df = current_df.sort_values(by="subjectid") - # sort metrics columns to ensure that metrics are in the same order - metrics_columns = current_df.columns.tolist() - metrics_columns.remove("subjectid") - metrics_columns.sort() + metrics_columns = sorted( + [col for col in current_df.columns if col != "subjectid"] + ) current_df = current_df[["subjectid"] + metrics_columns] - # calculate ranks for each metric for metric in metrics_columns: current_df[metric] = current_df[metric].rank( method=self.ranking_method, ascending=False ) - # convert to a single row 
df with unique column names based on subjectid column current_df_flattened = {"method": method} for _, row in current_df.iterrows(): for metric in current_df.columns: @@ -72,10 +124,11 @@ def combine_csvs_and_scores(self) -> None: metric ] - # convert to a dataframe and append to the metrics_per_subject dataframe current_df_flattened = pd.DataFrame(current_df_flattened, index=[0]) self.metrics_per_subject = pd.concat( - [self.metrics_per_subject, current_df_flattened], axis=0 + [self.metrics_per_subject, current_df_flattened], + axis=0, + ignore_index=True, ) self.rank_methods() @@ -84,15 +137,12 @@ def rank_methods(self) -> None: """ Rank the methods based on each metric and calculate the combined final rank. """ - # calculate rank per metric self.ranks_per_metric = self.metrics_per_subject.rank( method=self.ranking_method, ascending=True, numeric_only=True ) - # ensure all metrics are lowercase to avoid case sensitivity metrics_for_reversal_lower = [x.lower() for x in self.metrics_for_reversal] - # reverse the ranks for the metrics that need reversal for metric in metrics_for_reversal_lower: for column in self.ranks_per_metric.columns: if metric in column: @@ -102,12 +152,11 @@ def rank_methods(self) -> None: - self.ranks_per_metric[column] ) - # calculate cumulative rank by summing the ranks of all metrics cumulative_rank_column = self.ranks_per_metric.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True ) - # combine cumulative_rank_column, final_rank_column, and method column to the ranks_per_metric dataframe + self.ranks_per_metric = pd.concat( [ self.ranks_per_metric, @@ -118,7 +167,6 @@ def rank_methods(self) -> None: axis=1, ) - # reorder columns to put method, final_rank, cumulative_rank in the beginning self.ranks_per_metric = self.ranks_per_metric[ ["method", "final_rank", "cumulative_rank"] + [ @@ -132,12 +180,11 @@ def rank_methods(self) -> None: def perform_permutation_test(self) -> None: """ - Perform 
permutation test to determine the significance of the ranks. + Perform permutation test in parallel to determine the significance of the ranks. """ n_methods = len(self.ranks_per_metric) self.pvals = np.zeros((n_methods, n_methods)) - # sort in order of cumulative rank and reset index in one step ranks_per_metric_sorted = self.ranks_per_metric.sort_values( by="cumulative_rank" ).reset_index(drop=True) @@ -146,72 +193,53 @@ def perform_permutation_test(self) -> None: columns=["method", "cumulative_rank", "final_rank"] ) - for i in tqdm(range(n_methods), desc="Permutation test"): - for j in range(i + 1, n_methods): - # get the ranks for the two methods - arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() - arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() - - # BUG FIX: Use the absolute difference for a two-sided test - observed_diff = abs(arr_i.sum() - arr_j.sum()) - - count_extreme = 0 - - # perform the permutation test - for it in range(self.n_iterations): - # generate a random permutation mask - r = np.random.randint(0, 2, size=arr_i.shape, dtype=bool) - - # create a copy of the ranks - arr1_rand = arr_i.copy() - arr2_rand = arr_j.copy() - - # swap the ranks based on the random permutation - # Note: Using boolean indexing is cleaner and often faster - arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r] - - # calculate the difference in ranks for the random permutation - permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum()) - - # BUG FIX: Check if the permuted difference is as or more extreme - if permuted_diff >= observed_diff: - count_extreme += 1 - - # Check if count_extreme is still zero, which would create pval=0. - # A p-value of 0 implies absolute certainty, which is unrealistic given the finite - # number of permutations. To avoid this, we adjust count_extreme to ensure a - # conservative estimate of the p-value, aligning with standard statistical practices. 
- if count_extreme == 0: - count_extreme += 1 - # calculate the p-value - pval = count_extreme / self.n_iterations - self.pvals[i, j] = pval - # The p-value is symmetric - self.pvals[j, i] = pval - - # create a dataframe from the pvals + # Use ProcessPoolExecutor for parallel processing + with ProcessPoolExecutor(max_workers=self.n_jobs) as executor: + # A dictionary to map future objects to their matrix indices (i, j) + future_to_indices = {} + # Submit all pairs to the executor + for i in range(n_methods): + for j in range(i + 1, n_methods): + arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() + arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() + # Submit the worker function with its arguments + future = executor.submit( + _calculate_pval_for_pair, arr_i, arr_j, self.n_iterations + ) + future_to_indices[future] = (i, j) + + # Create a progress bar that updates as tasks are completed + pbar = tqdm( + as_completed(future_to_indices), + total=len(future_to_indices), + desc="Permutation test", + ) + for future in pbar: + i, j = future_to_indices[future] + try: + pval = future.result() + self.pvals[i, j] = pval + self.pvals[j, i] = pval # p-value is symmetric + except Exception as exc: + print(f"Pair ({i}, {j}) generated an exception: {exc}") + self.pvals_df = pd.DataFrame( self.pvals, - columns=self.ranks_per_metric["method"], - index=self.ranks_per_metric["method"], + columns=ranks_per_metric_sorted["method"], + index=ranks_per_metric_sorted["method"], ) - self.pvals_df["method"] = self.ranks_per_metric["method"].tolist() - self.pvals_df = self.pvals_df.set_index("method") def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Get the rankings of the methods. + Get the final rankings and p-values. Returns: Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the rankings and p-values dataframes. 
""" - # sort ranks by final_rank ranks_df = self.ranks_per_metric.sort_values(by="final_rank").reset_index( drop=True ) - # sort pvals by the same order as ranks pvals_df = self.pvals_df.reindex( index=ranks_df["method"], columns=ranks_df["method"] ) - return ranks_df, pvals_df From fda95595c64d478854faf2510bcff0ae8388e322 Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Mon, 18 Aug 2025 18:10:19 -0400 Subject: [PATCH 05/11] fixed abs ranking for null hypothesis bug --- pyranker/ranker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index bf1a58f..b801c23 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -23,8 +23,8 @@ def _calculate_pval_for_pair( Returns: float: The calculated p-value. """ - # Use the absolute difference for a two-sided test - observed_diff = abs(arr_i.sum() - arr_j.sum()) + # Use the difference for a one-sided test + observed_diff = arr_i.sum() - arr_j.sum() count_extreme = 0 # Create a local random number generator for thread-safety @@ -43,7 +43,7 @@ def _calculate_pval_for_pair( arr1_rand[r], arr2_rand[r] = arr_j[r], arr_i[r] # Calculate the difference in ranks for the random permutation - permuted_diff = abs(arr1_rand.sum() - arr2_rand.sum()) + permuted_diff = arr1_rand.sum() - arr2_rand.sum() # Check if the permuted difference is as or more extreme if permuted_diff >= observed_diff: From 244d54a51f47a0090c33ad04ab6c55af73872b29 Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Tue, 19 Aug 2025 11:20:09 -0400 Subject: [PATCH 06/11] need to fix the tests --- data/m1.csv | 14 ++------ data/m2.csv | 14 ++------ data/m3.csv | 14 ++------ data/m4.csv | 14 ++------ data/temp_output/pvals.csv | 5 +++ data/temp_output/ranks.csv | 5 +++ pyranker/cli/run.py | 30 +++++++++++++++++ pyranker/ranker.py | 17 +++------- tests_full.py | 68 ++++++++++++++++++++++++++++++++++---- 9 files changed, 118 insertions(+), 63 deletions(-) create mode 100644 data/temp_output/pvals.csv 
create mode 100644 data/temp_output/ranks.csv diff --git a/data/m1.csv b/data/m1.csv index e23342c..1dbbb7c 100644 --- a/data/m1.csv +++ b/data/m1.csv @@ -1,11 +1,3 @@ -SubjectID,A,B,C,D,E,F -s001,-0.676662165,-1.406645477,0.736895876,-0.174272834,0.576927715,-0.232139845 -s002,-1.182135526,0.325161174,1.265839829,0.637533468,0.717606195,-0.232249719 -s003,0.393762147,-1.366917238,-1.974747205,-2.029359097,-0.91486706,-0.110356815 -s004,-0.560421215,-0.916606755,-0.244361005,0.173264029,-0.018263561,-1.112137106 -s005,-0.018074945,0.909978883,0.654103198,-0.412681032,0.415519864,0.415147598 -s006,0.584884843,-0.365552063,-0.125284377,0.420532768,1.048717925,-0.520722918 -s007,0.246445503,0.018436118,0.540072217,-0.059316335,-1.102092291,0.446401257 -s008,-0.78842192,-0.634175082,0.312935264,0.272096895,-0.151559698,-2.457860693 -s009,0.134775369,-0.241349035,0.711768614,-0.387514653,0.090663752,0.71284279 -s010,-0.96395775,-0.663571103,0.838443773,-0.933803671,-0.722117911,-0.189414521 +subjectid,A,B,C,D,E,F +s1,1,2,3,4,5,6 +s2,7,8,9,10,11,12 \ No newline at end of file diff --git a/data/m2.csv b/data/m2.csv index 059996b..e93738f 100644 --- a/data/m2.csv +++ b/data/m2.csv @@ -1,11 +1,3 @@ -SubjectID,A,B,C,D,E,F -s001,-0.371449174,0.956404946,-0.959452443,-0.309927689,0.905046916,0.819083005 -s002,0.935687942,0.109916076,-0.689643721,1.068025385,-1.154739305,-0.462448565 -s003,-0.049420815,0.64668578,-0.318198107,0.724407035,0.583641064,-0.704724761 -s004,-1.49698864,1.249697716,0.04787162,0.188726789,-0.819034985,-0.179096185 -s005,2.136690703,-0.868203102,-0.78604478,0.855744592,0.857935164,0.492256653 -s006,-0.355118237,0.517377129,0.928951769,0.792176927,-0.805270336,1.117546966 -s007,-0.778346825,1.683369425,-0.443459427,-0.593956209,4.0971389,-0.445679171 -s008,0.267208376,0.184556657,0.323158227,2.282268373,1.364794637,0.181174591 -s009,-0.386538967,-0.916456619,1.271967332,-0.052378684,-1.205062795,-0.626923254 
-s010,0.435225064,0.91151586,-1.113652003,-0.220028617,-1.05347926,0.365272475 +subjectid,A,B,C,D,E,F +s1,2,3,4,5,6,7 +s2,8,9,10,11,12,13 \ No newline at end of file diff --git a/data/m3.csv b/data/m3.csv index 9bf468c..d60cbd7 100644 --- a/data/m3.csv +++ b/data/m3.csv @@ -1,11 +1,3 @@ -SubjectID,A,B,C,D,E,F -s001,-0.495294073,0.949116249,0.296072803,1.868387862,-0.272883702,-1.818801645 -s002,1.216439744,0.197072557,-0.081120879,1.469343652,2.263823391,0.181492295 -s003,-0.155607109,0.337023954,-0.458342088,-1.031167585,0.218811382,0.148051802 -s004,-1.209131999,-0.096524866,1.197362593,-0.062309653,-0.658751113,-0.262658666 -s005,0.645690766,0.899682779,-1.202114635,-0.452507338,0.178007526,-0.526872668 -s006,-0.527395342,-0.585397127,0.601057827,-0.438992879,9.23E-05,2.411401279 -s007,-0.781069044,-0.651766877,-0.003398167,-0.254586911,-0.048605563,1.6079838 -s008,-0.005850292,1.152494476,1.064747549,-0.227608884,1.45054756,1.422734322 -s009,0.796185038,-1.295533863,-0.007947827,0.624035116,-0.605764923,-0.856374829 -s010,0.952854212,-1.007389474,0.686420686,1.377020745,1.221967627,-0.120206896 +subjectid,A,B,C,D,E,F +s1,3,4,5,6,7,8 +s2,9,10,11,12,13,14 \ No newline at end of file diff --git a/data/m4.csv b/data/m4.csv index 36e69eb..05cc7a1 100644 --- a/data/m4.csv +++ b/data/m4.csv @@ -1,11 +1,3 @@ -SubjectID,A,B,C,D,E,F -s001,0.127830235,0.543904483,0.169190618,-0.849953283,-0.563713316,0.736931479 -s002,0.567418525,0.965856382,1.266015552,0.471422651,-0.758025824,-0.427404497 -s003,-1.221693479,-1.121073154,-1.677648371,2.016433719,-0.087967121,-0.472855621 -s004,0.954423388,-0.093452563,0.659446581,-0.190049419,-0.921771701,0.090774055 -s005,0.950052283,-0.621810664,0.254520025,0.360940315,-0.483358752,-0.935151931 -s006,1.455226207,-0.721900186,0.801810726,-0.641529199,0.563422873,0.772440661 -s007,-1.053644931,0.098930728,0.999364504,1.029298347,-0.632529862,-1.666171306 -s008,-0.671755474,0.389256225,0.697323813,-0.483432377,0.073658468,-0.233170802 
-s009,0.059997347,0.583152369,-1.371183183,-0.528158479,0.435198404,0.705164885 -s010,-0.458500476,-1.526985622,0.370253517,0.844777527,-0.500950386,0.75340932 +subjectid,A,B,C,D,E,F +s1,4,5,6,7,8,9 +s2,10,11,12,13,14,15 \ No newline at end of file diff --git a/data/temp_output/pvals.csv b/data/temp_output/pvals.csv new file mode 100644 index 0000000..14915f0 --- /dev/null +++ b/data/temp_output/pvals.csv @@ -0,0 +1,5 @@ +method,m1,m2,m3,m4 +m1,0.0,1.0,1.0,1.0 +m2,1.0,0.0,1.0,1.0 +m3,1.0,1.0,0.0,1.0 +m4,1.0,1.0,1.0,0.0 diff --git a/data/temp_output/ranks.csv b/data/temp_output/ranks.csv new file mode 100644 index 0000000..e34367f --- /dev/null +++ b/data/temp_output/ranks.csv @@ -0,0 +1,5 @@ +method,final_rank,cumulative_rank,average_rank_s1,average_rank_s2 +m1,2.5,5.0,2.5,2.5 +m2,2.5,5.0,2.5,2.5 +m3,2.5,5.0,2.5,2.5 +m4,2.5,5.0,2.5,2.5 diff --git a/pyranker/cli/run.py b/pyranker/cli/run.py index 244f92a..9d2f081 100644 --- a/pyranker/cli/run.py +++ b/pyranker/cli/run.py @@ -162,6 +162,20 @@ def main( help="The comma-separated metric columns for which the reversal should be calculated; for example, 'hausdorff_tc,hausdorff_et'.", ), ] = "", + metric_to_use: Annotated[ + str, + typer.Option( + "--metric-to-use", + help="The comma-separated metric columns to use for ranking.", + ), + ] = "", + weight: Annotated[ + str, + typer.Option( + "--weight", + help="The comma-separated weights for the metrics.", + ), + ] = "", iterations: Annotated[ int, typer.Option( @@ -227,6 +241,21 @@ def main( metrics_for_reversal.split(",") if metrics_for_reversal else [] ) + metric_weights = {} + if metric_to_use and weight: + metrics = [m.strip() for m in metric_to_use.split(",")] + weights = [float(w.strip()) for w in weight.split(",")] + assert len(metrics) == len(weights), ( + "Number of metrics and weights must be the same." 
+ ) + + total_weight = sum(weights) + metric_weights = { + metric.lower().strip(): w / total_weight + for metric, w in zip(metrics, weights) + } + print(f"Using weighted ranking with metrics: {metric_weights}") + num_subjects, num_metrics = validate_csvs(csvs_to_compare_with_full_path) # print the summary of the input files @@ -242,6 +271,7 @@ def main( n_iterations=iterations, ranking_method=ranking_method, n_jobs=n_jobs, + metric_weights=metric_weights, ) ranks, pvals = ranker.get_rankings_and_pvals() Path(outputdir).mkdir(parents=True, exist_ok=True) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index b801c23..7bba987 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -65,7 +65,8 @@ def __init__( metrics_for_reversal: List[str], n_iterations: int = 1000, ranking_method: str = "average", - n_jobs: int = 4, # New argument for number of CPU cores + n_jobs: int = 4, + metric_weights: Dict[str, float] = None, ) -> None: """ Ranker class to compare the scores of different methods. @@ -77,11 +78,13 @@ def __init__( ranking_method (str): The method to use for ranking the methods. n_jobs (int): The number of CPU cores to use for parallel processing. Defaults to 4. Set to -1 to use all available cores. + metric_weights (Dict[str, float]): A dictionary of metric names and their weights. 
""" self.input_csvs_to_compare = input_csvs_to_compare self.metrics_for_reversal = metrics_for_reversal self.n_iterations = n_iterations self.ranking_method = ranking_method + self.metric_weights = metric_weights if metric_weights else {} if n_jobs == -1: self.n_jobs = os.cpu_count() @@ -131,6 +134,7 @@ def combine_csvs_and_scores(self) -> None: ignore_index=True, ) + self.metrics_per_subject.to_csv("metric_rank.csv", index=False) self.rank_methods() def rank_methods(self) -> None: @@ -141,17 +145,6 @@ def rank_methods(self) -> None: method=self.ranking_method, ascending=True, numeric_only=True ) - metrics_for_reversal_lower = [x.lower() for x in self.metrics_for_reversal] - - for metric in metrics_for_reversal_lower: - for column in self.ranks_per_metric.columns: - if metric in column: - self.ranks_per_metric[column] = ( - self.ranks_per_metric[column].max() - + 1 - - self.ranks_per_metric[column] - ) - cumulative_rank_column = self.ranks_per_metric.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True diff --git a/tests_full.py b/tests_full.py index 6a936fd..6eac063 100644 --- a/tests_full.py +++ b/tests_full.py @@ -1,6 +1,7 @@ from pathlib import Path -import pandas as pd + import numpy as np +import pandas as pd from pyranker.cli.run import main @@ -43,9 +44,9 @@ def _sanity_check(output_dir: str) -> None: def test_main_dir_input(): - cwd = Path.cwd() - test_data_dir = (cwd / "data").absolute().as_posix() - test_output_dir = (cwd / "data" / "temp_output").absolute().as_posix() + test_dir = Path(__file__).parent + test_data_dir = (test_dir / "data").absolute().as_posix() + test_output_dir = (test_dir / "data" / "temp_output").absolute().as_posix() main( input=test_data_dir, outputdir=test_output_dir, @@ -56,9 +57,9 @@ def test_main_dir_input(): def test_main_files_input(): - cwd = Path.cwd() - test_data_dir = cwd / "data" - test_output_dir = (cwd / "data" / "temp_output").absolute().as_posix() + test_dir = 
Path(__file__).parent + test_data_dir = test_dir / "data" + test_output_dir = (test_dir / "data" / "temp_output").absolute().as_posix() input_files = "" for file in test_data_dir.iterdir(): if file.suffix == ".csv": @@ -71,3 +72,56 @@ def test_main_files_input(): ) _sanity_check(test_output_dir) + + +def test_main_weighted_ranking(tmp_path): + """ + Test the weighted ranking functionality. + """ + # Create a temporary directory for test data + data_dir = tmp_path / "data" + data_dir.mkdir() + output_dir = tmp_path / "output" + output_dir.mkdir() + + # Create sample CSV files + method1_data = { + "subjectid": ["s1", "s2"], + "metricA": [10, 20], + "metricB": [0.1, 0.2], + } + method1_df = pd.DataFrame(method1_data) + method1_df.to_csv(data_dir / "method1.csv", index=False) + + method2_data = { + "subjectid": ["s1", "s2"], + "metricA": [15, 5], + "metricB": [0.3, 0.4], + } + method2_df = pd.DataFrame(method2_data) + method2_df.to_csv(data_dir / "method2.csv", index=False) + + # Call main with weighted ranking arguments + main( + input=str(data_dir), + outputdir=str(output_dir), + metrics_for_reversal="metricB", + metric_to_use="metricA,metricB", + weight="3,1", + ) + + # Check the output + ranks_file = output_dir / "ranks.csv" + assert ranks_file.exists(), "Ranks file does not exist" + ranks_df = pd.read_csv(ranks_file) + + expected_ranks = { + "method1": 1.5, + "method2": 1.5, + } + + for method, expected_rank in expected_ranks.items(): + assert ( + ranks_df[ranks_df["method"] == method]["final_rank"].values[0] + == expected_rank + ), f"Final rank for {method} is not as expected" From 681d741d1962f0b52217655d39ed227df04965e3 Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Tue, 19 Aug 2025 12:47:47 -0400 Subject: [PATCH 07/11] needs documentation, but working as expected now --- pyranker/cli/run.py | 31 +-------- pyranker/ranker.py | 149 ++++++++++++++++++++++---------------------- 2 files changed, 75 insertions(+), 105 deletions(-) diff --git 
a/pyranker/cli/run.py b/pyranker/cli/run.py index 9d2f081..3c967d9 100644 --- a/pyranker/cli/run.py +++ b/pyranker/cli/run.py @@ -162,20 +162,6 @@ def main( help="The comma-separated metric columns for which the reversal should be calculated; for example, 'hausdorff_tc,hausdorff_et'.", ), ] = "", - metric_to_use: Annotated[ - str, - typer.Option( - "--metric-to-use", - help="The comma-separated metric columns to use for ranking.", - ), - ] = "", - weight: Annotated[ - str, - typer.Option( - "--weight", - help="The comma-separated weights for the metrics.", - ), - ] = "", iterations: Annotated[ int, typer.Option( @@ -241,21 +227,6 @@ def main( metrics_for_reversal.split(",") if metrics_for_reversal else [] ) - metric_weights = {} - if metric_to_use and weight: - metrics = [m.strip() for m in metric_to_use.split(",")] - weights = [float(w.strip()) for w in weight.split(",")] - assert len(metrics) == len(weights), ( - "Number of metrics and weights must be the same." - ) - - total_weight = sum(weights) - metric_weights = { - metric.lower().strip(): w / total_weight - for metric, w in zip(metrics, weights) - } - print(f"Using weighted ranking with metrics: {metric_weights}") - num_subjects, num_metrics = validate_csvs(csvs_to_compare_with_full_path) # print the summary of the input files @@ -271,7 +242,7 @@ def main( n_iterations=iterations, ranking_method=ranking_method, n_jobs=n_jobs, - metric_weights=metric_weights, + output_dir=outputdir, ) ranks, pvals = ranker.get_rankings_and_pvals() Path(outputdir).mkdir(parents=True, exist_ok=True) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 7bba987..86f70a8 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -66,142 +66,139 @@ def __init__( n_iterations: int = 1000, ranking_method: str = "average", n_jobs: int = 4, - metric_weights: Dict[str, float] = None, + output_dir: str = ".", + detailed_ranks_csv_name: str = "detailed_ranks.csv", ) -> None: """ Ranker class to compare the scores of different 
methods. - - Args: - input_csvs_to_compare (Dict[str, str]): A dictionary with the key being the method name and the value being the path to the CSV file. - metrics_for_reversal (List[str]): The metrics for which the reversal should be calculated. - n_iterations (int): The number of iterations to perform for the permutation test. - ranking_method (str): The method to use for ranking the methods. - n_jobs (int): The number of CPU cores to use for parallel processing. Defaults to 4. - Set to -1 to use all available cores. - metric_weights (Dict[str, float]): A dictionary of metric names and their weights. """ self.input_csvs_to_compare = input_csvs_to_compare self.metrics_for_reversal = metrics_for_reversal self.n_iterations = n_iterations self.ranking_method = ranking_method - self.metric_weights = metric_weights if metric_weights else {} + self.output_dir = output_dir + self.detailed_ranks_csv_name = detailed_ranks_csv_name if n_jobs == -1: self.n_jobs = os.cpu_count() else: self.n_jobs = n_jobs - self.combined_scores_per_subject = {} - self.participant_ranks = {} - + os.makedirs(self.output_dir, exist_ok=True) + self.detailed_rank_columns = [] + self.all_subject_ids = set() self.combine_csvs_and_scores() def combine_csvs_and_scores(self) -> None: """ Combine the CSVs and scores of the methods. 
""" - self.combined_scores_per_subject["method"] = [] self.metrics_per_subject = pd.DataFrame() for method in self.input_csvs_to_compare: - self.combined_scores_per_subject["method"].append(method) current_df = pd.read_csv(self.input_csvs_to_compare[method]) current_df.columns = current_df.columns.str.lower() + self.all_subject_ids.update(current_df["subjectid"].unique()) current_df = current_df.sort_values(by="subjectid") - - metrics_columns = sorted( - [col for col in current_df.columns if col != "subjectid"] - ) + metrics_columns = [col for col in current_df.columns if col != "subjectid"] + metrics_columns.sort() current_df = current_df[["subjectid"] + metrics_columns] - - for metric in metrics_columns: - current_df[metric] = current_df[metric].rank( - method=self.ranking_method, ascending=False - ) - current_df_flattened = {"method": method} for _, row in current_df.iterrows(): - for metric in current_df.columns: - if metric != "subjectid": - current_df_flattened[f"{metric}_{row['subjectid']}"] = row[ - metric - ] - + for metric in metrics_columns: + current_df_flattened[f"{metric}_{row['subjectid']}"] = row[metric] current_df_flattened = pd.DataFrame(current_df_flattened, index=[0]) self.metrics_per_subject = pd.concat( - [self.metrics_per_subject, current_df_flattened], - axis=0, - ignore_index=True, - ) + [self.metrics_per_subject, current_df_flattened], axis=0 + ).reset_index(drop=True) - self.metrics_per_subject.to_csv("metric_rank.csv", index=False) self.rank_methods() def rank_methods(self) -> None: """ - Rank the methods based on each metric and calculate the combined final rank. + Rank the methods based on the metrics using the new two-step aggregation. 
""" - self.ranks_per_metric = self.metrics_per_subject.rank( - method=self.ranking_method, ascending=True, numeric_only=True + ranks_per_metric_detailed = self.metrics_per_subject.rank( + method=self.ranking_method, ascending=False, numeric_only=True ) + metrics_for_reversal_lower = [x.lower() for x in self.metrics_for_reversal] + for metric in metrics_for_reversal_lower: + for column in ranks_per_metric_detailed.columns: + if metric in column: + ranks_per_metric_detailed[column] = ( + ranks_per_metric_detailed[column].max() + + 1 + - ranks_per_metric_detailed[column] + ) + self.detailed_rank_columns = ranks_per_metric_detailed.columns.tolist() + verification_df = ranks_per_metric_detailed.copy() + verification_df.insert(0, "method", self.metrics_per_subject["method"]) + verification_path = os.path.join(self.output_dir, self.detailed_ranks_csv_name) + verification_df.to_csv(verification_path, index=False) + print(f"Saved detailed verification ranks to: {verification_path}") + + # --- FIX 1: EFFICIENT DATAFRAME CREATION --- + # Create a dictionary to hold new columns first. + subject_avg_rank_data = {} + subject_ids_sorted = sorted(list(self.all_subject_ids)) + + for subject in subject_ids_sorted: + subject_cols = [ + col for col in self.detailed_rank_columns if col.endswith(f"_{subject}") + ] + if subject_cols: + # Add the new Series to the dictionary instead of the DataFrame. + subject_avg_rank_data[f"{subject}_avg_rank"] = ( + ranks_per_metric_detailed[subject_cols].mean(axis=1) + ) + + # Create the DataFrame from the dictionary in a single, efficient operation. 
+ per_subject_avg_ranks = pd.DataFrame(subject_avg_rank_data) + # --- END OF FIX 1 --- - cumulative_rank_column = self.ranks_per_metric.sum(axis=1) + cumulative_rank_column = per_subject_avg_ranks.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True ) - self.ranks_per_metric = pd.concat( [ - self.ranks_per_metric, - cumulative_rank_column.rename("cumulative_rank"), - final_rank_column.rename("final_rank"), self.metrics_per_subject["method"], + final_rank_column.rename("final_rank"), + cumulative_rank_column.rename("cumulative_rank"), + per_subject_avg_ranks, + ranks_per_metric_detailed, ], axis=1, ) - - self.ranks_per_metric = self.ranks_per_metric[ - ["method", "final_rank", "cumulative_rank"] - + [ - col - for col in self.ranks_per_metric.columns - if col not in ["method", "final_rank", "cumulative_rank"] - ] - ] - self.perform_permutation_test() def perform_permutation_test(self) -> None: """ - Perform permutation test in parallel to determine the significance of the ranks. + Perform permutation test to determine the significance of the ranks. + This test is performed on the detailed rank data. """ n_methods = len(self.ranks_per_metric) self.pvals = np.zeros((n_methods, n_methods)) - ranks_per_metric_sorted = self.ranks_per_metric.sort_values( by="cumulative_rank" ).reset_index(drop=True) - ranks_per_metric_sanitized = ranks_per_metric_sorted.drop( - columns=["method", "cumulative_rank", "final_rank"] - ) + # --- FIX 2: SELECT ONLY DETAILED RANK COLUMNS FOR THE TEST --- + # Instead of dropping columns, explicitly select the correct ones. + # This is robust and prevents contamination from summary columns. 
+ ranks_per_metric_sanitized = ranks_per_metric_sorted[self.detailed_rank_columns] + # --- END OF FIX 2 --- - # Use ProcessPoolExecutor for parallel processing with ProcessPoolExecutor(max_workers=self.n_jobs) as executor: - # A dictionary to map future objects to their matrix indices (i, j) future_to_indices = {} - # Submit all pairs to the executor for i in range(n_methods): for j in range(i + 1, n_methods): arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() - # Submit the worker function with its arguments future = executor.submit( _calculate_pval_for_pair, arr_i, arr_j, self.n_iterations ) future_to_indices[future] = (i, j) - - # Create a progress bar that updates as tasks are completed pbar = tqdm( as_completed(future_to_indices), total=len(future_to_indices), @@ -212,7 +209,7 @@ def perform_permutation_test(self) -> None: try: pval = future.result() self.pvals[i, j] = pval - self.pvals[j, i] = pval # p-value is symmetric + self.pvals[j, i] = pval except Exception as exc: print(f"Pair ({i}, {j}) generated an exception: {exc}") @@ -224,15 +221,17 @@ def perform_permutation_test(self) -> None: def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Get the final rankings and p-values. - - Returns: - Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the rankings and p-values dataframes. + Get the final rankings and p-values, storing only the upper right diagonal of the p-values matrix. 
""" ranks_df = self.ranks_per_metric.sort_values(by="final_rank").reset_index( drop=True ) - pvals_df = self.pvals_df.reindex( - index=ranks_df["method"], columns=ranks_df["method"] - ) - return ranks_df, pvals_df + + # Create a DataFrame for the upper right diagonal of the p-values matrix + pvals_upper_df = pd.DataFrame( + np.triu(self.pvals), # Use np.triu to get the upper triangle of the matrix + columns=self.pvals_df.columns, + index=self.pvals_df.index, + ).reindex(index=ranks_df["method"], columns=ranks_df["method"]) + + return ranks_df, pvals_upper_df From 081a6103b94844233a1c5ce4da5adfaa082c980f Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Tue, 19 Aug 2025 15:27:08 -0400 Subject: [PATCH 08/11] ready to PR --- README.md | 68 +++++++++++++++++++++-- pyranker/ranker.py | 136 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 181 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 8cb5675..bc264a1 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,67 @@ # PyRanker -This package is designed to compare the performance of different methods. +This package is designed to compare the performance of different methods. + +## Algorithm + +The Ranker class compares the performance of different methods based on a set of +metrics. It takes as input a dictionary of CSV files, where each file +represents a method and contains the scores for a set of subjects on a set of +metrics. + +The ranking algorithm consists of the following steps: + +1. **Combine CSVs and Scores**: The class first combines all the input CSV + files into a single DataFrame. This DataFrame has a hierarchical column + structure, where the top level represents the metrics and the bottom level + represents the subjects. + +2. **Rank Methods**: The class then ranks the methods based on their scores for + each metric and subject. The ranking can be done using different methods, + such as 'average', 'min', 'max', 'first', or 'dense'. + +3. 
**Handle Metric Reversal**: For metrics where lower values are better (e.g., + error rates), the class can reverse the ranks so that lower scores get + better (numerically lower) ranks. + +4. **Aggregate Ranks**: The class then aggregates the ranks across all metrics + for each subject to get a per-subject average rank for each method. + +5. **Calculate Cumulative Rank**: The per-subject average ranks are then summed + up to get a cumulative rank for each method. + +6. **Determine Final Rank**: The methods are then ranked based on their + cumulative ranks to determine the final ranking. + +7. **Perform Permutation Test**: Finally, the class performs a permutation test + to determine the statistical significance of the differences in the ranks + of the methods. The permutation test is a non-parametric method that does + not make any assumptions about the distribution of the data. + +The output of the Ranker class is a pair of DataFrames: one containing the +final rankings of the methods, and another containing the p-values from the +permutation test. + +### Permutation Test + +The permutation test is a non-parametric method for testing the statistical +significance of an observed difference between two groups. In this case, the +two groups are the ranks of two different methods. + +The null hypothesis is that the two methods are equivalent, and any observed +difference in their ranks is due to chance. The alternative hypothesis is that +the two methods are not equivalent, and the observed difference in their ranks +is statistically significant. + +The test works by repeatedly shuffling the ranks between the two methods and +calculating the difference in their sums. The p-value is the proportion of +permutations that result in a difference as or more extreme than the +observed difference.
## Installation ```sh -(base) user@location $> git clone https://github.com/mlcommons/PyRanker.git +(base) user@location $> git clone https://github.com/mlcommons/PyRanker.git (base) user@location $> cd PyRanker (base) user@PyRanker $> conda create -p ./venv python=3.12 -y (base) user@PyRanker $> conda activate ./venv @@ -41,10 +97,10 @@ This package is designed to compare the performance of different methods. 2. **Metrics for reversal normalization**: a comma-separated list of metrics that need to be normalized in reverse. For metrics such as [Hausdorff Distance](https://en.wikipedia.org/wiki/Hausdorff_distance) and communication cost (used in the [FeTS Challenge](https://doi.org/10.48550/arXiv.2105.05874)) which are defined as "higher is worse", PyRanker can normalize in reverse order. - This is checked in a case-insensitive manner, so `C,F` is equivalent to `c,f`. - - The check is done by checking for the presence of the string in the metric header, rather than a "hard" check. For example, passing `hausd` **will** match `hausd*` in the metric headers, and will be case-insensitive. This is done to allow for flexibility in the metric names. - - The metric string needs to be present. For example, passing `dsc` **will not** match for `dice*` in the metric headers. + - The check is done by checking for the presence of the string in the metric header, rather than a "hard" check. For example, passing `hausd` **will** match `hausd*` in the metric headers, and will be case-insensitive. This is done to allow for flexibility in the metric names. + - The metric string needs to be present. For example, passing `dsc` **will not** match for `dice*` in the metric headers. -3. **Ranking method**: the ranking method used to rank the methods. The available options are [[ref](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html#pandas-dataframe-rank)]: +3. **Ranking method**: the ranking method used to rank the methods. 
The available options are [[ref](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html#pandas-dataframe-rank)]: - `average` (default): average rank of the group - `min`: lowest rank in the group - `max`: highest rank in the group @@ -73,4 +129,4 @@ To get detailed help, please run ```ranker --help```. ## Acknowledgements -This tool was partly supported by the [Informatics Technology for Cancer Research (ITCR) program](https://www.cancer.gov/about-nci/organization/cssi/research/itcr) of the [National Cancer Institute (NCI)](https://www.cancer.gov/) at the [National Institutes of Health (NIH)](https://www.nih.gov/) under award numbers [U01CA242871](https://reporter.nih.gov/search/8qcT1J34hEyj5npqmq9aEw/project-details/10009302) and [U24CA279629](https://reporter.nih.gov/search/8qcT1J34hEyj5npqmq9aEw/project-details/10932257). The content of this tool is solely the responsibility of the authors and does not represent the official views of the NIH. +This tool was partly supported by the [Informatics Technology for Cancer Research (ITCR) program](https://www.cancer.gov/about-nci/organization/cssi/research/itcr) of the [National Cancer Institute (NCI)](https://www.cancer.gov/) at the [National Institutes of Health (NIH)](https://www.nih.gov/) under award numbers [U01CA242871](https://reporter.nih.gov/search/8qcT1J34hEyj5npqmq9aEw/project-details/10009302) and [U24CA279629](https://reporter.nih.gov/search/8qcT1J34hEyj5npqmq9aEw/project-details/10932257). The content of this tool is solely the responsibility of the authors and does not represent the official views of the NIH. \ No newline at end of file diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 86f70a8..97977b8 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -15,6 +15,20 @@ def _calculate_pval_for_pair( """ Performs the permutation test for a single pair of rank arrays. 
+ The permutation test is a non-parametric method for testing the statistical + significance of an observed difference between two groups. In this case, the + two groups are the ranks of two different methods. + + The null hypothesis is that the two methods are equivalent, and any observed + difference in their ranks is due to chance. The alternative hypothesis is that + the two methods are not equivalent, and the observed difference in their ranks + is statistically significant. + + The test works by repeatedly shuffling the ranks between the two methods and + calculating the difference in their sums. The p-value is the proportion of + permutations that result in a difference as or more extreme than the + observed difference. + Args: arr_i (np.ndarray): Rank array for the first method. arr_j (np.ndarray): Rank array for the second method. @@ -59,6 +73,46 @@ def _calculate_pval_for_pair( class Ranker: + """ + The Ranker class compares the performance of different methods based on a set of + metrics. It takes as input a dictionary of CSV files, where each file + represents a method and contains the scores for a set of subjects on a set of + metrics. + + The ranking algorithm consists of the following steps: + + 1. **Combine CSVs and Scores**: The class first combines all the input CSV + files into a single DataFrame. This DataFrame has one row per method and + one flat column per metric-subject combination, named + `{metric}_{subjectid}` (column names are lowercased). + + 2. **Rank Methods**: The class then ranks the methods based on their scores for + each metric and subject. The ranking can be done using different methods, + such as 'average', 'min', 'max', 'first', or 'dense'. + + 3. **Handle Metric Reversal**: For metrics where lower values are better (e.g., + error rates), the class can reverse the ranks so that lower scores get + higher ranks. + + 4.
**Aggregate Ranks**: The class then aggregates the ranks across all metrics + for each subject to get a per-subject average rank for each method. + + 5. **Calculate Cumulative Rank**: The per-subject average ranks are then summed + up to get a cumulative rank for each method. + + 6. **Determine Final Rank**: The methods are then ranked based on their + cumulative ranks to determine the final ranking. + + 7. **Perform Permutation Test**: Finally, the class performs a permutation test + to determine the statistical significance of the differences in the ranks + of the methods. The permutation test is a non-parametric method that does + not make any assumptions about the distribution of the data. + + The output of the Ranker class is a pair of DataFrames: one containing the + final rankings of the methods, and another containing the p-values from the + permutation test. + """ + def __init__( self, input_csvs_to_compare: Dict[str, str], @@ -70,7 +124,26 @@ def __init__( detailed_ranks_csv_name: str = "detailed_ranks.csv", ) -> None: """ - Ranker class to compare the scores of different methods. + Initializes the Ranker class. + + Args: + input_csvs_to_compare (Dict[str, str]): A dictionary where the keys are + the method names and the values are the paths to the CSV files + containing the scores for each method. + metrics_for_reversal (List[str]): A list of metrics for which the + ranks should be reversed (i.e., lower values are better). + n_iterations (int, optional): The number of iterations to perform for + the permutation test. Defaults to 1000. + ranking_method (str, optional): The method to use for ranking the + methods. Can be one of 'average', 'min', 'max', 'first', or + 'dense'. Defaults to "average". + n_jobs (int, optional): The number of CPU cores to use for parallel + processing. Defaults to 4. + output_dir (str, optional): The directory where the output files will + be saved. Defaults to ".". 
+ detailed_ranks_csv_name (str, optional): The name of the CSV file + where the detailed ranks will be saved. Defaults to + "detailed_ranks.csv". """ self.input_csvs_to_compare = input_csvs_to_compare self.metrics_for_reversal = metrics_for_reversal @@ -91,7 +164,11 @@ def __init__( def combine_csvs_and_scores(self) -> None: """ - Combine the CSVs and scores of the methods. + Combines the input CSV files into a single DataFrame. + + This method reads each CSV file, converts the column names to lowercase, + and then flattens the DataFrame so that each row represents a method and + each column represents a metric-subject combination. """ self.metrics_per_subject = pd.DataFrame() @@ -116,11 +193,20 @@ def combine_csvs_and_scores(self) -> None: def rank_methods(self) -> None: """ - Rank the methods based on the metrics using the new two-step aggregation. + Ranks the methods based on the metrics using a two-step aggregation process. + + First, it ranks the methods for each metric and subject combination. + Then, it calculates the average rank for each method across all metrics + for each subject. Finally, it sums up the per-subject average ranks to + get a cumulative rank for each method, which is then used to determine + the final ranking. 
""" + # Rank the methods for each metric-subject combination ranks_per_metric_detailed = self.metrics_per_subject.rank( method=self.ranking_method, ascending=False, numeric_only=True ) + + # Reverse the ranks for the specified metrics metrics_for_reversal_lower = [x.lower() for x in self.metrics_for_reversal] for metric in metrics_for_reversal_lower: for column in ranks_per_metric_detailed.columns: @@ -131,35 +217,38 @@ def rank_methods(self) -> None: - ranks_per_metric_detailed[column] ) self.detailed_rank_columns = ranks_per_metric_detailed.columns.tolist() + + # Save the detailed ranks to a CSV file for verification verification_df = ranks_per_metric_detailed.copy() verification_df.insert(0, "method", self.metrics_per_subject["method"]) verification_path = os.path.join(self.output_dir, self.detailed_ranks_csv_name) verification_df.to_csv(verification_path, index=False) print(f"Saved detailed verification ranks to: {verification_path}") - # --- FIX 1: EFFICIENT DATAFRAME CREATION --- - # Create a dictionary to hold new columns first. + # Create a dictionary to hold the per-subject average ranks subject_avg_rank_data = {} subject_ids_sorted = sorted(list(self.all_subject_ids)) + # Calculate the average rank for each method for each subject for subject in subject_ids_sorted: subject_cols = [ col for col in self.detailed_rank_columns if col.endswith(f"_{subject}") ] if subject_cols: - # Add the new Series to the dictionary instead of the DataFrame. subject_avg_rank_data[f"{subject}_avg_rank"] = ( ranks_per_metric_detailed[subject_cols].mean(axis=1) ) - # Create the DataFrame from the dictionary in a single, efficient operation. 
+ # Create a DataFrame from the dictionary of per-subject average ranks per_subject_avg_ranks = pd.DataFrame(subject_avg_rank_data) - # --- END OF FIX 1 --- + # Calculate the cumulative and final ranks cumulative_rank_column = per_subject_avg_ranks.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True ) + + # Combine all the rank information into a single DataFrame self.ranks_per_metric = pd.concat( [ self.metrics_per_subject["method"], @@ -170,12 +259,14 @@ def rank_methods(self) -> None: ], axis=1, ) + + # Perform the permutation test to determine the statistical significance self.perform_permutation_test() def perform_permutation_test(self) -> None: """ - Perform permutation test to determine the significance of the ranks. - This test is performed on the detailed rank data. + Performs a permutation test to determine the statistical significance of the + ranks. This test is performed on the detailed rank data. """ n_methods = len(self.ranks_per_metric) self.pvals = np.zeros((n_methods, n_methods)) @@ -183,12 +274,10 @@ def perform_permutation_test(self) -> None: by="cumulative_rank" ).reset_index(drop=True) - # --- FIX 2: SELECT ONLY DETAILED RANK COLUMNS FOR THE TEST --- - # Instead of dropping columns, explicitly select the correct ones. - # This is robust and prevents contamination from summary columns. 
+ # Select only the detailed rank columns for the test ranks_per_metric_sanitized = ranks_per_metric_sorted[self.detailed_rank_columns] - # --- END OF FIX 2 --- + # Use a process pool to parallelize the p-value calculations with ProcessPoolExecutor(max_workers=self.n_jobs) as executor: future_to_indices = {} for i in range(n_methods): @@ -199,6 +288,8 @@ def perform_permutation_test(self) -> None: _calculate_pval_for_pair, arr_i, arr_j, self.n_iterations ) future_to_indices[future] = (i, j) + + # Show a progress bar for the permutation test pbar = tqdm( as_completed(future_to_indices), total=len(future_to_indices), @@ -208,11 +299,14 @@ def perform_permutation_test(self) -> None: i, j = future_to_indices[future] try: pval = future.result() - self.pvals[i, j] = pval - self.pvals[j, i] = pval + # Format p-value with precision and scientific notation + formatted_pval = f"{pval:.3f}" if pval >= 0.001 else f"{pval:.1e}" + self.pvals[i, j] = formatted_pval + self.pvals[j, i] = formatted_pval except Exception as exc: print(f"Pair ({i}, {j}) generated an exception: {exc}") + # Create a DataFrame from the p-values self.pvals_df = pd.DataFrame( self.pvals, columns=ranks_per_metric_sorted["method"], @@ -221,7 +315,15 @@ def perform_permutation_test(self) -> None: def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Get the final rankings and p-values, storing only the upper right diagonal of the p-values matrix. + Returns the final rankings and p-values. + + The p-values matrix is returned as a DataFrame with only the upper right + diagonal, to avoid redundancy. + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing two DataFrames: + - The final rankings of the methods. + - The p-values from the permutation test. 
""" ranks_df = self.ranks_per_metric.sort_values(by="final_rank").reset_index( drop=True From db7b56e80752fe32b8e4ad24910a6adacaf36ab9 Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Thu, 28 Aug 2025 13:43:00 -0400 Subject: [PATCH 09/11] pvalue calculation is now fixed --- data/temp_output/detailed_ranks.csv | 5 +++++ data/temp_output/pvals.csv | 10 +++++----- data/temp_output/ranks.csv | 10 +++++----- pyranker/ranker.py | 20 ++++++++++++-------- test_ranking_fix.py | 0 5 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 data/temp_output/detailed_ranks.csv create mode 100644 test_ranking_fix.py diff --git a/data/temp_output/detailed_ranks.csv b/data/temp_output/detailed_ranks.csv new file mode 100644 index 0000000..6648b3a --- /dev/null +++ b/data/temp_output/detailed_ranks.csv @@ -0,0 +1,5 @@ +method,a_s1,b_s1,c_s1,d_s1,e_s1,f_s1,a_s2,b_s2,c_s2,d_s2,e_s2,f_s2 +m1,4.0,4.0,1.0,4.0,4.0,1.0,4.0,4.0,1.0,4.0,4.0,1.0 +m2,3.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0 +m3,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0 +m4,1.0,1.0,4.0,1.0,1.0,4.0,1.0,1.0,4.0,1.0,1.0,4.0 diff --git a/data/temp_output/pvals.csv b/data/temp_output/pvals.csv index 14915f0..514c997 100644 --- a/data/temp_output/pvals.csv +++ b/data/temp_output/pvals.csv @@ -1,5 +1,5 @@ -method,m1,m2,m3,m4 -m1,0.0,1.0,1.0,1.0 -m2,1.0,0.0,1.0,1.0 -m3,1.0,1.0,0.0,1.0 -m4,1.0,1.0,1.0,0.0 +method,m4,m3,m2,m1 +m4,0.0,0.928,0.926,0.925 +m3,0.0,0.0,0.928,0.926 +m2,0.0,0.0,0.0,0.927 +m1,0.0,0.0,0.0,0.0 diff --git a/data/temp_output/ranks.csv b/data/temp_output/ranks.csv index e34367f..e555038 100644 --- a/data/temp_output/ranks.csv +++ b/data/temp_output/ranks.csv @@ -1,5 +1,5 @@ -method,final_rank,cumulative_rank,average_rank_s1,average_rank_s2 -m1,2.5,5.0,2.5,2.5 -m2,2.5,5.0,2.5,2.5 -m3,2.5,5.0,2.5,2.5 -m4,2.5,5.0,2.5,2.5 +method,final_rank,cumulative_rank,s1_avg_rank,s2_avg_rank,a_s1,b_s1,c_s1,d_s1,e_s1,f_s1,a_s2,b_s2,c_s2,d_s2,e_s2,f_s2 
+m4,1.0,4.0,2.0,2.0,1.0,1.0,4.0,1.0,1.0,4.0,1.0,1.0,4.0,1.0,1.0,4.0 +m3,2.0,4.666666666666667,2.3333333333333335,2.3333333333333335,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0 +m2,3.0,5.333333333333333,2.6666666666666665,2.6666666666666665,3.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0 +m1,4.0,6.0,3.0,3.0,4.0,4.0,1.0,4.0,4.0,1.0,4.0,4.0,1.0,4.0,4.0,1.0 diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 97977b8..bf1a086 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -60,15 +60,12 @@ def _calculate_pval_for_pair( permuted_diff = arr1_rand.sum() - arr2_rand.sum() # Check if the permuted difference is as or more extreme - if permuted_diff >= observed_diff: + if permuted_diff <= observed_diff: count_extreme += 1 - # To avoid p-values of 0, which implies absolute certainty, we add 1 - # if no permuted difference was more extreme than the observed one. - if count_extreme == 0: - count_extreme = 1 - - pval = count_extreme / n_iterations + # Calculate the p-value using the standard formula for permutation tests, + # which adds 1 to both the numerator and denominator to avoid p-values of 0. + pval = (count_extreme + 1) / (n_iterations + 1) return pval @@ -183,7 +180,13 @@ def combine_csvs_and_scores(self) -> None: current_df_flattened = {"method": method} for _, row in current_df.iterrows(): for metric in metrics_columns: - current_df_flattened[f"{metric}_{row['subjectid']}"] = row[metric] + score = row[metric] + if not pd.api.types.is_number(score): + raise ValueError( + f"Invalid score for method '{method}', subject '{row['subjectid']}', " + f"metric '{metric}'. Expected a number, but got '{score}'." + ) + current_df_flattened[f"{metric}_{row['subjectid']}"] = score current_df_flattened = pd.DataFrame(current_df_flattened, index=[0]) self.metrics_per_subject = pd.concat( [self.metrics_per_subject, current_df_flattened], axis=0 @@ -202,6 +205,7 @@ def rank_methods(self) -> None: the final ranking. 
""" # Rank the methods for each metric-subject combination + # Use ascending=False so that higher values get better ranks (original logic) ranks_per_metric_detailed = self.metrics_per_subject.rank( method=self.ranking_method, ascending=False, numeric_only=True ) diff --git a/test_ranking_fix.py b/test_ranking_fix.py new file mode 100644 index 0000000..e69de29 From 2015300ad28717a5eb0dde5dcf6ec7a517d4efce Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Thu, 4 Sep 2025 21:07:41 -0400 Subject: [PATCH 10/11] need to fix based on newer updates --- pyranker/cli/run.py | 4 +-- pyranker/ranker.py | 67 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/pyranker/cli/run.py b/pyranker/cli/run.py index 3c967d9..84e6dad 100644 --- a/pyranker/cli/run.py +++ b/pyranker/cli/run.py @@ -120,7 +120,7 @@ def __get_sorted_metrics(df: pd.DataFrame) -> list: current_metrics = __get_sorted_metrics(current_df) if current_metrics != metrics_base: sanity_checks["Files_with_different_metrics"].append(filename) - except Exception as e: + except Exception: sanity_checks["Files_that_cannot_be_read"].append(filename) # if any of the sanity checks fail, print the problematic files and exit @@ -185,7 +185,7 @@ def main( "--n-jobs", help="The number of CPU cores to use for parallel processing.", ), - ] = 4, + ] = 1, version: Annotated[ Optional[bool], typer.Option( diff --git a/pyranker/ranker.py b/pyranker/ranker.py index bf1a086..4d383af 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -1,3 +1,4 @@ +import logging import os from concurrent.futures import ProcessPoolExecutor, as_completed from typing import Dict, List, Tuple @@ -6,11 +7,23 @@ import pandas as pd from tqdm import tqdm +# Set up a global logger +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + filename="ranker.log", + filemode="w", +) +logger = logging.getLogger(__name__) + # This worker function is 
defined at the top level so it can be pickled # and sent to other processes by the ProcessPoolExecutor. def _calculate_pval_for_pair( - arr_i: np.ndarray, arr_j: np.ndarray, n_iterations: int + arr_i: np.ndarray, + arr_j: np.ndarray, + n_iterations: int, + log_permutations: bool = False, ) -> float: """ Performs the permutation test for a single pair of rank arrays. @@ -33,19 +46,23 @@ def _calculate_pval_for_pair( arr_i (np.ndarray): Rank array for the first method. arr_j (np.ndarray): Rank array for the second method. n_iterations (int): The number of permutation iterations. + log_permutations (bool, optional): Whether to log detailed permutation + information. Defaults to False. Returns: float: The calculated p-value. """ # Use the difference for a one-sided test observed_diff = arr_i.sum() - arr_j.sum() + if log_permutations: + print(f"Observed difference in sums: {observed_diff}") count_extreme = 0 # Create a local random number generator for thread-safety rng = np.random.default_rng() # Perform the permutation test - for _ in range(n_iterations): + for i in range(n_iterations): # Generate a random permutation mask r = rng.integers(0, 2, size=arr_i.shape, dtype=bool) @@ -58,14 +75,24 @@ def _calculate_pval_for_pair( # Calculate the difference in ranks for the random permutation permuted_diff = arr1_rand.sum() - arr2_rand.sum() + if log_permutations: + print( + f"Permutation {i + 1}/{n_iterations} | Permuted diff: {permuted_diff}" + ) # Check if the permuted difference is as or more extreme if permuted_diff <= observed_diff: + if log_permutations: + print( + f"Permutation {i + 1} is more extreme: {permuted_diff} <= {observed_diff}" + ) count_extreme += 1 # Calculate the p-value using the standard formula for permutation tests, # which adds 1 to both the numerator and denominator to avoid p-values of 0. 
pval = (count_extreme + 1) / (n_iterations + 1) + if log_permutations: + print(f"Final p-value: {pval}") return pval @@ -119,6 +146,7 @@ def __init__( n_jobs: int = 4, output_dir: str = ".", detailed_ranks_csv_name: str = "detailed_ranks.csv", + log_permutations: bool = False, ) -> None: """ Initializes the Ranker class. @@ -141,6 +169,8 @@ def __init__( detailed_ranks_csv_name (str, optional): The name of the CSV file where the detailed ranks will be saved. Defaults to "detailed_ranks.csv". + log_permutations (bool, optional): Whether to log detailed permutation + information. Defaults to False. """ self.input_csvs_to_compare = input_csvs_to_compare self.metrics_for_reversal = metrics_for_reversal @@ -148,6 +178,7 @@ def __init__( self.ranking_method = ranking_method self.output_dir = output_dir self.detailed_ranks_csv_name = detailed_ranks_csv_name + self.log_permutations = log_permutations if n_jobs == -1: self.n_jobs = os.cpu_count() @@ -157,6 +188,7 @@ def __init__( os.makedirs(self.output_dir, exist_ok=True) self.detailed_rank_columns = [] self.all_subject_ids = set() + print("Ranker initialized.") self.combine_csvs_and_scores() def combine_csvs_and_scores(self) -> None: @@ -167,9 +199,11 @@ def combine_csvs_and_scores(self) -> None: and then flattens the DataFrame so that each row represents a method and each column represents a metric-subject combination. 
""" + print("Combining CSVs and scores...") self.metrics_per_subject = pd.DataFrame() for method in self.input_csvs_to_compare: + print(f"Processing method: {method}") current_df = pd.read_csv(self.input_csvs_to_compare[method]) current_df.columns = current_df.columns.str.lower() self.all_subject_ids.update(current_df["subjectid"].unique()) @@ -182,16 +216,19 @@ def combine_csvs_and_scores(self) -> None: for metric in metrics_columns: score = row[metric] if not pd.api.types.is_number(score): - raise ValueError( + error_msg = ( f"Invalid score for method '{method}', subject '{row['subjectid']}', " f"metric '{metric}'. Expected a number, but got '{score}'." ) + logger.error(error_msg) + raise ValueError(error_msg) current_df_flattened[f"{metric}_{row['subjectid']}"] = score current_df_flattened = pd.DataFrame(current_df_flattened, index=[0]) self.metrics_per_subject = pd.concat( [self.metrics_per_subject, current_df_flattened], axis=0 ).reset_index(drop=True) + print("Finished combining CSVs and scores.") self.rank_methods() def rank_methods(self) -> None: @@ -204,6 +241,7 @@ def rank_methods(self) -> None: get a cumulative rank for each method, which is then used to determine the final ranking. 
""" + print("Ranking methods...") # Rank the methods for each metric-subject combination # Use ascending=False so that higher values get better ranks (original logic) ranks_per_metric_detailed = self.metrics_per_subject.rank( @@ -215,6 +253,7 @@ def rank_methods(self) -> None: for metric in metrics_for_reversal_lower: for column in ranks_per_metric_detailed.columns: if metric in column: + print(f"Reversing ranks for metric: {metric} in column: {column}") ranks_per_metric_detailed[column] = ( ranks_per_metric_detailed[column].max() + 1 @@ -245,7 +284,7 @@ def rank_methods(self) -> None: # Create a DataFrame from the dictionary of per-subject average ranks per_subject_avg_ranks = pd.DataFrame(subject_avg_rank_data) - + self.per_subject_avg_ranks = per_subject_avg_ranks # Calculate the cumulative and final ranks cumulative_rank_column = per_subject_avg_ranks.sum(axis=1) final_rank_column = cumulative_rank_column.rank( @@ -259,11 +298,10 @@ def rank_methods(self) -> None: final_rank_column.rename("final_rank"), cumulative_rank_column.rename("cumulative_rank"), per_subject_avg_ranks, - ranks_per_metric_detailed, ], axis=1, ) - + print("Finished ranking methods.") # Perform the permutation test to determine the statistical significance self.perform_permutation_test() @@ -272,6 +310,7 @@ def perform_permutation_test(self) -> None: Performs a permutation test to determine the statistical significance of the ranks. This test is performed on the detailed rank data. 
""" + print("Performing permutation test...") n_methods = len(self.ranks_per_metric) self.pvals = np.zeros((n_methods, n_methods)) ranks_per_metric_sorted = self.ranks_per_metric.sort_values( @@ -279,7 +318,9 @@ def perform_permutation_test(self) -> None: ).reset_index(drop=True) # Select only the detailed rank columns for the test - ranks_per_metric_sanitized = ranks_per_metric_sorted[self.detailed_rank_columns] + ranks_per_metric_sanitized = ranks_per_metric_sorted[ + self.per_subject_avg_ranks.columns + ] # Use a process pool to parallelize the p-value calculations with ProcessPoolExecutor(max_workers=self.n_jobs) as executor: @@ -289,7 +330,11 @@ def perform_permutation_test(self) -> None: arr_i = ranks_per_metric_sanitized.iloc[i].to_numpy() arr_j = ranks_per_metric_sanitized.iloc[j].to_numpy() future = executor.submit( - _calculate_pval_for_pair, arr_i, arr_j, self.n_iterations + _calculate_pval_for_pair, + arr_i, + arr_j, + self.n_iterations, + self.log_permutations, ) future_to_indices[future] = (i, j) @@ -308,7 +353,7 @@ def perform_permutation_test(self) -> None: self.pvals[i, j] = formatted_pval self.pvals[j, i] = formatted_pval except Exception as exc: - print(f"Pair ({i}, {j}) generated an exception: {exc}") + logger.error(f"Pair ({i}, {j}) generated an exception: {exc}") # Create a DataFrame from the p-values self.pvals_df = pd.DataFrame( @@ -316,6 +361,7 @@ def perform_permutation_test(self) -> None: columns=ranks_per_metric_sorted["method"], index=ranks_per_metric_sorted["method"], ) + print("Finished permutation test.") def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: """ @@ -329,6 +375,7 @@ def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - The final rankings of the methods. - The p-values from the permutation test. 
""" + print("Retrieving rankings and p-values.") ranks_df = self.ranks_per_metric.sort_values(by="final_rank").reset_index( drop=True ) @@ -339,5 +386,5 @@ def get_rankings_and_pvals(self) -> Tuple[pd.DataFrame, pd.DataFrame]: columns=self.pvals_df.columns, index=self.pvals_df.index, ).reindex(index=ranks_df["method"], columns=ranks_df["method"]) - + print("Successfully retrieved rankings and p-values.") return ranks_df, pvals_upper_df From 80ab2be1a52ae835dc315b10ee9291bd64893b12 Mon Sep 17 00:00:00 2001 From: Siddhesh Thakur Date: Thu, 4 Sep 2025 21:21:47 -0400 Subject: [PATCH 11/11] code working as expected --- pyranker/ranker.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pyranker/ranker.py b/pyranker/ranker.py index 4d383af..9d7817e 100644 --- a/pyranker/ranker.py +++ b/pyranker/ranker.py @@ -236,9 +236,9 @@ def rank_methods(self) -> None: Ranks the methods based on the metrics using a two-step aggregation process. First, it ranks the methods for each metric and subject combination. - Then, it calculates the average rank for each method across all metrics - for each subject. Finally, it sums up the per-subject average ranks to - get a cumulative rank for each method, which is then used to determine + Then, it calculates the cumulative rank for each method across all metrics + for each subject. Finally, it sums up the per-subject cumulative ranks to + get a total cumulative rank for each method, which is then used to determine the final ranking. 
""" print("Ranking methods...") @@ -268,25 +268,25 @@ def rank_methods(self) -> None: verification_df.to_csv(verification_path, index=False) print(f"Saved detailed verification ranks to: {verification_path}") - # Create a dictionary to hold the per-subject average ranks - subject_avg_rank_data = {} + # Create a dictionary to hold the per-subject cumulative ranks + subject_cumulative_rank_data = {} subject_ids_sorted = sorted(list(self.all_subject_ids)) - # Calculate the average rank for each method for each subject + # Calculate the cumulative rank for each method for each subject for subject in subject_ids_sorted: subject_cols = [ col for col in self.detailed_rank_columns if col.endswith(f"_{subject}") ] if subject_cols: - subject_avg_rank_data[f"{subject}_avg_rank"] = ( - ranks_per_metric_detailed[subject_cols].mean(axis=1) + subject_cumulative_rank_data[f"{subject}_cumulative_rank"] = ( + ranks_per_metric_detailed[subject_cols].sum(axis=1) ) - # Create a DataFrame from the dictionary of per-subject average ranks - per_subject_avg_ranks = pd.DataFrame(subject_avg_rank_data) - self.per_subject_avg_ranks = per_subject_avg_ranks + # Create a DataFrame from the dictionary of per-subject cumulative ranks + per_subject_cumulative_ranks = pd.DataFrame(subject_cumulative_rank_data) + self.per_subject_cumulative_ranks = per_subject_cumulative_ranks # Calculate the cumulative and final ranks - cumulative_rank_column = per_subject_avg_ranks.sum(axis=1) + cumulative_rank_column = per_subject_cumulative_ranks.sum(axis=1) final_rank_column = cumulative_rank_column.rank( method="average", ascending=True ) @@ -297,7 +297,7 @@ def rank_methods(self) -> None: self.metrics_per_subject["method"], final_rank_column.rename("final_rank"), cumulative_rank_column.rename("cumulative_rank"), - per_subject_avg_ranks, + per_subject_cumulative_ranks, ], axis=1, ) @@ -319,7 +319,7 @@ def perform_permutation_test(self) -> None: # Select only the detailed rank columns for the test 
ranks_per_metric_sanitized = ranks_per_metric_sorted[ - self.per_subject_avg_ranks.columns + self.per_subject_cumulative_ranks.columns ] # Use a process pool to parallelize the p-value calculations