diff --git a/.gitignore b/.gitignore
index bfb58cb..59ad75f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
 Task_1/venv/*
+Task_1/my_code
+Task_1/ucb_r*
 Task_1/fets_challenge/__pycache__/*
+*pycache*
+Task_1/cert*
diff --git a/Task_1/.gitignore b/Task_1/.gitignore
new file mode 100644
index 0000000..f3220d1
--- /dev/null
+++ b/Task_1/.gitignore
@@ -0,0 +1,8 @@
+FeTS_Challenge-flair.py
+FeTS_Challenge_FedPOD_partitioning2.py
+FeTS_Challenge_RL.py
+FeTS_Challenge_RecEng.py
+FeTS_Challenge_leonardklausman.py
+*final_submission*
+build*
+cert*
diff --git a/Task_1/FeTS_Challenge.py b/Task_1/FeTS_Challenge.py
index 94d7598..9d1525f 100644
--- a/Task_1/FeTS_Challenge.py
+++ b/Task_1/FeTS_Challenge.py
@@ -14,7 +14,7 @@
 import os
 import numpy as np
-
+from pathlib import Path
 from fets_challenge import run_challenge_experiment
@@ -518,15 +518,22 @@ def FedAvgM_Selection(local_tensors,
 # to those you specify immediately above. Changing the below value to False will change
 # this fact, excluding the three hausdorff measurements. As hausdorff distance is
 # expensive to compute, excluding them will speed up your experiments.
-include_validation_with_hausdorff=True
+include_validation_with_hausdorff=True  # NOTE: setting this to False currently makes the whole run fall back to segmentation; root cause not yet identified
 
 # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create
 # other partitionings to test your changes for generalization to multiple partitionings.
 #institution_split_csv_filename = 'partitioning_1.csv'
-institution_split_csv_filename = 'small_split.csv'
+#institution_split_csv_filename = 'partitioning_1.csv'
+
+# Resolve the data root dynamically from the current working directory
+data_root = Path.cwd() / 'datasets' / 'MICCAI_FeTS2022_TrainingData'
+#institution_split_csv_filename = '/home/locolinux2/datasets/MICCAI_FeTS2022_TrainingData/partitioning_2.csv'
+institution_split_csv_filename = data_root / 'sanity_partitioning.csv'
 
 # change this to point to the parent directory of the data
-brats_training_data_parent_dir = '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData'
+brats_training_data_parent_dir = Path.cwd() / 'datasets' / 'MICCAI_FeTS2022_Resized'
 
 # increase this if you need a longer history for your algorithms
 # decrease this if you need to reduce system RAM consumption
@@ -537,7 +545,7 @@ def FedAvgM_Selection(local_tensors,
 # you'll want to increase this most likely. You can set it as high as you like,
 # however, the experiment will exit once the simulated time exceeds one week.
-rounds_to_train = 5
+rounds_to_train = 20
 
 # (bool) Determines whether checkpoints should be saved during the experiment.
 # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled
@@ -579,7 +587,6 @@ def FedAvgM_Selection(local_tensors,
 from fets_challenge import model_outputs_to_disc
-from pathlib import Path
 
 # infer participant home folder
 home = str(Path.home())
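The configuration block above now derives every path from `Path.cwd()`. If the data may live elsewhere, an environment-variable override keeps the script portable; a minimal sketch (the `FETS_DATA_ROOT` variable name is an assumption, not part of this patch):

```python
import os
from pathlib import Path

# Fall back to <cwd>/datasets unless FETS_DATA_ROOT is set (hypothetical variable).
datasets_dir = Path(os.environ.get('FETS_DATA_ROOT', Path.cwd() / 'datasets'))
data_root = datasets_dir / 'MICCAI_FeTS2022_TrainingData'
institution_split_csv_filename = data_root / 'sanity_partitioning.csv'
brats_training_data_parent_dir = datasets_dir / 'MICCAI_FeTS2022_Resized'
```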
diff --git a/Task_1/README.md b/Task_1/README.md
index 15c8b13..ab77b29 100644
--- a/Task_1/README.md
+++ b/Task_1/README.md
@@ -20,18 +20,18 @@ Please ask any additional questions in our discussion pages on our github site a
 2. ```git clone https://github.com/FETS-AI/Challenge.git```
 3. ```cd Challenge/Task_1```
 4. ```git lfs pull```
-5. Create virtual environment (python 3.6-3.8): using Anaconda, a new environment can be created and activated using the following commands:
+5. Create virtual environment (python 3.9): using Anaconda, a new environment can be created and activated using the following commands:
    ```sh
    ## create venv in specific path
-   conda create -p ./venv python=3.7 -y
+   conda create -p ./venv python=3.9 -y
    conda activate ./venv
    ```
 6. ```pip install --upgrade pip```
-7. Install Pytorch LTS (1.8.2) for your system (use CUDA 11):
-   ```pip3 install torch==1.8.2 torchvision==0.9.2 torchaudio==0.8.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu111```
-*Note all previous versions of pytorch can be found in [these instructions]([https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/previous-versions/))
-9. Set the environment variable `SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid sklearn deprecation error)
-10. ```pip install .```
+7. Install PyTorch (2.3.1) for your system:
+   ```pip install torch==2.3.1 torchvision==0.18.1```
+*Note: all previous versions of PyTorch can be found at [this link](https://pytorch.org/get-started/previous-versions/#v231).*
+8. Set the environment variable `SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True` (to avoid a sklearn deprecation error)
+9. ```pip install .```
 > * _Note: if you run into ```ERROR: Failed building wheel for SimpleITK```, try running ```pip install SimpleITK --only-binary :all:``` then rerunning ```pip install .```_
 10. ```python FeTS_Challenge.py```
 > * _Note_: if you run into ```ImportError: /home/locolinux/FETS2024/fets2024env/bin/../lib/libstdc++.so.6: version GLIBCXX_3.4.30' not found (required by /home/locolinux/FETS2024/fets2024env/lib/python3.7/site-packages/SimpleITK/_SimpleITK.so)```, try installing a previous version of SimpleITK (version 2.2.0 works)
diff --git a/Task_1/fets_challenge/custom_aggregation_wrapper.py b/Task_1/fets_challenge/custom_aggregation_wrapper.py
index ae7abc4..63472a6 100644
--- a/Task_1/fets_challenge/custom_aggregation_wrapper.py
+++ b/Task_1/fets_challenge/custom_aggregation_wrapper.py
@@ -1,4 +1,4 @@
-from openfl.component.aggregation_functions.experimental import PrivilegedAggregationFunction
+from openfl.interface.aggregation_functions.experimental import PrivilegedAggregationFunction
 
 # extends the openfl agg func interface to include challenge-relevant information
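Both import paths for `PrivilegedAggregationFunction` appear in this patch (the old `openfl.component...` and the new `openfl.interface...`). If the wrapper ever needs to run against both openfl versions, a guarded import is one option — a sketch, not part of the patch:

```python
try:
    # Import path in newer openfl releases (the one this patch switches to).
    from openfl.interface.aggregation_functions.experimental import PrivilegedAggregationFunction
except ImportError:
    # Legacy import path kept as a fallback for older openfl checkouts.
    from openfl.component.aggregation_functions.experimental import PrivilegedAggregationFunction
```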
diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py
index f561e66..4b9c5e7 100644
--- a/Task_1/fets_challenge/experiment.py
+++ b/Task_1/fets_challenge/experiment.py
@@ -4,6 +4,7 @@
 # Patrick Foley (Intel), Micah Sheller (Intel)
 
 import os
+from sys import path, exit
 import warnings
 from collections import namedtuple
 from copy import copy
@@ -13,12 +14,13 @@
 import numpy as np
 import pandas as pd
-from openfl.utilities import split_tensor_dict_for_holdouts, TensorKey
+from openfl.utilities import TensorKey
+from openfl.utilities.split import split_tensor_dict_for_holdouts
 from openfl.protocols import utils
 import openfl.native as fx
 import torch
 
-from .gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions
+from .gandlf_csv_adapter import construct_fedsim_csv, extract_segmentation_csv_partitions, extract_classification_csv_partitions
 from .custom_aggregation_wrapper import CustomAggregationWrapper
 from .checkpoint_utils import setup_checkpoint_folder, save_checkpoint, load_checkpoint
@@ -214,6 +216,26 @@ def compute_times_per_collaborator(collaborator_names,
         times[col] = time
     return times
 
+def split_tensor_dict_into_floats_and_non_floats(tensor_dict):
+    """
+    Split a tensor dictionary into floating point and non-floating point values.
+
+    Args:
+        tensor_dict: A dictionary of tensors
+
+    Returns:
+        Two dictionaries: the first contains all of the floating point tensors
+        and the second contains all of the non-floating point tensors
+
+    """
+    float_dict = {}
+    non_float_dict = {}
+    for k, v in tensor_dict.items():
+        if np.issubdtype(v.dtype, np.floating):
+            float_dict[k] = v
+        else:
+            non_float_dict[k] = v
+    return float_dict, non_float_dict
 
 def get_metric(metric, fl_round, tensor_db):
     metric_name = metric
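A quick usage sketch for the helper added above (assumes `split_tensor_dict_into_floats_and_non_floats` from the hunk is in scope):

```python
import numpy as np

tensor_dict = {
    'conv.weight': np.zeros((3, 3), dtype=np.float32),       # floating point: aggregated
    'bn.num_batches_tracked': np.array(42, dtype=np.int64),  # integer: held out
}
float_dict, non_float_dict = split_tensor_dict_into_floats_and_non_floats(tensor_dict)
assert set(float_dict) == {'conv.weight'}
assert set(non_float_dict) == {'bn.num_batches_tracked'}
```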
@@ -231,12 +255,10 @@
 def run_challenge_experiment(aggregation_function,
                              save_checkpoints=True,
                              restore_from_checkpoint_folder=None,
                              include_validation_with_hausdorff=True,
-                             use_pretrained_model=True):
+                             use_pretrained_model=False):
 
     fx.init('fets_challenge_workspace')
 
-    from sys import path, exit
-
     file = Path(__file__).resolve()
     root = file.parent.resolve() # interface root, containing command modules
     work = Path.cwd().resolve()
@@ -264,9 +286,10 @@
     # Update the plan if necessary
     plan = fx.update_plan(overrides)
+    print("****Debugging: plan is", plan)
 
     if not include_validation_with_hausdorff:
-        plan.config['task_runner']['settings']['fets_config_dict']['metrics'] = ['dice','dice_per_label']
+        plan.config['task_runner']['settings']['gandlf_config']['metrics'] = ['dice','dice_per_label']
 
     # Overwrite collaborator names
     plan.authorized_cols = collaborator_names
@@ -274,15 +297,29 @@
     for col in collaborator_names:
         plan.cols_data_paths[col] = col
 
+    # # Update the plan's data loader template for each collaborator
+    # correct_template = "openfl.federated.data.loader_gandlf"
+
+    # # Modify the plan's data loader settings if needed
+    # plan.config['data_loader'][col] = correct_template
+
     # get the data loaders for each collaborator
     collaborator_data_loaders = {col: copy(plan).get_data_loader(col) for col in collaborator_names}
 
-    transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv'))
+    # Check the task type and use the appropriate function
+    if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'segmentation':
+        transformed_csv_dict = extract_segmentation_csv_partitions(os.path.join(work, 'gandlf_paths.csv'))
+    elif plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'classification':
+        transformed_csv_dict = extract_classification_csv_partitions(os.path.join(work, 'gandlf_paths.csv'))
+    else:
+        raise ValueError("Invalid problem type. Expected 'segmentation' or 'classification'.")
+
     # get the task runner, passing the first data loader
     for col in collaborator_data_loaders:
         #Insert logic to serialize train / val CSVs here
-        transformed_csv_dict[col]['train'].to_csv(os.path.join(work, 'seg_test_train.csv'))
-        transformed_csv_dict[col]['val'].to_csv(os.path.join(work, 'seg_test_val.csv'))
+        os.makedirs(os.path.join(work, col), exist_ok=True)
+        transformed_csv_dict[col]['train'].to_csv(os.path.join(work, col, 'train.csv'))
+        transformed_csv_dict[col]['val'].to_csv(os.path.join(work, col, 'valid.csv'))
         task_runner = copy(plan).get_task_runner(collaborator_data_loaders[col])
 
     if use_pretrained_model:
@@ -327,27 +364,27 @@
     logger.info('Starting experiment')
 
     total_simulated_time = 0
-    best_dice = -1.0
-    best_dice_over_time_auc = 0
+    best_score = -1.0
+    best_score_over_time_auc = 0
 
     # results dataframe data
     experiment_results = {
         'round':[],
        'time': [],
         'convergence_score': [],
-        'round_dice': [],
-        'dice_label_0': [],
-        'dice_label_1': [],
-        'dice_label_2': [],
-        'dice_label_4': [],
+        'round_score': [],
+        # 'dice_label_0': [],
+        # 'dice_label_1': [],
+        # 'dice_label_2': [],
+        # 'dice_label_4': [],
     }
-    if include_validation_with_hausdorff:
-        experiment_results.update({
-            'hausdorff95_label_0': [],
-            'hausdorff95_label_1': [],
-            'hausdorff95_label_2': [],
-            'hausdorff95_label_4': [],
-        })
+    # if include_validation_with_hausdorff:
+    #     experiment_results.update({
+    #         'hausdorff95_label_0': [],
+    #         'hausdorff95_label_1': [],
+    #         'hausdorff95_label_2': [],
+    #         'hausdorff95_label_4': [],
+    #     })
 
     if restore_from_checkpoint_folder is None:
@@ -364,7 +401,7 @@
         checkpoint_folder = restore_from_checkpoint_folder
         [loaded_collaborator_names, starting_round_num, collaborator_time_stats,
-            total_simulated_time, best_dice, best_dice_over_time_auc,
+            total_simulated_time, best_score, best_score_over_time_auc,
             collaborators_chosen_each_round, collaborator_times_per_round,
             experiment_results, summary, agg_tensor_db] = state
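Because `experiment_results` is now a dict of equal-length lists keyed by metric-agnostic names, converting it for post-hoc analysis stays a one-liner; a sketch assuming the experiment loop has populated it:

```python
import pandas as pd

# Track how the projected convergence score evolves with simulated time.
results_df = pd.DataFrame(experiment_results)
print(results_df[['round', 'time', 'convergence_score', 'round_score']].tail())
```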
Setting "epochs_per_round" to 1') @@ -464,30 +502,53 @@ def run_challenge_experiment(aggregation_function, # get the performace validation scores for the round - round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) - dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) - dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) - dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) - dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) - if include_validation_with_hausdorff: - hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) - hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) - hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) - hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) - - # update best score - if best_dice < round_dice: - best_dice = round_dice - # Set the weights for the final model - if round_num == 0: - # here the initial model was validated (temp model does not exist) - logger.info(f'Skipping best model saving to disk as it is a random initialization.') - elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): - raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') - else: - # here the temp model was the one validated - shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') - logger.info(f'Saved model with best average binary DICE: {best_dice} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl') + if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'segmentation': + round_dice = get_metric('valid_dice', round_num, aggregator.tensor_db) + # dice_label_0 = get_metric('valid_dice_per_label_0', round_num, aggregator.tensor_db) + # dice_label_1 = get_metric('valid_dice_per_label_1', round_num, aggregator.tensor_db) + # dice_label_2 = get_metric('valid_dice_per_label_2', round_num, aggregator.tensor_db) + # dice_label_4 = get_metric('valid_dice_per_label_4', round_num, aggregator.tensor_db) + # if include_validation_with_hausdorff: + # hausdorff95_label_0 = get_metric('valid_hd95_per_label_0', round_num, aggregator.tensor_db) + # hausdorff95_label_1 = get_metric('valid_hd95_per_label_1', round_num, aggregator.tensor_db) + # hausdorff95_label_2 = get_metric('valid_hd95_per_label_2', round_num, aggregator.tensor_db) + # hausdorff95_label_4 = get_metric('valid_hd95_per_label_4', round_num, aggregator.tensor_db) + + # update best score + if best_score < round_dice: + best_score = round_dice + # Set the weights for the final model + if round_num == 0: + # here the initial model was validated (temp model does not exist) + logger.info(f'Skipping best model saving to disk as it is a random initialization.') + elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'): + raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.') + else: + # here the temp model was the one validated + shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl',dst=f'checkpoint/{checkpoint_folder}/best_model.pkl') + logger.info(f'Saved model with best average binary DICE: {best_score} to 
+
+        if plan.config['task_runner']['settings']['gandlf_config']['problem_type'] == 'classification':
+            round_f1 = get_metric('valid_f1', round_num, aggregator.tensor_db)
+
+            # update best score
+            if best_score < round_f1:
+                best_score = round_f1
+                # Set the weights for the final model
+                if round_num == 0:
+                    # here the initial model was validated (temp model does not exist)
+                    logger.info(f'Skipping best model saving to disk as it is a random initialization.')
+                elif not os.path.exists(f'checkpoint/{checkpoint_folder}/temp_model.pkl'):
+                    raise ValueError(f'Expected temporary model at: checkpoint/{checkpoint_folder}/temp_model.pkl to exist but it was not found.')
+                else:
+                    # here the temp model was the one validated
+                    shutil.copyfile(src=f'checkpoint/{checkpoint_folder}/temp_model.pkl', dst=f'checkpoint/{checkpoint_folder}/best_model.pkl')
+                    logger.info(f'Saved model with best average F1: {best_score} to ~/.local/workspace/checkpoint/{checkpoint_folder}/best_model.pkl')
+
+            round_score = round_f1
+
 
         ## RUN VALIDATION ON INTERMEDIATE CONSENSUS MODEL
         # set the task_runner data loader
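The convergence hunk that follows only renames `best_dice*` to `best_score*`; the projection formula itself is unchanged. A worked example with made-up values (units are illustrative — the experiment simulates up to one week):

```python
MAX_SIMULATION_TIME = 7 * 24 * 60 * 60   # one simulated week, in seconds (example)
total_simulated_time = 2 * 24 * 60 * 60  # two days elapsed (example)
best_score = 0.80                        # example best validation score so far
best_score_over_time_auc = 0.75 * total_simulated_time  # accumulated score-time area

# Carry the current best score forward for the remaining simulated time.
projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_score + best_score_over_time_auc
projected_auc /= MAX_SIMULATION_TIME
print(round(projected_auc, 4))  # 0.7857
```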
@@ -495,41 +556,41 @@
 
         ## CONVERGENCE METRIC COMPUTATION
         # update the auc score
-        best_dice_over_time_auc += best_dice * round_time
+        best_score_over_time_auc += best_score * round_time
 
         # project the auc score as remaining time * best dice
         # this projection assumes that the current best score is carried forward for the entire week
-        projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_dice + best_dice_over_time_auc
+        projected_auc = (MAX_SIMULATION_TIME - total_simulated_time) * best_score + best_score_over_time_auc
         projected_auc /= MAX_SIMULATION_TIME
 
         # End of round summary
         summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num)
         summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2))
         summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc)
-        summary += "\n\tDICE Label 0: {}".format(dice_label_0)
-        summary += "\n\tDICE Label 1: {}".format(dice_label_1)
-        summary += "\n\tDICE Label 2: {}".format(dice_label_2)
-        summary += "\n\tDICE Label 4: {}".format(dice_label_4)
-        if include_validation_with_hausdorff:
-            summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0)
-            summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1)
-            summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2)
-            summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4)
+        # summary += "\n\tDICE Label 0: {}".format(dice_label_0)
+        # summary += "\n\tDICE Label 1: {}".format(dice_label_1)
+        # summary += "\n\tDICE Label 2: {}".format(dice_label_2)
+        # summary += "\n\tDICE Label 4: {}".format(dice_label_4)
+        # if include_validation_with_hausdorff:
+        #     summary += "\n\tHausdorff95 Label 0: {}".format(hausdorff95_label_0)
+        #     summary += "\n\tHausdorff95 Label 1: {}".format(hausdorff95_label_1)
+        #     summary += "\n\tHausdorff95 Label 2: {}".format(hausdorff95_label_2)
+        #     summary += "\n\tHausdorff95 Label 4: {}".format(hausdorff95_label_4)
 
         experiment_results['round'].append(round_num)
         experiment_results['time'].append(total_simulated_time)
         experiment_results['convergence_score'].append(projected_auc)
-        experiment_results['round_dice'].append(round_dice)
-        experiment_results['dice_label_0'].append(dice_label_0)
-        experiment_results['dice_label_1'].append(dice_label_1)
-        experiment_results['dice_label_2'].append(dice_label_2)
-        experiment_results['dice_label_4'].append(dice_label_4)
-        if include_validation_with_hausdorff:
-            experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0)
-            experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1)
-            experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2)
-            experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4)
+        experiment_results['round_score'].append(round_score)
+        # experiment_results['dice_label_0'].append(dice_label_0)
+        # experiment_results['dice_label_1'].append(dice_label_1)
+        # experiment_results['dice_label_2'].append(dice_label_2)
+        # experiment_results['dice_label_4'].append(dice_label_4)
+        # if include_validation_with_hausdorff:
+        #     experiment_results['hausdorff95_label_0'].append(hausdorff95_label_0)
+        #     experiment_results['hausdorff95_label_1'].append(hausdorff95_label_1)
+        #     experiment_results['hausdorff95_label_2'].append(hausdorff95_label_2)
+        #     experiment_results['hausdorff95_label_4'].append(hausdorff95_label_4)
 
         logger.info(summary)
 
         if save_checkpoints:
@@ -538,8 +599,8 @@
             save_checkpoint(checkpoint_folder, aggregator,
                             collaborator_names, collaborators,
                             round_num, collaborator_time_stats,
-                            total_simulated_time, best_dice,
-                            best_dice_over_time_auc,
+                            total_simulated_time, best_score,
+                            best_score_over_time_auc,
                             collaborators_chosen_each_round,
                             collaborator_times_per_round,
                             experiment_results,
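Since `save_checkpoint` now round-trips the renamed `best_score` fields, resuming an interrupted run only requires passing the checkpoint folder back in. A sketch using the keyword arguments visible in this patch (the folder name is an example, and the leading arguments of `run_challenge_experiment` from the original run are elided):

```python
from fets_challenge import run_challenge_experiment

run_challenge_experiment(
    FedAvgM_Selection,  # aggregation function defined in FeTS_Challenge.py
    # ...same remaining positional arguments as the original run...
    save_checkpoints=True,
    restore_from_checkpoint_folder='experiment_1',  # example folder name
    include_validation_with_hausdorff=True,
    use_pretrained_model=False,
)
```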
diff --git a/Task_1/fets_challenge/gandlf_csv_adapter.py b/Task_1/fets_challenge/gandlf_csv_adapter.py
index cafad78..59c60d6 100644
--- a/Task_1/fets_challenge/gandlf_csv_adapter.py
+++ b/Task_1/fets_challenge/gandlf_csv_adapter.py
@@ -5,7 +5,7 @@
 # Patrick Foley (Intel)
 # Micah Sheller (Intel)
 
-import os
+import os, sys
 import numpy as np
 import pandas as pd
@@ -108,6 +108,7 @@ def construct_fedsim_csv(pardir,
                          split_subdirs_path,
                          percent_train,
                          federated_simulation_train_val_csv_path,
+                         problem_type,
                          training_and_validation=True):
 
     # read in the csv defining the subdirs per institution
@@ -159,15 +160,56 @@ def construct_fedsim_csv(pardir,
                                  train_val_headers=train_val_headers,
                                  numeric_header_name_to_key=numeric_header_name_to_key)
     else:
-        df = construct_validation_dataframe(paths_dict=paths_dict,
-                                            val_headers=val_headers,
-                                            numeric_header_name_to_key=numeric_header_name_to_key)
+        if problem_type == "classification":
+            df = construct_validation_dataframe_classification(paths_dict=paths_dict,
+                                                               val_headers=val_headers,
+                                                               numeric_header_name_to_key=numeric_header_name_to_key)
+        elif problem_type == "segmentation":
+            df = construct_validation_dataframe_segmentation(paths_dict=paths_dict,
+                                                             val_headers=val_headers,
+                                                             numeric_header_name_to_key=numeric_header_name_to_key)
         return df
 
     df.to_csv(federated_simulation_train_val_csv_path, index=False)
     return list(sorted(df.Partition_ID.unique()))
 
+
+def construct_validation_dataframe_classification(paths_dict, val_headers, numeric_header_name_to_key):
+    # Define a mapping for channel labels
+    channel_label_mapping = {
+        'Channel_0': 0,  # t1
+        'Channel_1': 1,  # t2
+        'Channel_2': 2,  # flair
+        'Channel_3': 3   # t1ce
+    }
+
+    # Initialize list to store rows in the new format
+    rows = []
+
+    for inst_name, inst_paths_dict in paths_dict.items():
+        for usage in ['train', 'val']:
+            for key_to_fpath in inst_paths_dict[usage]:
+                subject_id = key_to_fpath['Subject_ID']
+
+                # Iterate through each channel to create a separate row for each
+                for header in val_headers:
+                    if header != 0:  # Skip SubjectID, as it's handled separately
+                        channel_key = f"Channel_{header - 1}"  # Map header to 'Channel_0', 'Channel_1', etc.
+                        channel_path = key_to_fpath[numeric_header_name_to_key[header]]
+                        value_to_predict = channel_label_mapping[channel_key]
+
+                        # Append a row with the final headers format
+                        rows.append({
+                            'SubjectID': subject_id,
+                            'Channel': channel_path,
+                            'ValueToPredict': value_to_predict
+                        })
+
+    # Convert the list of rows into a DataFrame
+    df = pd.DataFrame(rows, dtype=str)
+    return df
+
+def construct_validation_dataframe_segmentation(paths_dict, val_headers, numeric_header_name_to_key):
     # intitialize columns
     columns = {str(header): [] for header in val_headers}
@@ -193,9 +235,7 @@ def construct_validation_dataframe(paths_dict, val_headers, numeric_header_name_
                                '4': 'Channel_3'})
     return df
 
-
-
-def extract_csv_partitions(csv_path):
+def extract_segmentation_csv_partitions(csv_path):
     df = pd.read_csv(csv_path)
     df = df.rename(columns={'0': 'SubjectID', '1': 'Channel_0',
                             '2': 'Channel_1', '3': 'Channel_2',
@@ -210,5 +250,70 @@
         transformed_csv_dict[str(col)]['val'] = \
             df[(df['Partition_ID'] == col) & (df['TrainOrVal'] == 'val')].drop(columns=['TrainOrVal','Partition_ID'])
 
+        # Prints for easy debugging
+        # print(f"\n=== Sample of Partition {col} - Train Data ===")
+        # transformed_csv_dict[str(col)]['train'].head(10).to_csv(sys.stdout, index=False)
+
+        # print(f"\n=== Sample of Partition {col} - Validation Data ===")
+        # transformed_csv_dict[str(col)]['val'].head(10).to_csv(sys.stdout, index=False)
+
     return transformed_csv_dict
+
+def extract_classification_csv_partitions(csv_path):
+    df = pd.read_csv(csv_path)
+    df = df.rename(columns={'0': 'SubjectID', '1': 'Channel_0',
+                            '2': 'Channel_1', '3': 'Channel_2',
+                            '4': 'Channel_3', '5': 'Label'})
+
+    cols = df['Partition_ID'].unique()
+    transformed_csv_dict = {}
+
+    # Define a mapping for channel labels
+    channel_label_mapping = {
+        'Channel_0': 0,  # t1
+        'Channel_1': 1,  # t2
+        'Channel_2': 2,  # flair
+        'Channel_3': 3   # t1ce
+    }
+
+    for col in cols:
+        transformed_csv_dict[str(col)] = {}
+
+        # Create lists for train and val partitions
+        train_list = []
+        val_list = []
+
+        # Filter rows by partition
+        for _, row in df[df['Partition_ID'] == col].iterrows():
+            subject_id = row['SubjectID']
+            train_or_val = row['TrainOrVal']
+
+            # Iterate through the channels (up to 4 channels)
+            for channel_name, channel_index in channel_label_mapping.items():
+                channel_path = row[channel_name]
+
+                # Create a row for the CSV output with the correct channel label
+                row_dict = {
+                    'SubjectID': subject_id,
+                    'Channel': channel_path,
+                    'ValueToPredict': channel_index  # Correct label (0-3 for t1, t2, flair, t1ce)
+                }
+
+                # Add row to the correct partition list
+                if train_or_val == 'train':
+                    train_list.append(row_dict)
+                else:
+                    val_list.append(row_dict)
+
+        # Convert lists to DataFrames for train and val
+        transformed_csv_dict[str(col)]['train'] = pd.DataFrame(train_list)
+        transformed_csv_dict[str(col)]['val'] = pd.DataFrame(val_list)
+
+        # Prints for easy debugging
+        print(f"\n=== Sample of Partition {col} - Train Data ===")
+        transformed_csv_dict[str(col)]['train'].head(10).to_csv(sys.stdout, index=False)
+
+        print(f"\n=== Sample of Partition {col} - Validation Data ===")
+        transformed_csv_dict[str(col)]['val'].head(10).to_csv(sys.stdout, index=False)
+
+    return transformed_csv_dict
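Both classification paths above emit one row per (subject, modality) pair, with the modality index as the label. For a single subject, the emitted rows take this shape (the subject ID is taken from `smaller_split.csv` below; the file names follow the usual BraTS naming and are illustrative):

```python
# Shape of a per-collaborator classification CSV (illustrative paths):
rows = [
    {'SubjectID': 'FeTS2022_01412', 'Channel': '.../FeTS2022_01412_t1.nii.gz',    'ValueToPredict': 0},
    {'SubjectID': 'FeTS2022_01412', 'Channel': '.../FeTS2022_01412_t2.nii.gz',    'ValueToPredict': 1},
    {'SubjectID': 'FeTS2022_01412', 'Channel': '.../FeTS2022_01412_flair.nii.gz', 'ValueToPredict': 2},
    {'SubjectID': 'FeTS2022_01412', 'Channel': '.../FeTS2022_01412_t1ce.nii.gz',  'ValueToPredict': 3},
]
```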
diff --git a/Task_1/fets_challenge/inference.py b/Task_1/fets_challenge/inference.py
index 13f0680..560bbf6 100644
--- a/Task_1/fets_challenge/inference.py
+++ b/Task_1/fets_challenge/inference.py
@@ -71,7 +71,7 @@ def get_binarized_and_belief(array, threshold=0.5):
 
     return binarized, belief
 
-def generate_validation_csv(data_path, validation_csv_filename, working_dir):
+def generate_validation_csv(data_path, validation_csv_filename, working_dir, problem_type):
     """
     Create the validation CSV to be consumed by the FeTSChallengeTaskRunner
     """
@@ -80,8 +80,10 @@ def generate_validation_csv(data_path, validation_csv_filename, working_dir):
         validation_csv_path,
         0.0,
         'placeholder',
-        training_and_validation=False)
-    validation_csv_dict.to_csv(os.path.join(working_dir, 'validation_paths.csv'),index=False)
+        training_and_validation=False,
+        problem_type=problem_type)
+    os.makedirs(os.path.join(working_dir, 'inference_col'), exist_ok=True)
+    validation_csv_dict.to_csv(os.path.join(working_dir, 'inference_col', 'valid.csv'),index=False)
 
 def replace_initializations(done_replacing, array, mask, replacement_value, initialization_value):
     """
@@ -204,6 +206,7 @@ def model_outputs_to_disc(data_path,
                           validation_csv,
                           output_path,
                           native_model_path,
+                          problem_type,
                           outputtag='',
                           device='cpu'):
@@ -218,7 +221,7 @@ def model_outputs_to_disc(data_path,
     path.append(str(root))
     path.insert(0, str(work))
 
-    generate_validation_csv(data_path,validation_csv, working_dir=work)
+    generate_validation_csv(data_path,validation_csv, working_dir=work, problem_type=problem_type)
 
     overrides = {
         'task_runner.settings.device': device,
@@ -228,12 +231,13 @@ def model_outputs_to_disc(data_path,
     # Update the plan if necessary
     plan = fx.update_plan(overrides)
-    plan.config['task_runner']['settings']['fets_config_dict']['save_output'] = True
-    plan.config['task_runner']['settings']['fets_config_dict']['output_dir'] = output_path
+    plan.config['task_runner']['settings']['gandlf_config']['save_output'] = True
+    plan.config['task_runner']['settings']['gandlf_config']['output_dir'] = output_path
 
     # overwrite datapath value for a single 'InferenceCol' collaborator
-    plan.cols_data_paths['InferenceCol'] = data_path
-
+    # plan.cols_data_paths['InferenceCol'] = data_path
+    plan.cols_data_paths['InferenceCol'] = 'inference_col'
+
     # get the inference data loader
     data_loader = copy(plan).get_data_loader('InferenceCol')
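`model_outputs_to_disc` now requires the new `problem_type` argument. A call matching the updated signature, modeled on the deleted `generate_predictions.py` below (all paths are examples):

```python
from fets_challenge import model_outputs_to_disc

model_outputs_to_disc(data_path='/path/to/MICCAI_FeTS2022_ValidationData',  # example path
                      validation_csv='validation.csv',
                      output_path='/path/to/checkpoint/experiment_1/model_outputs',  # example path
                      native_model_path='/path/to/checkpoint/experiment_1/best_model.pkl',  # example path
                      problem_type='segmentation',  # or 'classification'
                      outputtag='',
                      device='cpu')
```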
diff --git a/Task_1/generate_predictions.py b/Task_1/generate_predictions.py
deleted file mode 100644
index 872a62a..0000000
--- a/Task_1/generate_predictions.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# # FeTS Challenge
-#
-# Contributing Authors (alphabetical order):
-# - Brandon Edwards (Intel)
-# - Patrick Foley (Intel)
-# - Micah Sheller (Intel)
-
-from fets_challenge import model_outputs_to_disc
-from pathlib import Path
-import os
-from sys import path
-from fets_challenge.gandlf_csv_adapter import construct_fedsim_csv, extract_csv_partitions
-
-device='cpu'
-
-# infer participant home folder
-home = str(Path.home())
-
-# you will need to specify the correct experiment folder and the parent directory for
-# the data you want to run inference over
-checkpoint_folder='experiment_1'
-#data_path =
-data_path = '/raid/datasets/FeTS22/MICCAI_FeTS2022_ValidationData'
-
-# you can keep these the same if you wish
-best_model_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'best_model.pkl')
-outputs_path = os.path.join(home, '.local/workspace/checkpoint', checkpoint_folder, 'model_outputs')
-
-validation_csv_filename='validation.csv'
-
-
-# Using this best model, we can now produce NIfTI files for model outputs
-# using a provided data directory
-
-model_outputs_to_disc(data_path=data_path,
-                      validation_csv=validation_csv_filename,
-                      output_path=outputs_path,
-                      native_model_path=best_model_path,
-                      outputtag='',
-                      device=device)
diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml
index ca4476c..912c614 100644
--- a/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml
+++ b/Task_1/openfl-workspace/fets_challenge_workspace/plan/plan.yaml
@@ -21,7 +21,7 @@ collaborator :
 
 data_loader :
   defaults : plan/defaults/data_loader.yaml
-  template : openfl.federated.data.loader_fets_challenge.FeTSChallengeDataLoaderWrapper
+  template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper
   settings :
     feature_shape : [32, 32, 32]
@@ -31,14 +31,15 @@ task_runner :
     train_csv : seg_test_train.csv
     val_csv : seg_test_val.csv
     device : cpu
-    fets_config_dict :
+    gandlf_config :
+      problem_type: segmentation
       batch_size: 1
-      clip_grad: null
-      clip_mode: null
+      clip_mode: norm
+      clip_grad: 0.1
       data_augmentation: {}
      data_postprocessing: {}
       data_preprocessing:
-        normalize: null
+        normalize: None
       enable_padding: false
       in_memory: false
       inference_mechanism :
@@ -50,8 +50,8 @@ task_runner :
       output_dir: '.'
       metrics:
       - dice
-      - dice_per_label
-      - hd95_per_label
+      # - dice_per_label
+      # - hd95_per_label
       model:
         amp: true
         architecture: resunet
@@ -63,7 +63,7 @@ task_runner :
         - 4
         dimension: 3
         final_layer: softmax
-        ignore_label_validation: null
+        ignore_label_validation: None
         norm_type: instance
       nested_training:
         testing: 1
@@ -92,9 +92,10 @@ task_runner :
       track_memory_usage: false
       verbose: false
       version:
-        maximum: 0.0.14
+        maximum: 0.1.0
         minimum: 0.0.14
       weighted_loss: true
+      modality: rad
 
 network :
diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv b/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv
new file mode 100644
index 0000000..e1701a7
--- /dev/null
+++ b/Task_1/openfl-workspace/fets_challenge_workspace/smaller_split.csv
@@ -0,0 +1,7 @@
+Partition_ID,Subject_ID
+2,FeTS2022_01412
+2,FeTS2022_01415
+2,FeTS2022_01411
+3,FeTS2022_01439
+3,FeTS2022_01435
+3,FeTS2022_01434
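Partition CSVs like the one above map each subject to an institution. A quick way to sanity-check a split before launching an experiment (sketch):

```python
import pandas as pd

split = pd.read_csv('smaller_split.csv')
# Count subjects per simulated institution.
print(split.groupby('Partition_ID')['Subject_ID'].count())
# Partition_ID
# 2    3
# 3    3
```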
diff --git a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py
index 3794be6..66a0c82 100644
--- a/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py
+++ b/Task_1/openfl-workspace/fets_challenge_workspace/src/fets_challenge_model.py
@@ -8,10 +8,10 @@
 import numpy as np
 import torch as pt
 
-from openfl.utilities import split_tensor_dict_for_holdouts
+from openfl.utilities.split import split_tensor_dict_for_holdouts
 from openfl.utilities import TensorKey
 
-from openfl.federated.task.runner_fets_challenge import *
+from openfl.federated.task.runner_gandlf import *
 
 from GANDLF.compute.generic import create_pytorch_objects
 from GANDLF.compute.training_loop import train_network
@@ -19,7 +19,7 @@
 from . import TRAINING_HPARAMS
 
 
-class FeTSChallengeModel(FeTSChallengeTaskRunner):
+class FeTSChallengeModel(GaNDLFTaskRunner):
     """FeTSChallenge Model class for Federated Learning."""
 
     def validate(self, col_name, round_num, input_tensor_dict,
diff --git a/Task_1/plans/cla_plan.yaml b/Task_1/plans/cla_plan.yaml
new file mode 100644
index 0000000..ceb1b85
--- /dev/null
+++ b/Task_1/plans/cla_plan.yaml
@@ -0,0 +1,130 @@
+# Copyright (C) 2022 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+aggregator :
+  defaults : plan/defaults/aggregator.yaml
+  template : openfl.component.Aggregator
+  settings :
+    init_state_path : save/fets_seg_test_init.pbuf
+    best_state_path : save/fets_seg_test_best.pbuf
+    last_state_path : save/fets_seg_test_last.pbuf
+    rounds_to_train : 3
+    write_logs : true
+
+
+collaborator :
+  defaults : plan/defaults/collaborator.yaml
+  template : openfl.component.Collaborator
+  settings :
+    delta_updates : false
+    opt_treatment : RESET
+
+data_loader :
+  defaults : plan/defaults/data_loader.yaml
+  template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper
+  settings :
+    feature_shape : [128,128,64] #[32,32,32]
+
+task_runner :
+  template : src.fets_challenge_model.FeTSChallengeModel
+  settings :
+    # train_csv : cla_test_train.csv
+    # val_csv : cla_test_val.csv
+    device : cpu
+    gandlf_config :
+      problem_type: classification #segmentation, classification
+      # label: None
+      # clip_mode: norm
+      # clip_grad: 0.1
+      output_dir: '.'
+
+      batch_size: 8
+      data_augmentation: {}
+      data_preprocessing: {}
+      in_memory: false
+      learning_rate: 0.001
+      loss_function: cel
+      metrics:
+      - classification_accuracy
+      - recall
+      - precision
+      - f1
+      modality: rad
+      model:
+        amp: false
+        onnx_export: false
+        architecture: densenet121
+        base_filters: 16
+        class_list:
+        - '0'
+        - '1'
+        - '2'
+        - '3'
+        dimension: 3
+        final_layer: softmax
+        num_channels: 1
+        norm_type: instance
+        weighted_loss: false
+      num_epochs: 500
+      nested_training:
+        testing: 1
+        validation: -4 ## -4 because one site (i.e., Site3) has only 4 samples
+      optimizer: adam
+      patch_sampler: uniform
+      patch_size:
+      - 128
+      - 128
+      - 64
+      patience: 100
+      q_max_length: 100
+      q_num_workers: 0
+      q_samples_per_volume: 40
+      q_verbose: false
+      save_masks: false
+      scheduler: triangle
+      version:
+        maximum: 0.1.0
+        minimum: 0.0.14
+
+
+network :
+  defaults : plan/defaults/network.yaml
+
+assigner:
+  template : src.challenge_assigner.FeTSChallengeAssigner
+  settings :
+    training_tasks :
+    - aggregated_model_validation
+    - train
+    - locally_tuned_model_validation
+    validation_tasks :
+    - aggregated_model_validation
+
+tasks :
+  aggregated_model_validation:
+    function : validate
+    kwargs :
+      apply : global
+      metrics :
+      - valid_loss
+      - valid_f1
+
+  locally_tuned_model_validation:
+    function : validate
+    kwargs :
+      apply: local
+      metrics :
+      - valid_loss
+      - valid_f1
+
+  train:
+    function : train
+    kwargs :
+      metrics :
+      - loss
+      - train_f1
+      epochs : 1
+
+
+compression_pipeline :
+  defaults : plan/defaults/compression_pipeline.yaml
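Since `experiment.py` branches on `problem_type` read straight from the plan, a quick check that a plan file parses and declares the expected mode can save a failed run; a sketch:

```python
import yaml

with open('Task_1/plans/cla_plan.yaml') as f:
    plan = yaml.safe_load(f)

# Same key path that run_challenge_experiment() uses.
problem_type = plan['task_runner']['settings']['gandlf_config']['problem_type']
assert problem_type in ('segmentation', 'classification'), problem_type
print(problem_type)  # classification
```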
diff --git a/Task_1/plans/seg_plan.yaml b/Task_1/plans/seg_plan.yaml
new file mode 100644
index 0000000..912c614
--- /dev/null
+++ b/Task_1/plans/seg_plan.yaml
@@ -0,0 +1,142 @@
+# Copyright (C) 2022 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+aggregator :
+  defaults : plan/defaults/aggregator.yaml
+  template : openfl.component.Aggregator
+  settings :
+    init_state_path : save/fets_seg_test_init.pbuf
+    best_state_path : save/fets_seg_test_best.pbuf
+    last_state_path : save/fets_seg_test_last.pbuf
+    rounds_to_train : 3
+    write_logs : true
+
+
+collaborator :
+  defaults : plan/defaults/collaborator.yaml
+  template : openfl.component.Collaborator
+  settings :
+    delta_updates : false
+    opt_treatment : RESET
+
+data_loader :
+  defaults : plan/defaults/data_loader.yaml
+  template : openfl.federated.data.loader_gandlf.GaNDLFDataLoaderWrapper
+  settings :
+    feature_shape : [32, 32, 32]
+
+task_runner :
+  template : src.fets_challenge_model.FeTSChallengeModel
+  settings :
+    train_csv : seg_test_train.csv
+    val_csv : seg_test_val.csv
+    device : cpu
+    gandlf_config :
+      problem_type: segmentation
+      batch_size: 1
+      clip_mode: norm
+      clip_grad: 0.1
+      data_augmentation: {}
+      data_postprocessing: {}
+      data_preprocessing:
+        normalize: None
+      enable_padding: false
+      in_memory: false
+      inference_mechanism :
+        grid_aggregator_overlap: crop
+        patch_overlap: 0
+      learning_rate: 0.001
+      loss_function: dc
+      medcam_enabled: false
+      output_dir: '.'
+      metrics:
+      - dice
+      # - dice_per_label
+      # - hd95_per_label
+      model:
+        amp: true
+        architecture: resunet
+        base_filters: 32
+        class_list:
+        - 0
+        - 1
+        - 2
+        - 4
+        dimension: 3
+        final_layer: softmax
+        ignore_label_validation: None
+        norm_type: instance
+      nested_training:
+        testing: 1
+        validation: -5
+      num_epochs: 1
+      optimizer:
+        type: sgd
+      parallel_compute_command: ''
+      patch_sampler: label
+      patch_size:
+      - 64
+      - 64
+      - 64
+      patience: 100
+      pin_memory_dataloader: false
+      print_rgb_label_warning: true
+      q_max_length: 100
+      q_num_workers: 0
+      q_samples_per_volume: 40
+      q_verbose: false
+      save_output: false
+      save_training: false
+      scaling_factor: 1
+      scheduler:
+        type: triangle_modified
+      track_memory_usage: false
+      verbose: false
+      version:
+        maximum: 0.1.0
+        minimum: 0.0.14
+      weighted_loss: true
+      modality: rad
+
+
+network :
+  defaults : plan/defaults/network.yaml
+
+assigner:
+  template : src.challenge_assigner.FeTSChallengeAssigner
+  settings :
+    training_tasks :
+    - aggregated_model_validation
+    - train
+    - locally_tuned_model_validation
+    validation_tasks :
+    - aggregated_model_validation
+
+tasks :
+  aggregated_model_validation:
+    function : validate
+    kwargs :
+      apply : global
+      metrics :
+      - valid_loss
+      - valid_dice
+
+  locally_tuned_model_validation:
+    function : validate
+    kwargs :
+      apply: local
+      metrics :
+      - valid_loss
+      - valid_dice
+
+  train:
+    function : train
+    kwargs :
+      metrics :
+      - loss
+      - train_dice
+      epochs : 1
+
+
+compression_pipeline :
+  defaults : plan/defaults/compression_pipeline.yaml
diff --git a/Task_1/setup.py b/Task_1/setup.py
index 1ff561d..721304d 100644
--- a/Task_1/setup.py
+++ b/Task_1/setup.py
@@ -28,11 +28,11 @@
     ],
     include_package_data=True,
     install_requires=[
-        'openfl @ git+https://github.com/intel/openfl.git@f4b28d710e2be31cdfa7487fdb4e8cb3a1387a5f',
-        'GANDLF @ git+https://github.com/CBICA/GaNDLF.git@e4d0d4bfdf4076130817001a98dfb90189956278',
+        'openfl @ git+https://github.com/securefederatedai/openfl.git@84cff481b1ec8c92fc88c759ab5b8b6f950f9ca4',
+        'GANDLF @ git+https://github.com/mlcommons/GaNDLF.git@0.1.0',
         'fets @ git+https://github.com/FETS-AI/Algorithms.git@fets_challenge',
     ],
-    python_requires='>=3.6, <3.9',
+    python_requires='>=3.9',
     classifiers=[
         'Environment :: Console',
         # How mature is this project? Common values are
@@ -46,9 +46,6 @@
         'License :: OSI Approved :: FETS UI License',
         # Specify the Python versions you support here. In particular, ensure
         # that you indicate whether you support Python 2, Python 3 or both.
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3'
     ]
 )
diff --git a/Task_1/utils/inspect_pickled_model.py b/Task_1/utils/inspect_pickled_model.py
new file mode 100644
index 0000000..4481f74
--- /dev/null
+++ b/Task_1/utils/inspect_pickled_model.py
@@ -0,0 +1,25 @@
+import pickle
+from pathlib import Path
+
+experiment_number = 109  # change to your experiment number
+
+pickle_file_path = Path.home() / '.local' / 'workspace' / 'checkpoint' / f'experiment_{experiment_number}' / 'best_model.pkl'
+
+
+# Function to load and inspect the pickle file
+def load_pickle(file_path):
+    try:
+        with open(file_path, 'rb') as f:
+            model_data = pickle.load(f)
+        print("Pickle file loaded successfully.")
+        return model_data
+    except Exception as e:
+        print(f"Error loading pickle file: {e}")
+        return None
+
+# Load the model
+model_data = load_pickle(pickle_file_path)
+
+# Inspect the model (print relevant information)
+if model_data:
+    print("Model Data: ", model_data)
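The structure of `best_model.pkl` is not documented in this patch, so printing the whole object can be overwhelming. A gentler first look, as a sketch (it assumes, without guarantee, that the pickle holds a dict-like state):

```python
# Summarize the unpickled object instead of dumping it wholesale.
if isinstance(model_data, dict):
    for key, value in model_data.items():
        print(key, type(value))
else:
    print(type(model_data))
```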