From 8bc72d49abebcc32465df968506dd983de051509 Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 09:16:44 +0200 Subject: [PATCH 1/8] Add remote train scripts --- .gitignore | 1 + .../dev_scripts/Dockerfile.cellseg3d | 37 +++++ .../dev_scripts/remote_training.py | 130 ++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d create mode 100644 napari_cellseg3d/dev_scripts/remote_training.py diff --git a/.gitignore b/.gitignore index 4b52bed6..9e08da2e 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,4 @@ cov.syspath.txt #include docs images !docs/source/logo/* !docs/source/images/* +napari_cellseg3d/dev_scripts/wandb diff --git a/napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d b/napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d new file mode 100644 index 00000000..e57b9a52 --- /dev/null +++ b/napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d @@ -0,0 +1,37 @@ +FROM nvidia/cuda:11.7.0-runtime-ubuntu20.04 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update -yy +RUN apt-get install -yy --no-install-recommends \ + git curl wget build-essential libhdf5-dev \ + libgl1-mesa-glx libglib2.0-0 software-properties-common + +ENV PYTHON_VERSION 3.8 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-cache policy python$${PYTHON_VERSION} +RUN apt-get install -yy --no-install-recommends \ + python${PYTHON_VERSION} \ + python3-pip \ + python${PYTHON_VERSION}-dev + +RUN apt-get clean +RUN rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir torch==2.0.0 --extra-index-url https://download.pytorch.org/whl/cu117 + +RUN apt-get update -yy \ + && apt-get install -yy git \ + && apt-get install -yy vim + +RUN git clone git+https://github.com/AdaptiveMotorControlLab/CellSeg3d@cy/jupyter-books-docs \ + && cd CellSeg3d \ + && pip3 install -e .[wandb] + +# create user session +RUN useradd -ms /bin/bash cyril +USER cyril +WORKDIR /home/cellseg3d + + +# docker build -f Dockerfile.cellseg3d -t cyril/cellseg3d . +# docker run -it --rm --gpus device=3 --shm-size=4gb -v "$(pwd)":/workspace/cellseg3d_results --name CellSeg3D-GPU3 diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py new file mode 100644 index 00000000..857690de --- /dev/null +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -0,0 +1,130 @@ +"""Showcases how to train a model without napari.""" + +from pathlib import Path + +from napari_cellseg3d import config as cfg +from napari_cellseg3d.code_models.worker_training import ( + SupervisedTrainingWorker, +) +from napari_cellseg3d.utils import LOGGER as logger + +RESULTS_PATH = Path.home() / "workspace/cellseg3d_results" +TRAINING_SPLIT = 0.8 # 0.4, 0.2 +MODEL_NAME = "SwinUNetR" # SegResNet +# BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 +BATCH_SIZE = 1 + + +IMAGES = ( + Path.home() / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15" +) +LABELS = ( + Path.home() + / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15/labels" +) + + +class LogFixture: + """Fixture for napari-less logging, replaces napari_cellseg3d.interface.Log in model_workers. + + This allows to redirect the output of the workers to stdout instead of a specialized widget. + """ + + def __init__(self): + """Creates a LogFixture object.""" + super(LogFixture, self).__init__() + + def print_and_log(self, text, printing=None): + """Prints and logs text.""" + print(text) + + def warn(self, warning): + """Logs warning.""" + logger.warning(warning) + + def error(self, e): + """Logs error.""" + raise (e) + + +def prepare_data(images_path, labels_path): + """Prepares data for training.""" + assert images_path.exists(), f"Images path does not exist: {images_path}" + assert labels_path.exists(), f"Labels path does not exist: {labels_path}" + + images = sorted(Path.glob(images_path, "*.tif")) + labels = sorted(Path.glob(labels_path, "*.tif")) + + print(f"Images paths: {images}") + print(f"Labels paths: {labels}") + + logger.info("Images :\n") + for file in images: + logger.info(Path(file).name) + logger.info("*" * 10) + logger.info("Labels :\n") + for file in images: + logger.info(Path(file).name) + + assert len(images) == len( + labels + ), "Number of images and labels must be the same" + + return [ + {"image": str(image_path), "label": str(label_path)} + for image_path, label_path in zip(images, labels) + ] + + +def remote_training(): + """Function to train a model without napari.""" + # print(f"Results path: {RESULTS_PATH.resolve()}") + + wandb_config = cfg.WandBConfig( + mode="online", # "online", + save_model_artifact=True, + ) + + deterministic_config = cfg.DeterministicConfig( + seed=34936339, + ) + + worker_config = cfg.SupervisedTrainingWorkerConfig( + device="cuda:0", + max_epochs=50, + learning_rate=0.001, # 1e-3 + validation_interval=2, + batch_size=BATCH_SIZE, # 10 for SegResNet + deterministic_config=deterministic_config, + scheduler_factor=0.5, + scheduler_patience=10, # use default scheduler + weights_info=cfg.WeightsInfo(), # no pretrained weights + # results_path_folder=str(RESULTS_PATH), + sampling=False, + do_augmentation=True, + train_data_dict=prepare_data(IMAGES, LABELS), + # supervised specific + model_info=cfg.ModelInfo( + name=MODEL_NAME, + model_input_size=(64, 64, 64), + ), + loss_function="Generalized Dice", + training_percent=TRAINING_SPLIT, + ) + + worker = SupervisedTrainingWorker(worker_config) + worker.wandb_config = wandb_config + ######### SET LOG + log = LogFixture() + worker.log_signal.connect(log.print_and_log) + worker.warn_signal.connect(log.warn) + worker.error_signal.connect(log.error) + + results = [] + for result in worker.train(): + results.append(result) + print("Training finished") + + +if __name__ == "__main__": + results = remote_training() From 512b72c797f892b101064d0cbddddf9b249c3c1d Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 09:25:49 +0200 Subject: [PATCH 2/8] mv dockerfile --- {napari_cellseg3d/dev_scripts => docker}/Dockerfile.cellseg3d | 1 + napari_cellseg3d/dev_scripts/remote_training.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) rename {napari_cellseg3d/dev_scripts => docker}/Dockerfile.cellseg3d (93%) diff --git a/napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d b/docker/Dockerfile.cellseg3d similarity index 93% rename from napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d rename to docker/Dockerfile.cellseg3d index e57b9a52..9a0d93f9 100644 --- a/napari_cellseg3d/dev_scripts/Dockerfile.cellseg3d +++ b/docker/Dockerfile.cellseg3d @@ -1,3 +1,4 @@ +# original file by Steffen Schneider https://github.com/stes/docker/tree/main FROM nvidia/cuda:11.7.0-runtime-ubuntu20.04 ENV DEBIAN_FRONTEND=noninteractive diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py index 857690de..ba146211 100644 --- a/napari_cellseg3d/dev_scripts/remote_training.py +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -20,7 +20,7 @@ ) LABELS = ( Path.home() - / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15/labels" + / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15/labels/semantic" ) From afa4b3ccb13c36be0c753f26f81b0a3fc4b95495 Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 09:38:00 +0200 Subject: [PATCH 3/8] Paths for remote train --- napari_cellseg3d/dev_scripts/remote_training.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py index ba146211..135c9329 100644 --- a/napari_cellseg3d/dev_scripts/remote_training.py +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -8,19 +8,21 @@ ) from napari_cellseg3d.utils import LOGGER as logger -RESULTS_PATH = Path.home() / "workspace/cellseg3d_results" -TRAINING_SPLIT = 0.8 # 0.4, 0.2 +RESULTS_PATH = Path("data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" +TRAINING_SPLIT = 0.2 # 0.4, 0.2 MODEL_NAME = "SwinUNetR" # SegResNet -# BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 -BATCH_SIZE = 1 +BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 +# BATCH_SIZE = 1 +SPLIT_FOLDER = "1_c15" # "2_c1_c4_visual" "3_c1245_visual" IMAGES = ( - Path.home() / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15" + Path("data/cyril") + / f"CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/{SPLIT_FOLDER}" ) LABELS = ( - Path.home() - / "Desktop/Code/CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/1_c15/labels/semantic" + Path("data/cyril") + / f"CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/{SPLIT_FOLDER}/labels/semantic" ) From 2a11150caaa1e4aafedc59dada0518b34aef0c04 Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 09:39:47 +0200 Subject: [PATCH 4/8] Change device --- napari_cellseg3d/dev_scripts/remote_training.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py index 135c9329..15197894 100644 --- a/napari_cellseg3d/dev_scripts/remote_training.py +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -8,14 +8,14 @@ ) from napari_cellseg3d.utils import LOGGER as logger -RESULTS_PATH = Path("data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" -TRAINING_SPLIT = 0.2 # 0.4, 0.2 MODEL_NAME = "SwinUNetR" # SegResNet -BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 -# BATCH_SIZE = 1 - +TRAINING_SPLIT = 0.2 # 0.4, 0.2 SPLIT_FOLDER = "1_c15" # "2_c1_c4_visual" "3_c1245_visual" +DEVICE = "cuda:3" +BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 +# BATCH_SIZE = 1 +RESULTS_PATH = Path("data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" IMAGES = ( Path("data/cyril") / f"CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/{SPLIT_FOLDER}" @@ -92,7 +92,7 @@ def remote_training(): ) worker_config = cfg.SupervisedTrainingWorkerConfig( - device="cuda:0", + device=DEVICE, max_epochs=50, learning_rate=0.001, # 1e-3 validation_interval=2, From 723f309da8ba66ad0434db55d4e0b4208751fc6a Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 09:51:04 +0200 Subject: [PATCH 5/8] Update remote_training.py --- .../dev_scripts/remote_training.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py index 15197894..a39469cb 100644 --- a/napari_cellseg3d/dev_scripts/remote_training.py +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -8,20 +8,20 @@ ) from napari_cellseg3d.utils import LOGGER as logger +RESULTS_PATH = Path("/data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" +TRAINING_SPLIT = 0.2 # 0.4, 0.8 MODEL_NAME = "SwinUNetR" # SegResNet -TRAINING_SPLIT = 0.2 # 0.4, 0.2 -SPLIT_FOLDER = "1_c15" # "2_c1_c4_visual" "3_c1245_visual" -DEVICE = "cuda:3" - BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 # BATCH_SIZE = 1 -RESULTS_PATH = Path("data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" + +SPLIT_FOLDER = "1_c15" # "2_c1_c4_visual" "3_c1245_visual" + IMAGES = ( - Path("data/cyril") + Path("/data/cyril") / f"CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/{SPLIT_FOLDER}" ) LABELS = ( - Path("data/cyril") + Path("/data/cyril") / f"CELLSEG_BENCHMARK/TPH2_mesospim/SPLITS/{SPLIT_FOLDER}/labels/semantic" ) @@ -53,6 +53,8 @@ def prepare_data(images_path, labels_path): """Prepares data for training.""" assert images_path.exists(), f"Images path does not exist: {images_path}" assert labels_path.exists(), f"Labels path does not exist: {labels_path}" + if not RESULTS_PATH.exists(): + RESULTS_PATH.mkdir(parents=True, exist_ok=True) images = sorted(Path.glob(images_path, "*.tif")) labels = sorted(Path.glob(labels_path, "*.tif")) @@ -83,7 +85,7 @@ def remote_training(): # print(f"Results path: {RESULTS_PATH.resolve()}") wandb_config = cfg.WandBConfig( - mode="online", # "online", + mode="online", save_model_artifact=True, ) @@ -92,7 +94,7 @@ def remote_training(): ) worker_config = cfg.SupervisedTrainingWorkerConfig( - device=DEVICE, + device="cuda:0", max_epochs=50, learning_rate=0.001, # 1e-3 validation_interval=2, @@ -101,7 +103,7 @@ def remote_training(): scheduler_factor=0.5, scheduler_patience=10, # use default scheduler weights_info=cfg.WeightsInfo(), # no pretrained weights - # results_path_folder=str(RESULTS_PATH), + results_path_folder=str(RESULTS_PATH), sampling=False, do_augmentation=True, train_data_dict=prepare_data(IMAGES, LABELS), From 91cb18b0c5ad2bb51c343a7dae1e7fab5bfd03af Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 10:02:39 +0200 Subject: [PATCH 6/8] Change results path to include more info --- napari_cellseg3d/dev_scripts/remote_training.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/napari_cellseg3d/dev_scripts/remote_training.py b/napari_cellseg3d/dev_scripts/remote_training.py index a39469cb..e2d05d12 100644 --- a/napari_cellseg3d/dev_scripts/remote_training.py +++ b/napari_cellseg3d/dev_scripts/remote_training.py @@ -8,13 +8,17 @@ ) from napari_cellseg3d.utils import LOGGER as logger -RESULTS_PATH = Path("/data/cyril") / "CELLSEG_BENCHMARK/cellseg3d_train" TRAINING_SPLIT = 0.2 # 0.4, 0.8 -MODEL_NAME = "SwinUNetR" # SegResNet +MODEL_NAME = "SegResNet" # "SwinUNetR" BATCH_SIZE = 10 if MODEL_NAME == "SegResNet" else 5 # BATCH_SIZE = 1 SPLIT_FOLDER = "1_c15" # "2_c1_c4_visual" "3_c1245_visual" +RESULTS_PATH = ( + Path("/data/cyril") + / "CELLSEG_BENCHMARK/cellseg3d_train" + / f"{MODEL_NAME}_{SPLIT_FOLDER}_{int(TRAINING_SPLIT*100)}" +) IMAGES = ( Path("/data/cyril") From e4d4a70bc3f9426b245346ddde9f5c0f692a7c79 Mon Sep 17 00:00:00 2001 From: C-Achard Date: Tue, 17 Oct 2023 10:14:40 +0200 Subject: [PATCH 7/8] Update colab_training.py --- napari_cellseg3d/dev_scripts/colab_training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/napari_cellseg3d/dev_scripts/colab_training.py b/napari_cellseg3d/dev_scripts/colab_training.py index 090031d0..2af0568f 100644 --- a/napari_cellseg3d/dev_scripts/colab_training.py +++ b/napari_cellseg3d/dev_scripts/colab_training.py @@ -82,7 +82,7 @@ def __init__( ) self.dice_metric = DiceMetric( - include_background=True, reduction="mean", get_not_nans=False + include_background=False, reduction="mean", get_not_nans=False ) self.normalize_function = utils.remap_image self.start_time = time.time() From 989eadbe29a7970407a5a4f004a00a94ca61a24e Mon Sep 17 00:00:00 2001 From: C-Achard Date: Wed, 8 Nov 2023 09:47:37 +0100 Subject: [PATCH 8/8] Delete Dockerfile.cellseg3d --- docker/Dockerfile.cellseg3d | 38 ------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 docker/Dockerfile.cellseg3d diff --git a/docker/Dockerfile.cellseg3d b/docker/Dockerfile.cellseg3d deleted file mode 100644 index 9a0d93f9..00000000 --- a/docker/Dockerfile.cellseg3d +++ /dev/null @@ -1,38 +0,0 @@ -# original file by Steffen Schneider https://github.com/stes/docker/tree/main -FROM nvidia/cuda:11.7.0-runtime-ubuntu20.04 -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update -yy -RUN apt-get install -yy --no-install-recommends \ - git curl wget build-essential libhdf5-dev \ - libgl1-mesa-glx libglib2.0-0 software-properties-common - -ENV PYTHON_VERSION 3.8 -RUN add-apt-repository ppa:deadsnakes/ppa -RUN apt-cache policy python$${PYTHON_VERSION} -RUN apt-get install -yy --no-install-recommends \ - python${PYTHON_VERSION} \ - python3-pip \ - python${PYTHON_VERSION}-dev - -RUN apt-get clean -RUN rm -rf /var/lib/apt/lists/* - -RUN pip install --no-cache-dir torch==2.0.0 --extra-index-url https://download.pytorch.org/whl/cu117 - -RUN apt-get update -yy \ - && apt-get install -yy git \ - && apt-get install -yy vim - -RUN git clone git+https://github.com/AdaptiveMotorControlLab/CellSeg3d@cy/jupyter-books-docs \ - && cd CellSeg3d \ - && pip3 install -e .[wandb] - -# create user session -RUN useradd -ms /bin/bash cyril -USER cyril -WORKDIR /home/cellseg3d - - -# docker build -f Dockerfile.cellseg3d -t cyril/cellseg3d . -# docker run -it --rm --gpus device=3 --shm-size=4gb -v "$(pwd)":/workspace/cellseg3d_results --name CellSeg3D-GPU3