From d6e59cde4f5db74c29e00f414761f5e0ad00ce05 Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Tue, 27 Aug 2024 15:35:23 -0400
Subject: [PATCH 01/12] Fix transpose and patch coords bug

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 monai/apps/vista3d/sampler.py  | 15 ++++++++-------
 monai/networks/nets/vista3d.py |  7 +++++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index b7aeb89a2e..7bc091f013 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -58,16 +58,16 @@ def sample_prompt_pairs(
         labels: [1, 1, H, W, D], ground truth labels.
         label_set: the label list for the specific dataset. Note if 0 is included in label_set,
             it will be added into automatic branch training. Recommend removing 0 from label_set
-            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset.
-            The reason is region with 0 in one partially labeled dataset may contain foregrounds in
-            another dataset.
+            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset. 
+            The reason is region with 0 in one partially labeled dataset may contain foregrounds in 
+            another dataset. 
         max_prompt: int, max number of total prompt, including foreground and background.
         max_foreprompt: int, max number of prompt from foreground.
         max_backprompt: int, max number of prompt from background.
         max_point: maximum number of points for each object.
         include_background: if include 0 into training prompt. If included, background 0 is treated
-            the same as foreground. Always be False for multi-partial-dataset training. If needed,
-            can be true for finetuning specific dataset, .
+            the same as foreground and points will be sampled. Can be true only if user want to segment 
+            background 0 with point clicks, otherwise always be false. 
         drop_label_prob: probability to drop label prompt.
         drop_point_prob: probability to drop point prompt.
         point_sampler: sampler to augment masks with supervoxel.
@@ -76,12 +76,13 @@ def sample_prompt_pairs(
     Returns:
         label_prompt: [B, 1]. The classes used for training automatic segmentation.
         point: [B, N, 3]. The corresponding points for each class.
-        Note that background label prompt requires matching point as well ([0,0,0] is used).
+            Note that background label prompt requires matching point as well ([0,0,0] is used).
         point_label: [B, N]. The corresponding point labels for each point (negative or positive).
-        -1 is used for padding the background label prompt and will be ignored.
+            -1 is used for padding the background label prompt and will be ignored.
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
         label_prompt can be None, and prompt_class is used to identify point classes.
     """
+
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")
diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 9148e36542..979a090df0 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -336,11 +336,11 @@ def set_auto_grad(self, auto_freeze: bool = False, point_freeze: bool = False):
     def forward(
         self,
         input_images: torch.Tensor,
+        patch_coords: Sequence[slice] | None = None,
         point_coords: torch.Tensor | None = None,
         point_labels: torch.Tensor | None = None,
         class_vector: torch.Tensor | None = None,
         prompt_class: torch.Tensor | None = None,
-        patch_coords: Sequence[slice] | None = None,
         labels: torch.Tensor | None = None,
         label_set: Sequence[int] | None = None,
         prev_mask: torch.Tensor | None = None,
@@ -421,7 +421,10 @@ def forward(
                     point_coords, point_labels = None, None
 
         if point_coords is None and class_vector is None:
-            return self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            logits = self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            if transpose:
+                logits = logits.transpose(1, 0)
+            return logits
 
         if self.image_embeddings is not None and kwargs.get("keep_cache", False) and class_vector is None:
             out, out_auto = self.image_embeddings, None

From 1b119766144a9d3bfea417fc456f75c5857f0bac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:38:26 +0000
Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/vista3d/sampler.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 7bc091f013..6ede500997 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -58,16 +58,16 @@ def sample_prompt_pairs(
         labels: [1, 1, H, W, D], ground truth labels.
         label_set: the label list for the specific dataset. Note if 0 is included in label_set,
             it will be added into automatic branch training. Recommend removing 0 from label_set
-            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset. 
-            The reason is region with 0 in one partially labeled dataset may contain foregrounds in 
-            another dataset. 
+            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset.
+            The reason is region with 0 in one partially labeled dataset may contain foregrounds in
+            another dataset.
         max_prompt: int, max number of total prompt, including foreground and background.
         max_foreprompt: int, max number of prompt from foreground.
         max_backprompt: int, max number of prompt from background.
         max_point: maximum number of points for each object.
         include_background: if include 0 into training prompt. If included, background 0 is treated
-            the same as foreground and points will be sampled. Can be true only if user want to segment 
-            background 0 with point clicks, otherwise always be false. 
+            the same as foreground and points will be sampled. Can be true only if the user wants to segment
+            background 0 with point clicks; otherwise it should always be false.
         drop_label_prob: probability to drop label prompt.
         drop_point_prob: probability to drop point prompt.
         point_sampler: sampler to augment masks with supervoxel.

From a4920306489eb6b2c6f90ec38ca5ea57c55f8daf Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:45:05 +0800
Subject: [PATCH 03/12] fix doc build

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 6ede500997..ff9f99542e 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -80,7 +80,7 @@ def sample_prompt_pairs(
         point_label: [B, N]. The corresponding point labels for each point (negative or positive).
             -1 is used for padding the background label prompt and will be ignored.
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
-        label_prompt can be None, and prompt_class is used to identify point classes.
+            label_prompt can be None, and prompt_class is used to identify point classes.
     """
 
     # class label number

From 67341661fb0866e8f8d1cb26f7317c2943a1146a Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:55:47 +0800
Subject: [PATCH 04/12] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ff9f99542e..05c69b0307 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -22,6 +22,7 @@
 
 __all__ = ["sample_prompt_pairs"]
 
+
 ENABLE_SPECIAL = True
 SPECIAL_INDEX = (23, 24, 25, 26, 27, 57, 128)
 MERGE_LIST = {
@@ -82,7 +83,6 @@ def sample_prompt_pairs(
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
             label_prompt can be None, and prompt_class is used to identify point classes.
     """
-
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")

From fc9d4c3b9b89ac63080084da5acb96869d24126e Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:24:40 +0800
Subject: [PATCH 05/12] fix doc

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 05c69b0307..21a0ac54b5 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -20,9 +20,6 @@
 import torch
 from torch import Tensor
 
-__all__ = ["sample_prompt_pairs"]
-
-
 ENABLE_SPECIAL = True
 SPECIAL_INDEX = (23, 24, 25, 26, 27, 57, 128)
 MERGE_LIST = {
@@ -31,6 +28,8 @@
     132: [57],  # overlap with trachea merge into airway
 }
 
+__all__ = ["sample_prompt_pairs"]
+
 
 def _get_point_label(id: int) -> tuple[int, int]:
     if id in SPECIAL_INDEX and ENABLE_SPECIAL:
@@ -83,6 +82,7 @@ def sample_prompt_pairs(
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
             label_prompt can be None, and prompt_class is used to identify point classes.
     """
+
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")

From 4f781aacf7b62df38b803fb68f5aa6ea70763862 Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:41:32 +0800
Subject: [PATCH 06/12] fix doc

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 docs/requirements.txt         | 1 -
 monai/apps/vista3d/sampler.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index ff94f7b6de..fc72be6b9e 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -35,7 +35,6 @@ pydicom
 h5py
 nni; platform_system == "Linux"
 optuna
-opencv-python-headless
 onnx>=1.13.0
 onnxruntime; python_version <= '3.10'
 zarr
diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 21a0ac54b5..80927f190e 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -37,7 +37,6 @@ def _get_point_label(id: int) -> tuple[int, int]:
     else:
         return 0, 1
 
-
 def sample_prompt_pairs(
     labels: Tensor,
     label_set: Sequence[int],

From d643d703b653f565b8b6c7023ea74d82e669ef2f Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:19:29 +0800
Subject: [PATCH 07/12] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 docs/requirements.txt         |  2 ++
 monai/apps/vista3d/sampler.py | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index fc72be6b9e..7307d8e5f9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -35,9 +35,11 @@ pydicom
 h5py
 nni; platform_system == "Linux"
 optuna
+opencv-python-headless
 onnx>=1.13.0
 onnxruntime; python_version <= '3.10'
 zarr
 huggingface_hub
 pyamg>=5.0.0
 packaging
+polygraphy
diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 80927f190e..ec84fafcf6 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -73,13 +73,19 @@ def sample_prompt_pairs(
         point_sampler_kwargs: arguments for point_sampler.
 
     Returns:
-        label_prompt: [B, 1]. The classes used for training automatic segmentation.
-        point: [B, N, 3]. The corresponding points for each class.
-            Note that background label prompt requires matching point as well ([0,0,0] is used).
-        point_label: [B, N]. The corresponding point labels for each point (negative or positive).
-            -1 is used for padding the background label prompt and will be ignored.
-        prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
-            label_prompt can be None, and prompt_class is used to identify point classes.
+        tuple:
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+              training automatic segmentation.
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
+              for each class. Note that background label prompts require matching points as well 
+              (e.g., [0, 0, 0] is used).
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
+              labels for each point (negative or positive). -1 is used for padding the background 
+              label prompt and will be ignored.
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
+              for label indexing during training. If label_prompt is None, prompt_class is used to 
+              identify point classes.
+
     """
 
     # class label number

From af53f6fe6f56ddc563e7c9de78c6ec256a4940b9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 Aug 2024 05:20:02 +0000
Subject: [PATCH 08/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/vista3d/sampler.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ec84fafcf6..c1429e1d53 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -74,16 +74,16 @@ def sample_prompt_pairs(
 
     Returns:
         tuple:
-            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for
               training automatic segmentation.
-            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
-              for each class. Note that background label prompts require matching points as well 
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points
+              for each class. Note that background label prompts require matching points as well
               (e.g., [0, 0, 0] is used).
-            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
-              labels for each point (negative or positive). -1 is used for padding the background 
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point
+              labels for each point (negative or positive). -1 is used for padding the background
               label prompt and will be ignored.
-            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
-              for label indexing during training. If label_prompt is None, prompt_class is used to 
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt
+              for label indexing during training. If label_prompt is None, prompt_class is used to
               identify point classes.
 
     """

From 3e0d115d93fe22779c02ab2c91a1bae2dafc3773 Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:11:59 +0800
Subject: [PATCH 09/12] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ec84fafcf6..17b2d34911 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -37,6 +37,7 @@ def _get_point_label(id: int) -> tuple[int, int]:
     else:
         return 0, 1
 
+
 def sample_prompt_pairs(
     labels: Tensor,
     label_set: Sequence[int],
@@ -74,16 +75,16 @@ def sample_prompt_pairs(
 
     Returns:
         tuple:
-            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for
               training automatic segmentation.
-            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
-              for each class. Note that background label prompts require matching points as well 
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points
+              for each class. Note that background label prompts require matching points as well
               (e.g., [0, 0, 0] is used).
-            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
-              labels for each point (negative or positive). -1 is used for padding the background 
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point
+              labels for each point (negative or positive). -1 is used for padding the background
               label prompt and will be ignored.
-            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
-              for label indexing during training. If label_prompt is None, prompt_class is used to 
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt
+              for label indexing during training. If label_prompt is None, prompt_class is used to
               identify point classes.
 
     """

From 1d287ef152045b5939486de704df5655d1307fd1 Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Fri, 30 Aug 2024 11:07:21 -0400
Subject: [PATCH 10/12] Fix patch coords bug

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 monai/apps/vista3d/inferer.py  |  2 +-
 monai/networks/nets/vista3d.py | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/monai/apps/vista3d/inferer.py b/monai/apps/vista3d/inferer.py
index 709f81f624..8f622ef6cd 100644
--- a/monai/apps/vista3d/inferer.py
+++ b/monai/apps/vista3d/inferer.py
@@ -100,7 +100,7 @@ def point_based_window_inferer(
                         point_labels=point_labels,
                         class_vector=class_vector,
                         prompt_class=prompt_class,
-                        patch_coords=unravel_slice,
+                        patch_coords=[unravel_slice],
                         prev_mask=prev_mask,
                         **kwargs,
                     )
diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 979a090df0..57c46c2e08 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -336,7 +336,7 @@ def set_auto_grad(self, auto_freeze: bool = False, point_freeze: bool = False):
     def forward(
         self,
         input_images: torch.Tensor,
-        patch_coords: Sequence[slice] | None = None,
+        patch_coords: list[Sequence[slice]] | None = None,
         point_coords: torch.Tensor | None = None,
         point_labels: torch.Tensor | None = None,
         class_vector: torch.Tensor | None = None,
@@ -364,8 +364,11 @@ def forward(
                 the points are for zero-shot or supported class. When class_vector and point_coords are both
                 provided, prompt_class is the same as class_vector. For prompt_class[b] > 512, point_coords[b]
                 will be considered novel class.
-            patch_coords: a sequence of the python slice objects representing the patch coordinates during sliding window inference.
-                This value is passed from sliding_window_inferer. This is an indicator for training phase or validation phase.
+            patch_coords: a list of sequence of the python slice objects representing the patch coordinates during sliding window 
+                inference. This value is passed from sliding_window_inferer. This is an indicator for training phase or validation phase.
+                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will inlcude 
+                coordinates of multiple patches. If point prompts are included, the batch size can only be one and all the
+                functions using patch_coords will by default use patch_coords[0].
             labels: [1, 1, H, W, D], the groundtruth label tensor, only used for point-only evaluation
             label_set: the label index matching the indexes in labels. If labels are mapped to global index using RelabelID,
                 this label_set should be global mapped index. If labels are not mapped to global index, e.g. in zero-shot
@@ -395,14 +398,14 @@ def forward(
                 if val_point_sampler is None:
                     # TODO: think about how to refactor this part.
                     val_point_sampler = self.sample_points_patch_val
-                point_coords, point_labels, prompt_class = val_point_sampler(labels, patch_coords, label_set)
+                point_coords, point_labels, prompt_class = val_point_sampler(labels, patch_coords[0], label_set)
                 if prompt_class[0].item() == 0:  # type: ignore
                     point_labels[0] = -1  # type: ignore
                 labels, prev_mask = None, None
             elif point_coords is not None:
                 # If not performing patch-based point only validation, use user provided click points for inference.
                 # the point clicks is in original image space, convert it to current patch-coordinate space.
-                point_coords, point_labels = self.update_point_to_patch(patch_coords, point_coords, point_labels)  # type: ignore
+                point_coords, point_labels = self.update_point_to_patch(patch_coords[0], point_coords, point_labels)  # type: ignore
 
         if point_coords is not None and point_labels is not None:
             # remove points that used for padding purposes (point_label = -1)
@@ -455,7 +458,7 @@ def forward(
             logits[mapping_index] = self.point_head(out, point_coords, point_labels, class_vector=prompt_class)
             if prev_mask is not None and patch_coords is not None:
                 logits = self.connected_components_combine(
-                    prev_mask[patch_coords].transpose(1, 0).to(logits.device),
+                    prev_mask[patch_coords[0]].transpose(1, 0).to(logits.device),
                     logits[mapping_index],
                     point_coords,  # type: ignore
                     point_labels,  # type: ignore

From 7f4ae69aec75eac03f8731af2c86ece8d7312ac4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 04:43:29 +0000
Subject: [PATCH 11/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/networks/nets/vista3d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 57c46c2e08..42ae0983b4 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -364,9 +364,9 @@ def forward(
                 the points are for zero-shot or supported class. When class_vector and point_coords are both
                 provided, prompt_class is the same as class_vector. For prompt_class[b] > 512, point_coords[b]
                 will be considered novel class.
-            patch_coords: a list of sequence of the python slice objects representing the patch coordinates during sliding window 
+            patch_coords: a list of sequences of Python slice objects representing the patch coordinates during sliding window
                 inference. This value is passed from sliding_window_inferer. This is an indicator for training phase or validation phase.
-                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will inlcude 
+                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will include
                 coordinates of multiple patches. If point prompts are included, the batch size can only be one and all the
                 functions using patch_coords will by default use patch_coords[0].
             labels: [1, 1, H, W, D], the groundtruth label tensor, only used for point-only evaluation

From a22efe40df6c335e1f79d12b2cc5836c235cedf6 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Mon, 2 Sep 2024 04:49:50 +0000
Subject: [PATCH 12/12] fix flake8

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 monai/networks/nets/vista3d.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 57c46c2e08..4215a9a594 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -364,9 +364,10 @@ def forward(
                 the points are for zero-shot or supported class. When class_vector and point_coords are both
                 provided, prompt_class is the same as class_vector. For prompt_class[b] > 512, point_coords[b]
                 will be considered novel class.
-            patch_coords: a list of sequence of the python slice objects representing the patch coordinates during sliding window 
-                inference. This value is passed from sliding_window_inferer. This is an indicator for training phase or validation phase.
-                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will inlcude 
+            patch_coords: a list of sequences of Python slice objects representing the patch coordinates during sliding window
+                inference. This value is passed from sliding_window_inferer.
+                This is an indicator for training phase or validation phase.
+                Notice for sliding window batch size > 1 (only supported by automatic segmentation), patch_coords will include
                 coordinates of multiple patches. If point prompts are included, the batch size can only be one and all the
                 functions using patch_coords will by default use patch_coords[0].
             labels: [1, 1, H, W, D], the groundtruth label tensor, only used for point-only evaluation