diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index a387c77ebd..367a24cbde 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -26,7 +26,7 @@ jobs: steps: - if: runner.os == 'windows' name: Config pagefile (Windows only) - uses: al-cheb/configure-pagefile-action@v1.3 + uses: al-cheb/configure-pagefile-action@v1.4 with: minimum-size: 8GB maximum-size: 16GB diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 792fda5279..0f9e6cd480 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -19,18 +19,18 @@ jobs: - "PTLATEST+CUDA121" include: # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes - - environment: PT191+CUDA113 - pytorch: "torch==1.9.1 torchvision==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu113" - base: "nvcr.io/nvidia/pytorch:21.06-py3" # CUDA 11.3 - environment: PT110+CUDA113 pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu113" base: "nvcr.io/nvidia/pytorch:21.06-py3" # CUDA 11.3 - environment: PT113+CUDA113 pytorch: "torch==1.13.1 torchvision==0.14.1 --extra-index-url https://download.pytorch.org/whl/cu113" base: "nvcr.io/nvidia/pytorch:21.06-py3" # CUDA 11.3 - - environment: PTLATEST+CUDA121 - pytorch: "-U torch torchvision --extra-index-url https://download.pytorch.org/whl/cu118" + - environment: PT113+CUDA122 + pytorch: "torch==1.13.1 torchvision==0.14.1 --extra-index-url https://download.pytorch.org/whl/cu121" base: "nvcr.io/nvidia/pytorch:23.08-py3" # CUDA 12.2 + - environment: PTLATEST+CUDA124 + pytorch: "-U torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121" + base: "nvcr.io/nvidia/pytorch:24.03-py3" # CUDA 12.4 container: image: ${{ matrix.base }} options: "--gpus all" @@ -76,7 +76,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:22.10", "pytorch:23.08"] + container: ["pytorch:23.08", "pytorch:24.03"] container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -121,7 +121,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:23.08"] + container: ["pytorch:24.03"] container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -221,7 +221,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' needs: cron-gpu # so that monai itself is verified first container: - image: nvcr.io/nvidia/pytorch:23.08-py3 # testing with the latest pytorch base image + image: nvcr.io/nvidia/pytorch:24.03-py3 # testing with the latest pytorch base image options: "--gpus all --ipc=host" runs-on: [self-hosted, linux, x64, integration] steps: diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index a6d7981814..f83d52f8e3 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -29,10 +29,6 @@ jobs: - "PT210+CUDA121DOCKER" include: # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes - - environment: PT19+CUDA114DOCKER - # 21.10: 1.10.0a0+0aef44c - pytorch: "-h" # we explicitly set pytorch to -h to avoid pip install error - base: "nvcr.io/nvidia/pytorch:21.10-py3" - environment: PT110+CUDA111 pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu111" base: "nvcr.io/nvidia/cuda:11.1.1-devel-ubuntu18.04" @@ -47,6 +43,10 @@ jobs: # 23.08: 
2.1.0a0+29c30b1 pytorch: "-h" # we explicitly set pytorch to -h to avoid pip install error base: "nvcr.io/nvidia/pytorch:23.08-py3" + - environment: PT210+CUDA121DOCKER + # 24.03: 2.3.0a0+40ec155e58.nv24.3 + pytorch: "-h" # we explicitly set pytorch to -h to avoid pip install error + base: "nvcr.io/nvidia/pytorch:24.03-py3" container: image: ${{ matrix.base }} options: --gpus all --env NVIDIA_DISABLE_REQUIRE=true # workaround for unsatisfied condition: cuda>=11.6 diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index b011e65cf1..b7f2cfb9db 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -62,7 +62,7 @@ jobs: steps: - if: runner.os == 'windows' name: Config pagefile (Windows only) - uses: al-cheb/configure-pagefile-action@v1.3 + uses: al-cheb/configure-pagefile-action@v1.4 with: minimum-size: 8GB maximum-size: 16GB diff --git a/Dockerfile b/Dockerfile index cb1300ea90..d5777104c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,11 +11,15 @@ # To build with a different base image # please run `docker build` using the `--build-arg PYTORCH_IMAGE=...` flag. -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:23.08-py3 +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3 FROM ${PYTORCH_IMAGE} LABEL maintainer="monai.contact@gmail.com" +# TODO: remark for issue [revise the dockerfile](https://github.com/zarr-developers/numcodecs/issues/431) +WORKDIR /opt +RUN git clone --recursive https://github.com/zarr-developers/numcodecs.git && pip wheel numcodecs + WORKDIR /opt/monai # install full deps diff --git a/docs/source/losses.rst b/docs/source/losses.rst index e929e9d605..ba794af3eb 100644 --- a/docs/source/losses.rst +++ b/docs/source/losses.rst @@ -73,6 +73,11 @@ Segmentation Losses .. autoclass:: ContrastiveLoss :members: +`BarlowTwinsLoss` +~~~~~~~~~~~~~~~~~ +.. autoclass:: BarlowTwinsLoss + :members: + `HausdorffDTLoss` ~~~~~~~~~~~~~~~~~ .. autoclass:: HausdorffDTLoss @@ -134,6 +139,11 @@ Reconstruction Losses .. autoclass:: JukeboxLoss :members: +`SURELoss` +~~~~~~~~~~ +.. autoclass:: SURELoss + :members: + Loss Wrappers ------------- diff --git a/docs/source/networks.rst b/docs/source/networks.rst index 8eada7933f..b59c8af5fc 100644 --- a/docs/source/networks.rst +++ b/docs/source/networks.rst @@ -408,6 +408,11 @@ Layers .. autoclass:: LLTM :members: +`ConjugateGradient` +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: ConjugateGradient + :members: + `Utilities` ~~~~~~~~~~~ .. automodule:: monai.networks.layers.convutils diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 8990e7991d..8bd5bfd99f 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -309,6 +309,12 @@ Intensity :members: :special-members: __call__ +`ClipIntensityPercentiles` +"""""""""""""""""""""""""" +.. autoclass:: ClipIntensityPercentiles + :members: + :special-members: __call__ + `RandScaleIntensity` """""""""""""""""""" .. image:: https://raw.githubusercontent.com/Project-MONAI/DocImages/main/transforms/RandScaleIntensity.png @@ -661,6 +667,27 @@ Post-processing :members: :special-members: __call__ +Regularization +^^^^^^^^^^^^^^ + +`CutMix` +"""""""" +.. autoclass:: CutMix + :members: + :special-members: __call__ + +`CutOut` +"""""""" +.. autoclass:: CutOut + :members: + :special-members: __call__ + +`MixUp` +""""""" +.. autoclass:: MixUp + :members: + :special-members: __call__ + Signal ^^^^^^^ @@ -1384,6 +1411,12 @@ Intensity (Dict) :members: :special-members: __call__ +`ClipIntensityPercentilesd` +""""""""""""""""""""""""""" +.. 
autoclass:: ClipIntensityPercentilesd + :members: + :special-members: __call__ + `RandScaleIntensityd` """"""""""""""""""""" .. image:: https://raw.githubusercontent.com/Project-MONAI/DocImages/main/transforms/RandScaleIntensityd.png @@ -1707,6 +1740,27 @@ Post-processing (Dict) :members: :special-members: __call__ +Regularization (Dict) +^^^^^^^^^^^^^^^^^^^^^ + +`CutMixd` +""""""""" +.. autoclass:: CutMixd + :members: + :special-members: __call__ + +`CutOutd` +""""""""" +.. autoclass:: CutOutd + :members: + :special-members: __call__ + +`MixUpd` +"""""""" +.. autoclass:: MixUpd + :members: + :special-members: __call__ + Signal (Dict) ^^^^^^^^^^^^^ diff --git a/docs/source/transforms_idx.rst b/docs/source/transforms_idx.rst index f4d02a483f..650d45db71 100644 --- a/docs/source/transforms_idx.rst +++ b/docs/source/transforms_idx.rst @@ -74,6 +74,16 @@ Post-processing post.array post.dictionary +Regularization +^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: _gen + :nosignatures: + + regularization.array + regularization.dictionary + Signal ^^^^^^ diff --git a/monai/__init__.py b/monai/__init__.py index 638220f6df..eb05ac993d 100644 --- a/monai/__init__.py +++ b/monai/__init__.py @@ -83,6 +83,11 @@ from .utils.tf32 import detect_default_tf32 detect_default_tf32() + import torch + + # workaround related to https://github.com/Project-MONAI/MONAI/issues/7575 + if hasattr(torch.cuda.device_count, "cache_clear"): + torch.cuda.device_count.cache_clear() except BaseException: from .utils.misc import MONAIEnvVars diff --git a/monai/apps/auto3dseg/hpo_gen.py b/monai/apps/auto3dseg/hpo_gen.py index 688bf2b916..b755b99feb 100644 --- a/monai/apps/auto3dseg/hpo_gen.py +++ b/monai/apps/auto3dseg/hpo_gen.py @@ -193,7 +193,9 @@ def generate(self, output_folder: str = ".") -> None: self.obj_filename = os.path.join(write_path, "algo_object.pkl") if isinstance(self.algo, BundleAlgo): - self.algo.export_to_disk(output_folder, task_prefix + task_id, fill_with_datastats=False) + self.algo.export_to_disk( + output_folder, task_prefix + task_id, bundle_root=write_path, fill_with_datastats=False + ) else: ConfigParser.export_config_file(self.params, write_path) logger.info(write_path) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 44b3c24256..8a10849904 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -22,6 +22,7 @@ from monai.apps.nnunet.utils import analyze_data, create_new_data_copy, create_new_dataset_json from monai.bundle import ConfigParser from monai.utils import ensure_tuple, optional_import +from monai.utils.misc import run_cmd load_pickle, _ = optional_import("batchgenerators.utilities.file_and_folder_operations", name="load_pickle") join, _ = optional_import("batchgenerators.utilities.file_and_folder_operations", name="join") @@ -495,65 +496,64 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4. gpu_id: an integer to select the device to use, or a tuple/list of GPU device indices used for multi-GPU training (e.g., (0,1)). Default: 0. - from nnunetv2.run.run_training import run_training kwargs: this optional parameter allows you to specify additional arguments in - ``nnunetv2.run.run_training.run_training``. Currently supported args are - - plans_identifier: custom plans identifier. Default: "nnUNetPlans". - - pretrained_weights: path to nnU-Net checkpoint file to be used as pretrained model. 
Will only be - used when actually training. Beta. Use with caution. Default: False. - - use_compressed_data: True to use compressed data for training. Reading compressed data is much - more CPU and (potentially) RAM intensive and should only be used if you know what you are - doing. Default: False. - - continue_training: continue training from latest checkpoint. Default: False. - - only_run_validation: True to run the validation only. Requires training to have finished. - Default: False. - - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you - don't want to flood your hard drive with checkpoints. Default: False. + ``nnunetv2.run.run_training.run_training_entry``. + + Currently supported args are: + + - p: custom plans identifier. Default: "nnUNetPlans". + - pretrained_weights: path to nnU-Net checkpoint file to be used as pretrained model. Will only be + used when actually training. Beta. Use with caution. Default: False. + - use_compressed: True to use compressed data for training. Reading compressed data is much + more CPU and (potentially) RAM intensive and should only be used if you know what you are + doing. Default: False. + - c: continue training from latest checkpoint. Default: False. + - val: True to run the validation only. Requires training to have finished. + Default: False. + - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you + don't want to flood your hard drive with checkpoints. Default: False. """ if "num_gpus" in kwargs: kwargs.pop("num_gpus") logger.warning("please use gpu_id to set the GPUs to use") - if "trainer_class_name" in kwargs: - kwargs.pop("trainer_class_name") + if "tr" in kwargs: + kwargs.pop("tr") logger.warning("please specify the `trainer_class_name` in the __init__ of `nnUNetV2Runner`.") - if "export_validation_probabilities" in kwargs: - kwargs.pop("export_validation_probabilities") + if "npz" in kwargs: + kwargs.pop("npz") logger.warning("please specify the `export_validation_probabilities` in the __init__ of `nnUNetV2Runner`.") + cmd = self.train_single_model_command(config, fold, gpu_id, kwargs) + run_cmd(cmd, shell=True) + + def train_single_model_command(self, config, fold, gpu_id, kwargs): if isinstance(gpu_id, (tuple, list)): if len(gpu_id) > 1: gpu_ids_str = "" for _i in range(len(gpu_id)): gpu_ids_str += f"{gpu_id[_i]}," - os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids_str[:-1] + device_setting = f"CUDA_VISIBLE_DEVICES={gpu_ids_str[:-1]}" else: - os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" - else: - os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}" - - from nnunetv2.run.run_training import run_training - - if isinstance(gpu_id, int) or len(gpu_id) == 1: - run_training( - dataset_name_or_id=self.dataset_name_or_id, - configuration=config, - fold=fold, - trainer_class_name=self.trainer_class_name, - export_validation_probabilities=self.export_validation_probabilities, - **kwargs, - ) + device_setting = f"CUDA_VISIBLE_DEVICES={gpu_id[0]}" else: - run_training( - dataset_name_or_id=self.dataset_name_or_id, - configuration=config, - fold=fold, - num_gpus=len(gpu_id), - trainer_class_name=self.trainer_class_name, - export_validation_probabilities=self.export_validation_probabilities, - **kwargs, - ) + device_setting = f"CUDA_VISIBLE_DEVICES={gpu_id}" + num_gpus = 1 if isinstance(gpu_id, int) or len(gpu_id) == 1 else len(gpu_id) + + cmd = ( + f"{device_setting} nnUNetv2_train " + + f"{self.dataset_name_or_id} {config} {fold} " + + f"-tr {self.trainer_class_name} 
-num_gpus {num_gpus}" + ) + if self.export_validation_probabilities: + cmd += " --npz" + for _key, _value in kwargs.items(): + if _key == "p" or _key == "pretrained_weights": + cmd += f" -{_key} {_value}" + else: + cmd += f" --{_key} {_value}" + return cmd def train( self, @@ -637,15 +637,7 @@ def train_parallel_cmd( if _config in ensure_tuple(configs): for _i in range(self.num_folds): the_device = gpu_id_for_all[_index % n_devices] # type: ignore - cmd = ( - "python -m monai.apps.nnunet nnUNetV2Runner train_single_model " - + f"--input_config '{self.input_config_or_dict}' --work_dir '{self.work_dir}' " - + f"--config '{_config}' --fold {_i} --gpu_id {the_device} " - + f"--trainer_class_name {self.trainer_class_name} " - + f"--export_validation_probabilities {self.export_validation_probabilities}" - ) - for _key, _value in kwargs.items(): - cmd += f" --{_key} {_value}" + cmd = self.train_single_model_command(_config, _i, the_device, kwargs) all_cmds[-1][the_device].append(cmd) _index += 1 return all_cmds diff --git a/monai/auto3dseg/analyzer.py b/monai/auto3dseg/analyzer.py index 56419da4cb..37f3faea21 100644 --- a/monai/auto3dseg/analyzer.py +++ b/monai/auto3dseg/analyzer.py @@ -460,7 +460,7 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe torch.set_grad_enabled(False) ndas: list[MetaTensor] = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])] # type: ignore - ndas_label: MetaTensor = d[self.label_key].astype(torch.int8) # (H,W,D) + ndas_label: MetaTensor = d[self.label_key].astype(torch.int16) # (H,W,D) if ndas_label.shape != ndas[0].shape: raise ValueError(f"Label shape {ndas_label.shape} is different from image shape {ndas[0].shape}") @@ -472,7 +472,7 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe if isinstance(ndas_label, (MetaTensor, torch.Tensor)): unique_label = unique_label.data.cpu().numpy() - unique_label = unique_label.astype(np.int8).tolist() + unique_label = unique_label.astype(np.int16).tolist() label_substats = [] # each element is one label pixel_sum = 0 diff --git a/monai/bundle/config_item.py b/monai/bundle/config_item.py index 844d5b30bf..e5122bf3de 100644 --- a/monai/bundle/config_item.py +++ b/monai/bundle/config_item.py @@ -289,10 +289,7 @@ def instantiate(self, **kwargs: Any) -> object: mode = self.get_config().get("_mode_", CompInitMode.DEFAULT) args = self.resolve_args() args.update(kwargs) - try: - return instantiate(modname, mode, **args) - except Exception as e: - raise RuntimeError(f"Failed to instantiate {self}") from e + return instantiate(modname, mode, **args) class ConfigExpression(ConfigItem): diff --git a/monai/bundle/workflows.py b/monai/bundle/workflows.py index da3aa30141..d876f6d7ae 100644 --- a/monai/bundle/workflows.py +++ b/monai/bundle/workflows.py @@ -11,6 +11,7 @@ from __future__ import annotations +import json import os import sys import time @@ -24,6 +25,7 @@ from monai.bundle.config_parser import ConfigParser from monai.bundle.properties import InferProperties, MetaProperties, TrainProperties from monai.bundle.utils import DEFAULT_EXP_MGMT_SETTINGS, EXPR_KEY, ID_REF_KEY, ID_SEP_KEY +from monai.config import PathLike from monai.utils import BundleProperty, BundlePropertyConfig, deprecated_arg, deprecated_arg_default, ensure_tuple __all__ = ["BundleWorkflow", "ConfigWorkflow"] @@ -46,6 +48,10 @@ class BundleWorkflow(ABC): or "infer", "inference", "eval", "evaluation" for a inference workflow, other unsupported string will raise a ValueError. 
default to `None` for common workflow. + properties_path: the path to the JSON file of properties. + meta_file: filepath of the metadata file, if this is a list of file paths, their contents will be merged in order. + logging_file: config file for `logging` module in the program. for more details: + https://docs.python.org/3/library/logging.config.html#logging.config.fileConfig. """ @@ -59,21 +65,62 @@ class BundleWorkflow(ABC): new_name="workflow_type", msg_suffix="please use `workflow_type` instead.", ) - def __init__(self, workflow_type: str | None = None, workflow: str | None = None): + def __init__( + self, + workflow_type: str | None = None, + workflow: str | None = None, + properties_path: PathLike | None = None, + meta_file: str | Sequence[str] | None = None, + logging_file: str | None = None, + ): + if logging_file is not None: + if not os.path.isfile(logging_file): + raise FileNotFoundError(f"Cannot find the logging config file: {logging_file}.") + logger.info(f"Setting logging properties based on config: {logging_file}.") + fileConfig(logging_file, disable_existing_loggers=False) + + if meta_file is not None: + if isinstance(meta_file, str) and not os.path.isfile(meta_file): + logger.error( + f"Cannot find the metadata config file: {meta_file}. " + "Please see: https://docs.monai.io/en/stable/mb_specification.html" + ) + meta_file = None + if isinstance(meta_file, list): + for f in meta_file: + if not os.path.isfile(f): + logger.error( + f"Cannot find the metadata config file: {f}. " + "Please see: https://docs.monai.io/en/stable/mb_specification.html" + ) + meta_file = None + workflow_type = workflow if workflow is not None else workflow_type - if workflow_type is None: + if workflow_type is None and properties_path is None: self.properties = copy(MetaProperties) self.workflow_type = None + self.meta_file = meta_file + return + if properties_path is not None: + properties_path = Path(properties_path) + if not properties_path.is_file(): + raise ValueError(f"Property file {properties_path} does not exist.") + with open(properties_path) as json_file: + self.properties = json.load(json_file) + self.workflow_type = None + self.meta_file = meta_file return - if workflow_type.lower() in self.supported_train_type: + if workflow_type.lower() in self.supported_train_type: # type: ignore[union-attr] self.properties = {**TrainProperties, **MetaProperties} self.workflow_type = "train" - elif workflow_type.lower() in self.supported_infer_type: + elif workflow_type.lower() in self.supported_infer_type: # type: ignore[union-attr] self.properties = {**InferProperties, **MetaProperties} self.workflow_type = "infer" else: raise ValueError(f"Unsupported workflow type: '{workflow_type}'.") + self.meta_file = meta_file + @abstractmethod def initialize(self, *args: Any, **kwargs: Any) -> Any: """ @@ -142,6 +189,13 @@ def get_workflow_type(self): """ return self.workflow_type + def get_meta_file(self): + """ + Get the meta file. + + """ + return self.meta_file + def add_property(self, name: str, required: str, desc: str | None = None) -> None: """ Besides the default predefined properties, some 3rd party applications may need the bundle @@ -206,6 +260,7 @@ class ConfigWorkflow(BundleWorkflow): or "infer", "inference", "eval", "evaluation" for a inference workflow, other unsupported string will raise a ValueError. default to `None` for common workflow. + properties_path: the path to the JSON file of properties. override: id-value pairs to override or add the corresponding config content. e.g. 
``--net#input_chns 42``, ``--net %/data/other.json#net_arg`` @@ -230,28 +285,30 @@ def __init__( tracking: str | dict | None = None, workflow_type: str | None = None, workflow: str | None = None, + properties_path: PathLike | None = None, **override: Any, ) -> None: workflow_type = workflow if workflow is not None else workflow_type - super().__init__(workflow_type=workflow_type) if config_file is not None: _config_files = ensure_tuple(config_file) - self.config_root_path = Path(_config_files[0]).parent + config_root_path = Path(_config_files[0]).parent for _config_file in _config_files: _config_file = Path(_config_file) - if _config_file.parent != self.config_root_path: + if _config_file.parent != config_root_path: logger.warn( - f"Not all config files are in {self.config_root_path}. If logging_file and meta_file are" - f"not specified, {self.config_root_path} will be used as the default config root directory." + f"Not all config files are in {config_root_path}. If logging_file and meta_file are" + f"not specified, {config_root_path} will be used as the default config root directory." ) if not _config_file.is_file(): raise FileNotFoundError(f"Cannot find the config file: {_config_file}.") else: - self.config_root_path = Path("configs") - + config_root_path = Path("configs") + meta_file = str(config_root_path / "metadata.json") if meta_file is None else meta_file + super().__init__(workflow_type=workflow_type, meta_file=meta_file, properties_path=properties_path) + self.config_root_path = config_root_path logging_file = str(self.config_root_path / "logging.conf") if logging_file is None else logging_file if logging_file is not None: - if not os.path.exists(logging_file): + if not os.path.isfile(logging_file): if logging_file == str(self.config_root_path / "logging.conf"): logger.warn(f"Default logging file in {logging_file} does not exist, skipping logging.") else: @@ -262,14 +319,8 @@ def __init__( self.parser = ConfigParser() self.parser.read_config(f=config_file) - meta_file = str(self.config_root_path / "metadata.json") if meta_file is None else meta_file - if isinstance(meta_file, str) and not os.path.exists(meta_file): - logger.error( - f"Cannot find the metadata config file: {meta_file}. " - "Please see: https://docs.monai.io/en/stable/mb_specification.html" - ) - else: - self.parser.read_meta(f=meta_file) + if self.meta_file is not None: + self.parser.read_meta(f=self.meta_file) # the rest key-values in the _args are to override config content self.parser.update(pairs=override) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 531893d768..79e066303e 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -427,7 +427,7 @@ def _transform(self, index: int): class CacheNTransDataset(PersistentDataset): """ - Extension of `PersistentDataset`, tt can also cache the result of first N transforms, no matter it's random or not. + Extension of `PersistentDataset`, it can also cache the result of first N transforms, no matter it's random or not. 
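Example, a minimal sketch of the ``properties_path`` option added to ``ConfigWorkflow`` above (the file paths below are illustrative):

.. code-block:: python

    from monai.bundle import ConfigWorkflow

    workflow = ConfigWorkflow(
        config_file="./configs/train.json",
        workflow_type="train",
        properties_path="./configs/properties.json",  # JSON file of bundle properties
    )
    workflow.initialize()
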
""" diff --git a/monai/losses/__init__.py b/monai/losses/__init__.py index 92898c81ca..e937b53fa4 100644 --- a/monai/losses/__init__.py +++ b/monai/losses/__init__.py @@ -12,6 +12,7 @@ from __future__ import annotations from .adversarial_loss import PatchAdversarialLoss +from .barlow_twins import BarlowTwinsLoss from .cldice import SoftclDiceLoss, SoftDiceclDiceLoss from .contrastive import ContrastiveLoss from .deform import BendingEnergyLoss, DiffusionLoss @@ -40,5 +41,6 @@ from .spatial_mask import MaskedLoss from .spectral_loss import JukeboxLoss from .ssim_loss import SSIMLoss +from .sure_loss import SURELoss from .tversky import TverskyLoss from .unified_focal_loss import AsymmetricUnifiedFocalLoss diff --git a/monai/losses/barlow_twins.py b/monai/losses/barlow_twins.py new file mode 100644 index 0000000000..a61acca66e --- /dev/null +++ b/monai/losses/barlow_twins.py @@ -0,0 +1,84 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import torch +from torch.nn.modules.loss import _Loss + + +class BarlowTwinsLoss(_Loss): + """ + The Barlow Twins cost function takes the representations extracted by a neural network from two + distorted views and seeks to make the cross-correlation matrix of the two representations tend + towards identity. This encourages the neural network to learn similar representations with the least + amount of redundancy. This cost function can be used in particular in multimodal learning to work on + representations from two modalities. The most common use case is for unsupervised learning, where data + augmentations are used to generate 2 distorted views of the same sample to force the encoder to + extract useful features for downstream tasks. + + Zbontar, Jure, et al. "Barlow Twins: Self-Supervised Learning via Redundancy Reduction" International + conference on machine learning. PMLR, 2020. (http://proceedings.mlr.press/v139/zbontar21a/zbontar21a.pdf) + + Adapted from: + https://github.com/facebookresearch/barlowtwins + + """ + + def __init__(self, lambd: float = 5e-3) -> None: + """ + Args: + lamb: Can be any float to handle the informativeness and invariance trade-off. Ideally set to 5e-3. + + Raises: + ValueError: When an input of dimension length > 2 is passed + ValueError: When input and target are of different shapes + ValueError: When batch size is less than or equal to 1 + + """ + super().__init__() + self.lambd = lambd + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """ + Args: + input: the shape should be B[F]. + target: the shape should be B[F]. 
+ """ + if len(target.shape) > 2 or len(input.shape) > 2: + raise ValueError( + f"Either target or input has dimensions greater than 2 where target " + f"shape is ({target.shape}) and input shape is ({input.shape})" + ) + + if target.shape != input.shape: + raise ValueError(f"ground truth has differing shape ({target.shape}) from input ({input.shape})") + + if target.size(0) <= 1: + raise ValueError( + f"Batch size must be greater than 1 to compute Barlow Twins Loss, but got {target.size(0)}" + ) + + lambd_tensor = torch.as_tensor(self.lambd).to(input.device) + batch_size = input.shape[0] + + # normalize input and target + input_norm = (input - input.mean(0)) / input.std(0).add(1e-6) + target_norm = (target - target.mean(0)) / target.std(0).add(1e-6) + + # cross-correlation matrix + c = torch.mm(input_norm.t(), target_norm) / batch_size # input_norm.t() is FxB, target_norm is BxF so c is FxF + + # loss + c_diff = (c - torch.eye(c.size(0), device=c.device)).pow_(2) # FxF + c_diff[~torch.eye(c.size(0), device=c.device).bool()] *= lambd_tensor + + return c_diff.sum() diff --git a/monai/losses/dice.py b/monai/losses/dice.py index b3c0f57c6e..f1c357d31f 100644 --- a/monai/losses/dice.py +++ b/monai/losses/dice.py @@ -778,12 +778,22 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: Raises: ValueError: When number of dimensions for input and target are different. - ValueError: When number of channels for target is neither 1 nor the same as input. + ValueError: When number of channels for target is neither 1 (without one-hot encoding) nor the same as input. + + Returns: + torch.Tensor: value of the loss. """ - if len(input.shape) != len(target.shape): + if input.dim() != target.dim(): raise ValueError( "the number of dimensions for input and target should be the same, " + f"got shape {input.shape} (nb dims: {len(input.shape)}) and {target.shape} (nb dims: {len(target.shape)}). " + "if target is not one-hot encoded, please provide a tensor with shape B1H[WD]." + ) + + if target.shape[1] != 1 and target.shape[1] != input.shape[1]: + raise ValueError( + "number of channels for target is neither 1 (without one-hot encoding) nor the same as input, " f"got shape {input.shape} and {target.shape}." ) @@ -899,14 +909,24 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: Raises: ValueError: When number of dimensions for input and target are different. - ValueError: When number of channels for target is neither 1 nor the same as input. + ValueError: When number of channels for target is neither 1 (without one-hot encoding) nor the same as input. + Returns: + torch.Tensor: value of the loss. """ - if len(input.shape) != len(target.shape): + if input.dim() != target.dim(): raise ValueError( "the number of dimensions for input and target should be the same, " + f"got shape {input.shape} (nb dims: {len(input.shape)}) and {target.shape} (nb dims: {len(target.shape)}). " + "if target is not one-hot encoded, please provide a tensor with shape B1H[WD]." + ) + + if target.shape[1] != 1 and target.shape[1] != input.shape[1]: + raise ValueError( + "number of channels for target is neither 1 (without one-hot encoding) nor the same as input, " f"got shape {input.shape} and {target.shape}." ) + if self.to_onehot_y: n_pred_ch = input.shape[1] if n_pred_ch == 1: @@ -1015,15 +1035,23 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: target (torch.Tensor): the shape should be BNH[WD] or B1H[WD]. 
Raises: - ValueError: When the input and target tensors have different numbers of dimensions, or the target - channel isn't either one-hot encoded or categorical with the same shape of the input. + ValueError: When number of dimensions for input and target are different. + ValueError: When number of channels for target is neither 1 (without one-hot encoding) nor the same as input. Returns: torch.Tensor: value of the loss. """ if input.dim() != target.dim(): raise ValueError( - f"Input - {input.shape} - and target - {target.shape} - must have the same number of dimensions." + "the number of dimensions for input and target should be the same, " + f"got shape {input.shape} (nb dims: {len(input.shape)}) and {target.shape} (nb dims: {len(target.shape)}). " + "if target is not one-hot encoded, please provide a tensor with shape B1H[WD]." + ) + + if target.shape[1] != 1 and target.shape[1] != input.shape[1]: + raise ValueError( + "number of channels for target is neither 1 (without one-hot encoding) nor the same as input, " + f"got shape {input.shape} and {target.shape}." ) gdl_loss = self.generalized_dice(input, target) diff --git a/monai/losses/focal_loss.py b/monai/losses/focal_loss.py index 98c1a071b6..28d1c0cdc9 100644 --- a/monai/losses/focal_loss.py +++ b/monai/losses/focal_loss.py @@ -234,9 +234,8 @@ def sigmoid_focal_loss( """ # computing binary cross entropy with logits # equivalent to F.binary_cross_entropy_with_logits(input, target, reduction='none') - # see also https://github.com/pytorch/pytorch/blob/v1.9.0/aten/src/ATen/native/Loss.cpp#L231 - max_val = (-input).clamp(min=0) - loss: torch.Tensor = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log() + # see also https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/Loss.cpp#L363 + loss: torch.Tensor = input - input * target - F.logsigmoid(input) # sigmoid(-i) if t==1; sigmoid(i) if t==0 <=> # 1-sigmoid(i) if t==1; sigmoid(i) if t==0 <=> diff --git a/monai/losses/perceptual.py b/monai/losses/perceptual.py index 2207de5e64..fd61603b03 100644 --- a/monai/losses/perceptual.py +++ b/monai/losses/perceptual.py @@ -29,7 +29,7 @@ class PercetualNetworkType(StrEnum): squeeze = "squeeze" radimagenet_resnet50 = "radimagenet_resnet50" medicalnet_resnet10_23datasets = "medicalnet_resnet10_23datasets" - medical_resnet50_23datasets = "medical_resnet50_23datasets" + medicalnet_resnet50_23datasets = "medicalnet_resnet50_23datasets" resnet50 = "resnet50" diff --git a/monai/losses/sure_loss.py b/monai/losses/sure_loss.py new file mode 100644 index 0000000000..ebf25613a6 --- /dev/null +++ b/monai/losses/sure_loss.py @@ -0,0 +1,200 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from typing import Callable, Optional + +import torch +import torch.nn as nn +from torch.nn.modules.loss import _Loss + + +def complex_diff_abs_loss(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + """ + First compute the difference in the complex domain, + then get the absolute value and take the mse + + Args: + x, y - B, 2, H, W real valued tensors representing complex numbers + or B,1,H,W complex valued tensors + Returns: + l2_loss - scalar + """ + if not x.is_complex(): + x = torch.view_as_complex(x.permute(0, 2, 3, 1).contiguous()) + if not y.is_complex(): + y = torch.view_as_complex(y.permute(0, 2, 3, 1).contiguous()) + + diff = torch.abs(x - y) + return nn.functional.mse_loss(diff, torch.zeros_like(diff), reduction="mean") + + +def sure_loss_function( + operator: Callable, + x: torch.Tensor, + y_pseudo_gt: torch.Tensor, + y_ref: Optional[torch.Tensor] = None, + eps: Optional[float] = -1.0, + perturb_noise: Optional[torch.Tensor] = None, + complex_input: Optional[bool] = False, +) -> torch.Tensor: + """ + Args: + operator (function): The operator function that takes in an input + tensor x and returns an output tensor y. We will use this to compute + the divergence. More specifically, we will perturb the input x by a + small amount and compute the divergence between the perturbed output + and the reference output + + x (torch.Tensor): The input tensor of shape (B, C, H, W) to the + operator. For complex input, the shape is (B, 2, H, W) aka C=2 real. + For real input, the shape is (B, 1, H, W) real. + + y_pseudo_gt (torch.Tensor): The pseudo ground truth tensor of shape + (B, C, H, W) used to compute the L2 loss. For complex input, the shape is + (B, 2, H, W) aka C=2 real. For real input, the shape is (B, 1, H, W) + real. + + y_ref (torch.Tensor, optional): The reference output tensor of shape + (B, C, H, W) used to compute the divergence. Defaults to None. For + complex input, the shape is (B, 2, H, W) aka C=2 real. For real input, + the shape is (B, 1, H, W) real. + + eps (float, optional): The perturbation scalar. Set to -1 to set it + automatically estimated based on y_pseudo_gtk + + perturb_noise (torch.Tensor, optional): The noise vector of shape (B, C, H, W). + Defaults to None. For complex input, the shape is (B, 2, H, W) aka C=2 real. + For real input, the shape is (B, 1, H, W) real. + + complex_input(bool, optional): Whether the input is complex or not. + Defaults to False. + + Returns: + sure_loss (torch.Tensor): The SURE loss scalar. + """ + # perturb input + if perturb_noise is None: + perturb_noise = torch.randn_like(x) + if eps == -1.0: + eps = float(torch.abs(y_pseudo_gt.max())) / 1000 + # get y_ref if not provided + if y_ref is None: + y_ref = operator(x) + + # get perturbed output + x_perturbed = x + eps * perturb_noise + y_perturbed = operator(x_perturbed) + # divergence + divergence = torch.sum(1.0 / eps * torch.matmul(perturb_noise.permute(0, 1, 3, 2), y_perturbed - y_ref)) # type: ignore + # l2 loss between y_ref, y_pseudo_gt + if complex_input: + l2_loss = complex_diff_abs_loss(y_ref, y_pseudo_gt) + else: + # real input + l2_loss = nn.functional.mse_loss(y_ref, y_pseudo_gt, reduction="mean") + + # sure loss + sure_loss = l2_loss * divergence / (x.shape[0] * x.shape[2] * x.shape[3]) + return sure_loss + + +class SURELoss(_Loss): + """ + Calculate the Stein's Unbiased Risk Estimator (SURE) loss for a given operator. + + This is a differentiable loss function that can be used to train/guide an + operator (e.g. 
neural network), where the pseudo ground truth is available + but the reference ground truth is not. For example, in the MRI + reconstruction, the pseudo ground truth is the zero-filled reconstruction + and the reference ground truth is the fully sampled reconstruction. Often, + the reference ground truth is not available due to the lack of fully sampled + data. + + The original SURE loss is proposed in [1]. The SURE loss used for guiding + the diffusion model based MRI reconstruction is proposed in [2]. + + Reference + + [1] Stein, C.M.: Estimation of the mean of a multivariate normal distribution. Annals of Statistics + + [2] B. Ozturkler et al. SMRD: SURE-based Robust MRI Reconstruction with Diffusion Models. + (https://arxiv.org/pdf/2310.01799.pdf) + """ + + def __init__(self, perturb_noise: Optional[torch.Tensor] = None, eps: Optional[float] = None) -> None: + """ + Args: + perturb_noise (torch.Tensor, optional): The noise vector of shape + (B, C, H, W). Defaults to None. For complex input, the shape is (B, 2, H, W) aka C=2 real. + For real input, the shape is (B, 1, H, W) real. + + eps (float, optional): The perturbation scalar. Defaults to None. + """ + super().__init__() + self.perturb_noise = perturb_noise + self.eps = eps + + def forward( + self, + operator: Callable, + x: torch.Tensor, + y_pseudo_gt: torch.Tensor, + y_ref: Optional[torch.Tensor] = None, + complex_input: Optional[bool] = False, + ) -> torch.Tensor: + """ + Args: + operator (function): The operator function that takes in an input + tensor x and returns an output tensor y. We will use this to compute + the divergence. More specifically, we will perturb the input x by a + small amount and compute the divergence between the perturbed output + and the reference output + + x (torch.Tensor): The input tensor of shape (B, C, H, W) to the + operator. C=1 or 2: For complex input, the shape is (B, 2, H, W) aka + C=2 real. For real input, the shape is (B, 1, H, W) real. + + y_pseudo_gt (torch.Tensor): The pseudo ground truth tensor of shape + (B, C, H, W) used to compute the L2 loss. C=1 or 2: For complex + input, the shape is (B, 2, H, W) aka C=2 real. For real input, the + shape is (B, 1, H, W) real. + + y_ref (torch.Tensor, optional): The reference output tensor of the + same shape as y_pseudo_gt + + Returns: + sure_loss (torch.Tensor): The SURE loss scalar. + """ + + # check inputs shapes + if x.dim() != 4: + raise ValueError(f"Input tensor x should be 4D, got {x.dim()}.") + if y_pseudo_gt.dim() != 4: + raise ValueError(f"Input tensor y_pseudo_gt should be 4D, but got {y_pseudo_gt.dim()}.") + if y_ref is not None and y_ref.dim() != 4: + raise ValueError(f"Input tensor y_ref should be 4D, but got {y_ref.dim()}.") + if x.shape != y_pseudo_gt.shape: + raise ValueError( + f"Input tensor x and y_pseudo_gt should have the same shape, but got x shape {x.shape}, " + f"y_pseudo_gt shape {y_pseudo_gt.shape}." + ) + if y_ref is not None and y_pseudo_gt.shape != y_ref.shape: + raise ValueError( + f"Input tensor y_pseudo_gt and y_ref should have the same shape, but got y_pseudo_gt shape {y_pseudo_gt.shape}, " + f"y_ref shape {y_ref.shape}." 
+ ) + + # compute loss + loss = sure_loss_function(operator, x, y_pseudo_gt, y_ref, self.eps, self.perturb_noise, complex_input) + + return loss diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py index a6dc1a49a2..249b2dc951 100644 --- a/monai/metrics/metric.py +++ b/monai/metrics/metric.py @@ -37,6 +37,9 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + def __str__(self): + return self.__class__.__name__ + class IterationMetric(Metric): """ diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py index 9d29654ee3..4c8b8aa71b 100644 --- a/monai/metrics/regression.py +++ b/monai/metrics/regression.py @@ -303,7 +303,7 @@ def _compute_metric(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor if self.spatial_dims == 3 and dims != 5: raise ValueError( - f"y_pred should have 4 dimensions (batch, channel, height, width, depth) when using {self.spatial_dims}" + f"y_pred should have 5 dimensions (batch, channel, height, width, depth) when using {self.spatial_dims}" f" spatial dimensions, got {dims}." ) diff --git a/monai/networks/blocks/patchembedding.py b/monai/networks/blocks/patchembedding.py index 7d56045814..91bd73ebbb 100644 --- a/monai/networks/blocks/patchembedding.py +++ b/monai/networks/blocks/patchembedding.py @@ -120,9 +120,7 @@ def __init__( for in_size, pa_size in zip(img_size, patch_size): grid_size.append(in_size // pa_size) - with torch.no_grad(): - pos_embeddings = build_sincos_position_embedding(grid_size, hidden_size, spatial_dims) - self.position_embeddings.data.copy_(pos_embeddings.float()) + self.position_embeddings = build_sincos_position_embedding(grid_size, hidden_size, spatial_dims) else: raise ValueError(f"pos_embed_type {self.pos_embed_type} not supported.") diff --git a/monai/networks/blocks/pos_embed_utils.py b/monai/networks/blocks/pos_embed_utils.py index e03553307e..21586e56da 100644 --- a/monai/networks/blocks/pos_embed_utils.py +++ b/monai/networks/blocks/pos_embed_utils.py @@ -46,7 +46,7 @@ def build_sincos_position_embedding( temperature (float): The temperature for the sin-cos position embedding. Returns: - pos_embed (nn.Parameter): The sin-cos position embedding as a learnable parameter. + pos_embed (nn.Parameter): The sin-cos position embedding as a fixed parameter. """ if spatial_dims == 2: diff --git a/monai/networks/layers/__init__.py b/monai/networks/layers/__init__.py index d61ed57f7f..3a6e4aa554 100644 --- a/monai/networks/layers/__init__.py +++ b/monai/networks/layers/__init__.py @@ -11,6 +11,7 @@ from __future__ import annotations +from .conjugate_gradient import ConjugateGradient from .convutils import calculate_out_shape, gaussian_1d, polyval, same_padding, stride_minus_kernel_padding from .drop_path import DropPath from .factories import Act, Conv, Dropout, LayerFactory, Norm, Pad, Pool, split_args diff --git a/monai/networks/layers/conjugate_gradient.py b/monai/networks/layers/conjugate_gradient.py new file mode 100644 index 0000000000..93a45930d7 --- /dev/null +++ b/monai/networks/layers/conjugate_gradient.py @@ -0,0 +1,112 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
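# A minimal usage sketch of the SURELoss added above (monai/losses/sure_loss.py);
# the identity operator and the tensor sizes are illustrative stand-ins.
import torch
from monai.losses import SURELoss

def operator(x: torch.Tensor) -> torch.Tensor:
    return x  # stand-in for a reconstruction / denoising network

x = torch.randn(2, 1, 64, 64)            # real-valued input, shape (B, 1, H, W)
y_pseudo_gt = torch.randn(2, 1, 64, 64)  # pseudo ground truth, same shape
loss = SURELoss(eps=-1.0)(operator, x, y_pseudo_gt, complex_input=False)  # eps=-1.0 enables auto perturbation scale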
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from typing import Callable + +import torch +from torch import nn + + +def _zdot(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor: + """ + Complex dot product between tensors x1 and x2: sum(x1.*x2) + """ + if torch.is_complex(x1): + assert torch.is_complex(x2), "x1 and x2 must both be complex" + return torch.sum(x1.conj() * x2) + else: + return torch.sum(x1 * x2) + + +def _zdot_single(x: torch.Tensor) -> torch.Tensor: + """ + Complex dot product between tensor x and itself + """ + res = _zdot(x, x) + if torch.is_complex(res): + return res.real + else: + return res + + +class ConjugateGradient(nn.Module): + """ + Congugate Gradient (CG) solver for linear systems Ax = y. + + For linear_op that is positive definite and self-adjoint, CG is + guaranteed to converge CG is often used to solve linear systems of the form + Ax = y, where A is too large to store explicitly, but can be computed via a + linear operator. + + As a result, here we won't set A explicitly as a matrix, but rather as a + linear operator. For example, A could be a FFT/IFFT operation + """ + + def __init__(self, linear_op: Callable, num_iter: int): + """ + Args: + linear_op: Linear operator + num_iter: Number of iterations to run CG + """ + super().__init__() + + self.linear_op = linear_op + self.num_iter = num_iter + + def update( + self, x: torch.Tensor, p: torch.Tensor, r: torch.Tensor, rsold: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + perform one iteration of the CG method. It takes the current solution x, + the current search direction p, the current residual r, and the old + residual norm rsold as inputs. Then it computes the new solution, search + direction, residual, and residual norm, and returns them. + """ + + dy = self.linear_op(p) + p_dot_dy = _zdot(p, dy) + alpha = rsold / p_dot_dy + x = x + alpha * p + r = r - alpha * dy + rsnew = _zdot_single(r) + beta = rsnew / rsold + rsold = rsnew + p = beta * p + r + return x, p, r, rsold + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + """ + run conjugate gradient for num_iter iterations to solve Ax = y + + Args: + x: tensor (real or complex); Initial guess for linear system Ax = y. + The size of x should be applicable to the linear operator. For + example, if the linear operator is FFT, then x is HCHW; if the + linear operator is a matrix multiplication, then x is a vector + + y: tensor (real or complex); Measurement. Same size as x + + Returns: + x: Solution to Ax = y + """ + # Compute residual + r = y - self.linear_op(x) + rsold = _zdot_single(r) + p = r + + # Update + for _i in range(self.num_iter): + x, p, r, rsold = self.update(x, p, r, rsold) + if rsold < 1e-10: + break + return x diff --git a/monai/networks/utils.py b/monai/networks/utils.py index 42e537648a..4e6699f16b 100644 --- a/monai/networks/utils.py +++ b/monai/networks/utils.py @@ -850,7 +850,10 @@ def _onnx_trt_compile( # wrap the serialized TensorRT engine back to a TorchScript module. 
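# A small sketch of solving A x = y with the ConjugateGradient layer defined above;
# the 8x8 positive-definite matrix below is an illustrative linear operator.
import torch
from monai.networks.layers import ConjugateGradient

a = 2.0 * torch.eye(8)  # symmetric positive-definite operator matrix

def linear_op(x: torch.Tensor) -> torch.Tensor:
    return a @ x

y = torch.randn(8)
solver = ConjugateGradient(linear_op=linear_op, num_iter=10)
x = solver(torch.zeros(8), y)  # converges to y / 2 for this operator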
trt_model = torch_tensorrt.ts.embed_engine_in_new_module( - f.getvalue(), torch.device(f"cuda:{device}"), input_names, output_names + f.getvalue(), + device=torch.device(f"cuda:{device}"), + input_binding_names=input_names, + output_binding_names=output_names, ) return trt_model diff --git a/monai/transforms/__init__.py b/monai/transforms/__init__.py index 2aa8fbf8a1..ab9adb6a99 100644 --- a/monai/transforms/__init__.py +++ b/monai/transforms/__init__.py @@ -92,6 +92,7 @@ from .croppad.functional import crop_func, crop_or_pad_nd, pad_func, pad_nd from .intensity.array import ( AdjustContrast, + ClipIntensityPercentiles, ComputeHoVerMaps, DetectEnvelope, ForegroundMask, @@ -135,6 +136,9 @@ AdjustContrastd, AdjustContrastD, AdjustContrastDict, + ClipIntensityPercentilesd, + ClipIntensityPercentilesD, + ClipIntensityPercentilesDict, ComputeHoVerMapsd, ComputeHoVerMapsD, ComputeHoVerMapsDict, @@ -336,6 +340,18 @@ VoteEnsembled, VoteEnsembleDict, ) +from .regularization.array import CutMix, CutOut, MixUp +from .regularization.dictionary import ( + CutMixd, + CutMixD, + CutMixDict, + CutOutd, + CutOutD, + CutOutDict, + MixUpd, + MixUpD, + MixUpDict, +) from .signal.array import ( SignalContinuousWavelet, SignalFillEmpty, diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py index f9667402c9..f656475a36 100644 --- a/monai/transforms/intensity/array.py +++ b/monai/transforms/intensity/array.py @@ -30,7 +30,7 @@ from monai.data.utils import get_random_patch, get_valid_patch_size from monai.networks.layers import GaussianFilter, HilbertTransform, MedianFilter, SavitzkyGolayFilter from monai.transforms.transform import RandomizableTransform, Transform -from monai.transforms.utils import Fourier, equalize_hist, is_positive, rescale_array +from monai.transforms.utils import Fourier, equalize_hist, is_positive, rescale_array, soft_clip from monai.transforms.utils_pytorch_numpy_unification import clip, percentile, where from monai.utils.enums import TransformBackends from monai.utils.misc import ensure_tuple, ensure_tuple_rep, ensure_tuple_size, fall_back_tuple @@ -54,6 +54,7 @@ "NormalizeIntensity", "ThresholdIntensity", "ScaleIntensityRange", + "ClipIntensityPercentiles", "AdjustContrast", "RandAdjustContrast", "ScaleIntensityRangePercentiles", @@ -91,24 +92,33 @@ class RandGaussianNoise(RandomizableTransform): mean: Mean or “centre” of the distribution. std: Standard deviation (spread) of distribution. dtype: output data type, if None, same as input image. defaults to float32. + sample_std: If True, sample the spread of the Gaussian distribution uniformly from 0 to std. 
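Example of the new ``sample_std`` flag (a minimal sketch; the image size is illustrative):

.. code-block:: python

    import torch
    from monai.transforms import RandGaussianNoise

    img = torch.zeros(1, 32, 32)
    add_noise = RandGaussianNoise(prob=1.0, std=0.05, sample_std=False)
    noisy = add_noise(img)  # noise drawn with a fixed spread of 0.05
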
""" backend = [TransformBackends.TORCH, TransformBackends.NUMPY] - def __init__(self, prob: float = 0.1, mean: float = 0.0, std: float = 0.1, dtype: DtypeLike = np.float32) -> None: + def __init__( + self, + prob: float = 0.1, + mean: float = 0.0, + std: float = 0.1, + dtype: DtypeLike = np.float32, + sample_std: bool = True, + ) -> None: RandomizableTransform.__init__(self, prob) self.mean = mean self.std = std self.dtype = dtype self.noise: np.ndarray | None = None + self.sample_std = sample_std def randomize(self, img: NdarrayOrTensor, mean: float | None = None) -> None: super().randomize(None) if not self._do_transform: return None - rand_std = self.R.uniform(0, self.std) - noise = self.R.normal(self.mean if mean is None else mean, rand_std, size=img.shape) + std = self.R.uniform(0, self.std) if self.sample_std else self.std + noise = self.R.normal(self.mean if mean is None else mean, std, size=img.shape) # noise is float64 array, convert to the output dtype to save memory self.noise, *_ = convert_data_type(noise, dtype=self.dtype) @@ -998,6 +1008,151 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: return ret +class ClipIntensityPercentiles(Transform): + """ + Apply clip based on the intensity distribution of input image. + If `sharpness_factor` is provided, the intensity values will be soft clipped according to + f(x) = x + (1/sharpness_factor)*softplus(- c(x - minv)) - (1/sharpness_factor)*softplus(c(x - maxv)) + From https://medium.com/life-at-hopper/clip-it-clip-it-good-1f1bf711b291 + + Soft clipping preserves the order of the values and maintains the gradient everywhere. + For example: + + .. code-block:: python + :emphasize-lines: 11, 22 + + image = torch.Tensor( + [[[1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5]]]) + + # Hard clipping from lower and upper image intensity percentiles + hard_clipper = ClipIntensityPercentiles(30, 70) + print(hard_clipper(image)) + metatensor([[[2., 2., 3., 4., 4.], + [2., 2., 3., 4., 4.], + [2., 2., 3., 4., 4.], + [2., 2., 3., 4., 4.], + [2., 2., 3., 4., 4.], + [2., 2., 3., 4., 4.]]]) + + + # Soft clipping from lower and upper image intensity percentiles + soft_clipper = ClipIntensityPercentiles(30, 70, 10.) + print(soft_clipper(image)) + metatensor([[[2.0000, 2.0693, 3.0000, 3.9307, 4.0000], + [2.0000, 2.0693, 3.0000, 3.9307, 4.0000], + [2.0000, 2.0693, 3.0000, 3.9307, 4.0000], + [2.0000, 2.0693, 3.0000, 3.9307, 4.0000], + [2.0000, 2.0693, 3.0000, 3.9307, 4.0000], + [2.0000, 2.0693, 3.0000, 3.9307, 4.0000]]]) + + See Also: + + - :py:class:`monai.transforms.ScaleIntensityRangePercentiles` + """ + + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + + def __init__( + self, + lower: float | None, + upper: float | None, + sharpness_factor: float | None = None, + channel_wise: bool = False, + return_clipping_values: bool = False, + dtype: DtypeLike = np.float32, + ) -> None: + """ + Args: + lower: lower intensity percentile. In the case of hard clipping, None will have the same effect as 0 by + not clipping the lowest input values. However, in the case of soft clipping, None and zero will have + two different effects: None will not apply clipping to low values, whereas zero will still transform + the lower values according to the soft clipping transformation. Please check for more details: + https://medium.com/life-at-hopper/clip-it-clip-it-good-1f1bf711b291. + upper: upper intensity percentile. The same as for lower, but this time with the highest values. 
If we + are looking to perform soft clipping, if None then there will be no effect on this side whereas if set + to 100, the values will be passed via the corresponding clipping equation. + sharpness_factor: if not None, the intensity values will be soft clipped according to + f(x) = x + (1/sharpness_factor)*softplus(- c(x - minv)) - (1/sharpness_factor)*softplus(c(x - maxv)). + defaults to None. + channel_wise: if True, compute intensity percentile and normalize every channel separately. + default to False. + return_clipping_values: whether to return the calculated percentiles in tensor meta information. + If soft clipping and requested percentile is None, return None as the corresponding clipping + values in meta information. Clipping values are stored in a list with each element corresponding + to a channel if channel_wise is set to True. defaults to False. + dtype: output data type, if None, same as input image. defaults to float32. + """ + if lower is None and upper is None: + raise ValueError("lower or upper percentiles must be provided") + if lower is not None and (lower < 0.0 or lower > 100.0): + raise ValueError("Percentiles must be in the range [0, 100]") + if upper is not None and (upper < 0.0 or upper > 100.0): + raise ValueError("Percentiles must be in the range [0, 100]") + if upper is not None and lower is not None and upper < lower: + raise ValueError("upper must be greater than or equal to lower") + if sharpness_factor is not None and sharpness_factor <= 0: + raise ValueError("sharpness_factor must be greater than 0") + + self.lower = lower + self.upper = upper + self.sharpness_factor = sharpness_factor + self.channel_wise = channel_wise + if return_clipping_values: + self.clipping_values: list[tuple[float | None, float | None]] = [] + self.return_clipping_values = return_clipping_values + self.dtype = dtype + + def _clip(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + if self.sharpness_factor is not None: + lower_percentile = percentile(img, self.lower) if self.lower is not None else None + upper_percentile = percentile(img, self.upper) if self.upper is not None else None + img = soft_clip(img, self.sharpness_factor, lower_percentile, upper_percentile, self.dtype) + else: + lower_percentile = percentile(img, self.lower) if self.lower is not None else percentile(img, 0) + upper_percentile = percentile(img, self.upper) if self.upper is not None else percentile(img, 100) + img = clip(img, lower_percentile, upper_percentile) + + if self.return_clipping_values: + self.clipping_values.append( + ( + ( + lower_percentile + if lower_percentile is None + else lower_percentile.item() if hasattr(lower_percentile, "item") else lower_percentile + ), + ( + upper_percentile + if upper_percentile is None + else upper_percentile.item() if hasattr(upper_percentile, "item") else upper_percentile + ), + ) + ) + img = convert_to_tensor(img, track_meta=False) + return img + + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + """ + Apply the transform to `img`. + """ + img = convert_to_tensor(img, track_meta=get_track_meta()) + img_t = convert_to_tensor(img, track_meta=False) + if self.channel_wise: + img_t = torch.stack([self._clip(img=d) for d in img_t]) # type: ignore + else: + img_t = self._clip(img=img_t) + + img = convert_to_dst_type(img_t, dst=img)[0] + if self.return_clipping_values: + img.meta["clipping_values"] = self.clipping_values # type: ignore + + return img + + class AdjustContrast(Transform): """ Changes image intensity with gamma transform. 
Each pixel/voxel intensity is updated as:: @@ -1831,15 +1986,19 @@ class RandGibbsNoise(RandomizableTransform): Args: prob (float): probability of applying the transform. - alpha (Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes + alpha (float, Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes values in the interval [0,1] with alpha = 0 acting as the identity mapping. If a length-2 list is given as [a,b] then the value of alpha will be sampled uniformly from the interval [a,b]. 0 <= a <= b <= 1. + If a float is given, then the value of alpha will be sampled uniformly from the interval [0, alpha]. """ backend = GibbsNoise.backend - def __init__(self, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0)) -> None: + def __init__(self, prob: float = 0.1, alpha: float | Sequence[float] = (0.0, 1.0)) -> None: + if isinstance(alpha, float): + alpha = (0, alpha) + alpha = ensure_tuple(alpha) if len(alpha) != 2: raise ValueError("alpha length must be 2.") if alpha[1] > 1 or alpha[0] < 0: diff --git a/monai/transforms/intensity/dictionary.py b/monai/transforms/intensity/dictionary.py index 058ef87b95..5dbac485fe 100644 --- a/monai/transforms/intensity/dictionary.py +++ b/monai/transforms/intensity/dictionary.py @@ -26,6 +26,7 @@ from monai.data.meta_obj import get_track_meta from monai.transforms.intensity.array import ( AdjustContrast, + ClipIntensityPercentiles, ComputeHoVerMaps, ForegroundMask, GaussianSharpen, @@ -77,6 +78,7 @@ "NormalizeIntensityd", "ThresholdIntensityd", "ScaleIntensityRanged", + "ClipIntensityPercentilesd", "AdjustContrastd", "RandAdjustContrastd", "ScaleIntensityRangePercentilesd", @@ -122,6 +124,8 @@ "ThresholdIntensityDict", "ScaleIntensityRangeD", "ScaleIntensityRangeDict", + "ClipIntensityPercentilesD", + "ClipIntensityPercentilesDict", "AdjustContrastD", "AdjustContrastDict", "RandAdjustContrastD", @@ -172,7 +176,7 @@ class RandGaussianNoised(RandomizableTransform, MapTransform): """ Dictionary-based version :py:class:`monai.transforms.RandGaussianNoise`. - Add Gaussian noise to image. This transform assumes all the expected fields have same shape, if want to add + Add Gaussian noise to image. This transform assumes all the expected fields have same shape, if you want to add different noise for every field, please use this transform separately. Args: @@ -183,6 +187,7 @@ class RandGaussianNoised(RandomizableTransform, MapTransform): std: Standard deviation (spread) of distribution. dtype: output data type, if None, same as input image. defaults to float32. allow_missing_keys: don't raise exception if key is missing. + sample_std: If True, sample the spread of the Gaussian distribution uniformly from 0 to std. 
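        A minimal usage sketch of the new ``sample_std`` flag (the ``"image"`` key and the
        ``image`` array are illustrative only):

        .. code-block:: python

            # draw noise with the fixed std=0.5 instead of sampling the std from [0, 0.5]
            noiser = RandGaussianNoised(keys="image", prob=1.0, mean=0.0, std=0.5, sample_std=False)
            noised = noiser({"image": image})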
""" backend = RandGaussianNoise.backend @@ -195,10 +200,11 @@ def __init__( std: float = 0.1, dtype: DtypeLike = np.float32, allow_missing_keys: bool = False, + sample_std: bool = True, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.rand_gaussian_noise = RandGaussianNoise(mean=mean, std=std, prob=1.0, dtype=dtype) + self.rand_gaussian_noise = RandGaussianNoise(mean=mean, std=std, prob=1.0, dtype=dtype, sample_std=sample_std) def set_random_state( self, seed: int | None = None, state: np.random.RandomState | None = None @@ -884,6 +890,36 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> dict[Hashable, N return d +class ClipIntensityPercentilesd(MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.transforms.ClipIntensityPercentiles`. + Clip the intensity values of input image to a specific range based on the intensity distribution of the input. + If `sharpness_factor` is provided, the intensity values will be soft clipped according to + f(x) = x + (1/sharpness_factor) * softplus(- c(x - minv)) - (1/sharpness_factor)*softplus(c(x - maxv)) + """ + + def __init__( + self, + keys: KeysCollection, + lower: float | None, + upper: float | None, + sharpness_factor: float | None = None, + channel_wise: bool = False, + dtype: DtypeLike = np.float32, + allow_missing_keys: bool = False, + ) -> None: + super().__init__(keys, allow_missing_keys) + self.scaler = ClipIntensityPercentiles( + lower=lower, upper=upper, sharpness_factor=sharpness_factor, channel_wise=channel_wise, dtype=dtype + ) + + def __call__(self, data: dict) -> dict: + d = dict(data) + for key in self.key_iterator(d): + d[key] = self.scaler(d[key]) + return d + + class AdjustContrastd(MapTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.AdjustContrast`. @@ -1421,10 +1457,11 @@ class RandGibbsNoised(RandomizableTransform, MapTransform): keys: 'image', 'label', or ['image', 'label'] depending on which data you need to transform. prob (float): probability of applying the transform. - alpha (float, List[float]): Parametrizes the intensity of the Gibbs noise filter applied. Takes + alpha (float, Sequence[float]): Parametrizes the intensity of the Gibbs noise filter applied. Takes values in the interval [0,1] with alpha = 0 acting as the identity mapping. If a length-2 list is given as [a,b] then the value of alpha will be sampled uniformly from the interval [a,b]. + If a float is given, then the value of alpha will be sampled uniformly from the interval [0, alpha]. allow_missing_keys: do not raise exception if key is missing. 
""" @@ -1434,7 +1471,7 @@ def __init__( self, keys: KeysCollection, prob: float = 0.1, - alpha: Sequence[float] = (0.0, 1.0), + alpha: float | Sequence[float] = (0.0, 1.0), allow_missing_keys: bool = False, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) @@ -1926,6 +1963,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> dict[Hashable, N NormalizeIntensityD = NormalizeIntensityDict = NormalizeIntensityd ThresholdIntensityD = ThresholdIntensityDict = ThresholdIntensityd ScaleIntensityRangeD = ScaleIntensityRangeDict = ScaleIntensityRanged +ClipIntensityPercentilesD = ClipIntensityPercentilesDict = ClipIntensityPercentilesd AdjustContrastD = AdjustContrastDict = AdjustContrastd RandAdjustContrastD = RandAdjustContrastDict = RandAdjustContrastd ScaleIntensityRangePercentilesD = ScaleIntensityRangePercentilesDict = ScaleIntensityRangePercentilesd diff --git a/monai/transforms/lazy/utils.py b/monai/transforms/lazy/utils.py index 359559e319..3819e6bb10 100644 --- a/monai/transforms/lazy/utils.py +++ b/monai/transforms/lazy/utils.py @@ -18,9 +18,11 @@ import monai from monai.config import NdarrayOrTensor +from monai.data.meta_obj import get_track_meta, MetaObj +from monai.data.meta_tensor import MetaTensor from monai.data.utils import AFFINE_TOL from monai.transforms.utils_pytorch_numpy_unification import allclose -from monai.utils import LazyAttr, convert_to_numpy, convert_to_tensor, look_up_option +from monai.utils import LazyAttr, MetaKeys, convert_to_numpy, convert_to_tensor, look_up_option __all__ = ["resample", "combine_transforms"] @@ -227,3 +229,42 @@ def resample(data: torch.Tensor, matrix: NdarrayOrTensor, kwargs: dict | None = resampler.lazy = False # resampler is a lazytransform with resampler.trace_transform(False): # don't track this transform in `img` return resampler(img=img, **call_kwargs) + + +def apply_to_geometry( + data: torch.Tensor, + meta_info: dict | MetaObj, +): + """ + Apply an affine geometric transform or deformation field to geometry. + At present this is limited to the transformation of points. + + The points must be provided as a tensor and must be compatible with a homogeneous + transform. This means that: + - 2D points are of the form (x, y, 1) + - 3D points are of the form (x, y, z, 1) + + The affine transform or deformation field is applied to the the points and a tensor of + the same shape as the input tensor is returned. + + Args: + data: the tensor of points to be transformed. + meta_info: the metadata containing the affine transformation + """ + + if not isinstance(data, (torch.Tensor, MetaTensor)): + raise TypeError(f"data {type(data)} must be a torch.Tensor or MetaTensor") + + data = convert_to_tensor(data, track_meta=get_track_meta()) + + transform = meta_info.meta[MetaKeys.AFFINE] + + if transform.dtype != data.dtype: + transform = transform.to(data.dtype) + + if data.shape[1] != transform.shape[0]: + raise ValueError(f"second element of data.shape {data.shape} must match transform shape {transform.shape}") + + result = torch.matmul(data, transform.T) + + return result diff --git a/monai/transforms/regularization/__init__.py b/monai/transforms/regularization/__init__.py new file mode 100644 index 0000000000..1e97f89407 --- /dev/null +++ b/monai/transforms/regularization/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/monai/transforms/regularization/array.py b/monai/transforms/regularization/array.py new file mode 100644 index 0000000000..0b495c8623 --- /dev/null +++ b/monai/transforms/regularization/array.py @@ -0,0 +1,174 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from abc import abstractmethod +from math import ceil, sqrt + +import torch + +from ..transform import RandomizableTransform + +__all__ = ["MixUp", "CutMix", "CutOut", "Mixer"] + + +class Mixer(RandomizableTransform): + + def __init__(self, batch_size: int, alpha: float = 1.0) -> None: + """ + Mixer is a base class providing the basic logic for the mixup-class of + augmentations. In all cases, we need to sample the mixing weights for each + sample (lambda in the notation used in the papers). Also, pairs of samples + being mixed are picked by randomly shuffling the batch samples. + + Args: + batch_size (int): number of samples per batch. That is, samples are expected tp + be of size batchsize x channels [x depth] x height x width. + alpha (float, optional): mixing weights are sampled from the Beta(alpha, alpha) + distribution. Defaults to 1.0, the uniform distribution. + """ + super().__init__() + if alpha <= 0: + raise ValueError(f"Expected positive number, but got {alpha = }") + self.alpha = alpha + self.batch_size = batch_size + + @abstractmethod + def apply(self, data: torch.Tensor): + raise NotImplementedError() + + def randomize(self, data=None) -> None: + """ + Sometimes you need may to apply the same transform to different tensors. + The idea is to get a sample and then apply it with apply() as often + as needed. You need to call this method everytime you apply the transform to a new + batch. + """ + self._params = ( + torch.from_numpy(self.R.beta(self.alpha, self.alpha, self.batch_size)).type(torch.float32), + self.R.permutation(self.batch_size), + ) + + +class MixUp(Mixer): + """MixUp as described in: + Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz. + mixup: Beyond Empirical Risk Minimization, ICLR 2018 + + Class derived from :py:class:`monai.transforms.Mixer`. See corresponding + documentation for details on the constructor parameters. 
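    A typical use, analogous to the CutMix example below (``loader``, ``model`` and
    ``loss_function`` are assumed to exist; ``labels`` must be image-shaped, e.g. one-hot
    maps of size batch x channels x spatial, because the same per-sample blending is
    applied to them):

    .. code-block:: python

        mixup = MixUp(batch_size=8, alpha=0.5)
        for batch in loader:
            images, labels = batch
            augimg, auglabels = mixup(images, labels)  # same mixing weights for images and labels
            output = model(augimg)
            loss = loss_function(output, auglabels)
            ...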
+ """ + + def apply(self, data: torch.Tensor): + weight, perm = self._params + nsamples, *dims = data.shape + if len(weight) != nsamples: + raise ValueError(f"Expected batch of size: {len(weight)}, but got {nsamples}") + + if len(dims) not in [3, 4]: + raise ValueError("Unexpected number of dimensions") + + mixweight = weight[(Ellipsis,) + (None,) * len(dims)] + return mixweight * data + (1 - mixweight) * data[perm, ...] + + def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None): + self.randomize() + if labels is None: + return self.apply(data) + return self.apply(data), self.apply(labels) + + +class CutMix(Mixer): + """CutMix augmentation as described in: + Sangdoo Yun, Dongyoon Han, Seong Joon Oh, Sanghyuk Chun, Junsuk Choe, Youngjoon Yoo. + CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, + ICCV 2019 + + Class derived from :py:class:`monai.transforms.Mixer`. See corresponding + documentation for details on the constructor parameters. Here, alpha not only determines + the mixing weight but also the size of the random rectangles used during for mixing. + Please refer to the paper for details. + + The most common use case is something close to: + + .. code-block:: python + + cm = CutMix(batch_size=8, alpha=0.5) + for batch in loader: + images, labels = batch + augimg, auglabels = cm(images, labels) + output = model(augimg) + loss = loss_function(output, auglabels) + ... + + """ + + def apply(self, data: torch.Tensor): + weights, perm = self._params + nsamples, _, *dims = data.shape + if len(weights) != nsamples: + raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") + + mask = torch.ones_like(data) + for s, weight in enumerate(weights): + coords = [torch.randint(0, d, size=(1,)) for d in dims] + lengths = [d * sqrt(1 - weight) for d in dims] + idx = [slice(None)] + [slice(c, min(ceil(c + ln), d)) for c, ln, d in zip(coords, lengths, dims)] + mask[s][idx] = 0 + + return mask * data + (1 - mask) * data[perm, ...] + + def apply_on_labels(self, labels: torch.Tensor): + weights, perm = self._params + nsamples, *dims = labels.shape + if len(weights) != nsamples: + raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") + + mixweight = weights[(Ellipsis,) + (None,) * len(dims)] + return mixweight * labels + (1 - mixweight) * labels[perm, ...] + + def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None): + self.randomize() + augmented = self.apply(data) + return (augmented, self.apply_on_labels(labels)) if labels is not None else augmented + + +class CutOut(Mixer): + """Cutout as described in the paper: + Terrance DeVries, Graham W. Taylor. + Improved Regularization of Convolutional Neural Networks with Cutout, + arXiv:1708.04552 + + Class derived from :py:class:`monai.transforms.Mixer`. See corresponding + documentation for details on the constructor parameters. Here, alpha not only determines + the mixing weight but also the size of the random rectangles being cut put. + Please refer to the paper for details. 
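    A minimal sketch (``images`` is assumed to be a batch of size ``batch_size`` in the
    layout described in :py:class:`monai.transforms.Mixer`):

    .. code-block:: python

        cutout = CutOut(batch_size=8, alpha=1.0)
        zeroed = cutout(images)  # a random rectangle in each sample is filled with zeros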
+ """ + + def apply(self, data: torch.Tensor): + weights, _ = self._params + nsamples, _, *dims = data.shape + if len(weights) != nsamples: + raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") + + mask = torch.ones_like(data) + for s, weight in enumerate(weights): + coords = [torch.randint(0, d, size=(1,)) for d in dims] + lengths = [d * sqrt(1 - weight) for d in dims] + idx = [slice(None)] + [slice(c, min(ceil(c + ln), d)) for c, ln, d in zip(coords, lengths, dims)] + mask[s][idx] = 0 + + return mask * data + + def __call__(self, data: torch.Tensor): + self.randomize() + return self.apply(data) diff --git a/monai/transforms/regularization/dictionary.py b/monai/transforms/regularization/dictionary.py new file mode 100644 index 0000000000..373913da99 --- /dev/null +++ b/monai/transforms/regularization/dictionary.py @@ -0,0 +1,97 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from monai.config import KeysCollection +from monai.utils.misc import ensure_tuple + +from ..transform import MapTransform +from .array import CutMix, CutOut, MixUp + +__all__ = ["MixUpd", "MixUpD", "MixUpDict", "CutMixd", "CutMixD", "CutMixDict", "CutOutd", "CutOutD", "CutOutDict"] + + +class MixUpd(MapTransform): + """ + Dictionary-based version :py:class:`monai.transforms.MixUp`. + + Notice that the mixup transformation will be the same for all entries + for consistency, i.e. images and labels must be applied the same augmenation. + """ + + def __init__( + self, keys: KeysCollection, batch_size: int, alpha: float = 1.0, allow_missing_keys: bool = False + ) -> None: + super().__init__(keys, allow_missing_keys) + self.mixup = MixUp(batch_size, alpha) + + def __call__(self, data): + self.mixup.randomize() + result = dict(data) + for k in self.keys: + result[k] = self.mixup.apply(data[k]) + return result + + +class CutMixd(MapTransform): + """ + Dictionary-based version :py:class:`monai.transforms.CutMix`. + + Notice that the mixture weights will be the same for all entries + for consistency, i.e. images and labels must be aggregated with the same weights, + but the random crops are not. + """ + + def __init__( + self, + keys: KeysCollection, + batch_size: int, + label_keys: KeysCollection | None = None, + alpha: float = 1.0, + allow_missing_keys: bool = False, + ) -> None: + super().__init__(keys, allow_missing_keys) + self.mixer = CutMix(batch_size, alpha) + self.label_keys = ensure_tuple(label_keys) if label_keys is not None else [] + + def __call__(self, data): + self.mixer.randomize() + result = dict(data) + for k in self.keys: + result[k] = self.mixer.apply(data[k]) + for k in self.label_keys: + result[k] = self.mixer.apply_on_labels(data[k]) + return result + + +class CutOutd(MapTransform): + """ + Dictionary-based version :py:class:`monai.transforms.CutOut`. + + Notice that the cutout is different for every entry in the dictionary. 
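    A minimal sketch (the ``"image"`` key and the ``images`` batch are illustrative only):

    .. code-block:: python

        cutoutd = CutOutd(keys="image", batch_size=8)
        out = cutoutd({"image": images})  # the removed region differs between samples and between keys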
+ """ + + def __init__(self, keys: KeysCollection, batch_size: int, allow_missing_keys: bool = False) -> None: + super().__init__(keys, allow_missing_keys) + self.cutout = CutOut(batch_size) + + def __call__(self, data): + result = dict(data) + self.cutout.randomize() + for k in self.keys: + result[k] = self.cutout(data[k]) + return result + + +MixUpD = MixUpDict = MixUpd +CutMixD = CutMixDict = CutMixd +CutOutD = CutOutDict = CutOutd diff --git a/monai/transforms/spatial/functional.py b/monai/transforms/spatial/functional.py index add4e7f5ea..bbf970d30f 100644 --- a/monai/transforms/spatial/functional.py +++ b/monai/transforms/spatial/functional.py @@ -24,13 +24,14 @@ import monai from monai.config import USE_COMPILED from monai.config.type_definitions import NdarrayOrTensor -from monai.data.meta_obj import get_track_meta +from monai.data.meta_obj import get_track_meta, MetaObj from monai.data.meta_tensor import MetaTensor from monai.data.utils import AFFINE_TOL, compute_shape_offset, to_affine_nd from monai.networks.layers import AffineTransform from monai.transforms.croppad.array import ResizeWithPadOrCrop from monai.transforms.intensity.array import GaussianSmooth from monai.transforms.inverse import TraceableTransform +from monai.transforms.lazy.utils import apply_to_geometry from monai.transforms.utils import create_rotate, create_translate, resolves_modes, scale_affine from monai.transforms.utils_pytorch_numpy_unification import allclose from monai.utils import ( @@ -229,7 +230,7 @@ def orientation(img, original_affine, spatial_ornt, lazy, transform_info) -> tor return out.copy_meta_from(meta_info) if isinstance(out, MetaTensor) else out # type: ignore -def flip(img, sp_axes, lazy, transform_info): +def flip_impl(img, sp_axes, lazy, transform_info): """ Functional implementation of flip. This function operates eagerly or lazily according to @@ -258,6 +259,11 @@ def flip(img, sp_axes, lazy, transform_info): meta_info = TraceableTransform.track_transform_meta( img, sp_size=sp_size, affine=xform, extra_info=extra_info, transform_info=transform_info, lazy=lazy ) + return axes, meta_info + + +def flip_raster(img, sp_axes, lazy, transform_info): + axes, meta_info = flip_impl(img, sp_axes, lazy, transform_info) out = _maybe_new_metatensor(img) if lazy: return out.copy_meta_from(meta_info) if isinstance(out, MetaTensor) else meta_info @@ -265,6 +271,27 @@ def flip(img, sp_axes, lazy, transform_info): return out.copy_meta_from(meta_info) if isinstance(out, MetaTensor) else out +def flip_geom(img, sp_axes, lazy, transform_info): + _, meta_info = flip_impl(img, sp_axes, lazy, transform_info) + out = _maybe_new_metatensor(img) + if lazy: + return out.copy_meta_from(meta_info) if isinstance(out, MetaTensor) else meta_info + out = apply_to_geometry(out, meta_info) + return out.copy_meta_from(meta_info) if isinstance(out, MetaTensor) else out + + +def flip(image, sp_axes, lazy, transform_info): + """ + Flip the tensor / MetaTensor according to `sp_axes`. 
+ """ + + if isinstance(image.MetaTensor): + if image.kind == Kind.RASTER: + return flip_raster(image, sp_axes, lazy, transform_info) + elif image.kind == Kind.GEOM: + return flip_geom(image, sp_axes, lazy, transform_info) + + def resize( img, out_size, mode, align_corners, dtype, input_ndim, anti_aliasing, anti_aliasing_sigma, lazy, transform_info ): diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py index e282ecff24..14f35e1219 100644 --- a/monai/transforms/utils.py +++ b/monai/transforms/utils.py @@ -38,6 +38,7 @@ nonzero, ravel, searchsorted, + softplus, unique, unravel_index, where, @@ -131,9 +132,45 @@ "resolves_modes", "has_status_keys", "distance_transform_edt", + "soft_clip", ] +def soft_clip( + arr: NdarrayOrTensor, + sharpness_factor: float = 1.0, + minv: NdarrayOrTensor | float | int | None = None, + maxv: NdarrayOrTensor | float | int | None = None, + dtype: DtypeLike | torch.dtype = np.float32, +) -> NdarrayOrTensor: + """ + Apply soft clip to the input array or tensor. + The intensity values will be soft clipped according to + f(x) = x + (1/sharpness_factor)*softplus(- c(x - minv)) - (1/sharpness_factor)*softplus(c(x - maxv)) + From https://medium.com/life-at-hopper/clip-it-clip-it-good-1f1bf711b291 + + To perform one-sided clipping, set either minv or maxv to None. + Args: + arr: input array to clip. + sharpness_factor: the sharpness of the soft clip function, default to 1. + minv: minimum value of target clipped array. + maxv: maximum value of target clipped array. + dtype: if not None, convert input array to dtype before computation. + + """ + + if dtype is not None: + arr, *_ = convert_data_type(arr, dtype=dtype) + + v = arr + if minv is not None: + v = v + softplus(-sharpness_factor * (arr - minv)) / sharpness_factor + if maxv is not None: + v = v - softplus(sharpness_factor * (arr - maxv)) / sharpness_factor + + return v + + def rand_choice(prob: float = 0.5) -> bool: """ Returns True if a randomly chosen number is less than or equal to `prob`, by default this is a 50/50 chance. @@ -625,9 +662,12 @@ def generate_label_classes_crop_centers( for i, array in enumerate(indices): if len(array) == 0: - ratios_[i] = 0 - if warn: - warnings.warn(f"no available indices of class {i} to crop, set the crop ratio of this class to zero.") + if ratios_[i] != 0: + ratios_[i] = 0 + if warn: + warnings.warn( + f"no available indices of class {i} to crop, setting the crop ratio of this class to zero." + ) centers = [] classes = rand_state.choice(len(ratios_), size=num_samples, p=np.asarray(ratios_) / np.sum(ratios_)) diff --git a/monai/transforms/utils_pytorch_numpy_unification.py b/monai/transforms/utils_pytorch_numpy_unification.py index 0774d50314..020d99af16 100644 --- a/monai/transforms/utils_pytorch_numpy_unification.py +++ b/monai/transforms/utils_pytorch_numpy_unification.py @@ -52,9 +52,24 @@ "median", "mean", "std", + "softplus", ] +def softplus(x: NdarrayOrTensor) -> NdarrayOrTensor: + """stable softplus through `np.logaddexp` with equivalent implementation for torch. + + Args: + x: array/tensor. + + Returns: + Softplus of the input. 
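    For example (values shown approximately):

    .. code-block:: python

        softplus(np.array([-100.0, 0.0, 100.0]))
        # -> array([~0.0, 0.6931, 100.0]); log(1 + exp(x)) evaluated without overflow for large x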
+ """ + if isinstance(x, np.ndarray): + return np.logaddexp(np.zeros_like(x), x) + return torch.logaddexp(torch.zeros_like(x), x) + + def allclose(a: NdarrayTensor, b: NdarrayOrTensor, rtol=1e-5, atol=1e-8, equal_nan=False) -> bool: """`np.allclose` with equivalent implementation for torch.""" b, *_ = convert_to_dst_type(b, a, wrap_sequence=True) diff --git a/monai/utils/misc.py b/monai/utils/misc.py index caa7c067df..dd0ccada3d 100644 --- a/monai/utils/misc.py +++ b/monai/utils/misc.py @@ -527,7 +527,7 @@ def doc_images() -> str | None: @staticmethod def algo_hash() -> str | None: - return os.environ.get("MONAI_ALGO_HASH", "249bf4b") + return os.environ.get("MONAI_ALGO_HASH", "b910ab8") @staticmethod def trace_transform() -> str | None: diff --git a/monai/utils/module.py b/monai/utils/module.py index 5e058c105b..6f301d8067 100644 --- a/monai/utils/module.py +++ b/monai/utils/module.py @@ -272,7 +272,7 @@ def instantiate(__path: str, __mode: str, **kwargs: Any) -> Any: return pdb.runcall(component, **kwargs) except Exception as e: raise RuntimeError( - f"Failed to instantiate component '{__path}' with kwargs: {kwargs}" + f"Failed to instantiate component '{__path}' with keywords: {','.join(kwargs.keys())}" f"\n set '_mode_={CompInitMode.DEBUG}' to enter the debugging mode." ) from e diff --git a/requirements-dev.txt b/requirements-dev.txt index b08fef874b..af1b8b89d5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -26,7 +26,7 @@ mypy>=1.5.0 ninja torchvision psutil -cucim>=23.2.0; platform_system == "Linux" +cucim-cu12; platform_system == "Linux" and python_version >= "3.9" and python_version <= "3.10" openslide-python imagecodecs; platform_system == "Linux" or platform_system == "Darwin" tifffile; platform_system == "Linux" or platform_system == "Darwin" @@ -46,7 +46,7 @@ pynrrd pre-commit pydicom h5py -nni; platform_system == "Linux" +nni; platform_system == "Linux" and "arm" not in platform_machine and "aarch" not in platform_machine optuna git+https://github.com/Project-MONAI/MetricsReloaded@monai-support#egg=MetricsReloaded onnx>=1.13.0 diff --git a/runtests.sh b/runtests.sh index 0c60bc0f58..0b3e20ce49 100755 --- a/runtests.sh +++ b/runtests.sh @@ -738,12 +738,14 @@ fi # network training/inference/eval integration tests if [ $doNetTests = true ] then + set +e # disable exit on failure so that diagnostics can be given on failure echo "${separator}${blue}integration${noColor}" for i in tests/*integration_*.py do echo "$i" ${cmdPrefix}${cmd} "$i" done + set -e # enable exit on failure fi # run model zoo tests diff --git a/setup.cfg b/setup.cfg index 229e2ace56..d7cb703d25 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,7 +59,7 @@ all = tqdm>=4.47.0 lmdb psutil - cucim>=23.2.0 + cucim-cu12; python_version >= '3.9' and python_version <= '3.10' openslide-python tifffile imagecodecs @@ -111,7 +111,7 @@ lmdb = psutil = psutil cucim = - cucim>=23.2.0 + cucim-cu12 openslide = openslide-python tifffile = diff --git a/tests/nonconfig_workflow.py b/tests/nonconfig_workflow.py index 7b5328bf72..b2c44c12c6 100644 --- a/tests/nonconfig_workflow.py +++ b/tests/nonconfig_workflow.py @@ -36,8 +36,8 @@ class NonConfigWorkflow(BundleWorkflow): """ - def __init__(self, filename, output_dir): - super().__init__(workflow_type="inference") + def __init__(self, filename, output_dir, meta_file=None, logging_file=None): + super().__init__(workflow_type="inference", meta_file=meta_file, logging_file=logging_file) self.filename = filename self.output_dir = output_dir self._bundle_root = "will 
override" diff --git a/tests/test_auto3dseg_hpo.py b/tests/test_auto3dseg_hpo.py index 34d00336ec..53d09defa0 100644 --- a/tests/test_auto3dseg_hpo.py +++ b/tests/test_auto3dseg_hpo.py @@ -181,7 +181,7 @@ def test_get_history(self) -> None: NNIGen().run_algo(obj_filename, self.work_dir) history = import_bundle_algo_history(self.work_dir, only_trained=True) - assert len(history) == 3 + assert len(history) == 1 def tearDown(self) -> None: self.test_dir.cleanup() diff --git a/tests/test_barlow_twins_loss.py b/tests/test_barlow_twins_loss.py new file mode 100644 index 0000000000..81f4032e0c --- /dev/null +++ b/tests/test_barlow_twins_loss.py @@ -0,0 +1,109 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import unittest + +import numpy as np +import torch +from parameterized import parameterized + +from monai.losses import BarlowTwinsLoss + +TEST_CASES = [ + [ # shape: (2, 4), (2, 4) + {"lambd": 5e-3}, + { + "input": torch.tensor([[1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0]]), + "target": torch.tensor([[1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0]]), + }, + 4.0, + ], + [ # shape: (2, 4), (2, 4) + {"lambd": 5e-3}, + { + "input": torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]), + "target": torch.tensor([[1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0]]), + }, + 4.0, + ], + [ # shape: (2, 4), (2, 4) + {"lambd": 5e-3}, + { + "input": torch.tensor([[1.0, 0.0, 1.0, 1.0], [0.0, 1.0, 1.0, 0.0]]), + "target": torch.tensor([[1.0, 1.0, 1.0, 0.0], [1.0, 1.0, 0.0, 1.0]]), + }, + 5.2562, + ], + [ # shape: (2, 4), (2, 4) + {"lambd": 5e-4}, + { + "input": torch.tensor([[2.0, 3.0, 1.0, 2.0], [0.0, 1.0, 2.0, 5.0]]), + "target": torch.tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), + }, + 5.0015, + ], + [ # shape: (4, 4), (4, 4) + {"lambd": 5e-3}, + { + "input": torch.tensor( + [[1.0, 2.0, 1.0, 1.0], [3.0, 1.0, 1.0, 2.0], [1.0, 1.0, 1.0, 1.0], [2.0, 1.0, 1.0, 0.0]] + ), + "target": torch.tensor( + [ + [0.0, 1.0, -1.0, 0.0], + [1 / 3, 0.0, -2 / 3, 1 / 3], + [-2 / 3, -1.0, 7 / 3, 1 / 3], + [1 / 3, 0.0, 1 / 3, -2 / 3], + ] + ), + }, + 1.4736, + ], +] + + +class TestBarlowTwinsLoss(unittest.TestCase): + + @parameterized.expand(TEST_CASES) + def test_result(self, input_param, input_data, expected_val): + barlowtwinsloss = BarlowTwinsLoss(**input_param) + result = barlowtwinsloss(**input_data) + np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, atol=1e-4, rtol=1e-4) + + def test_ill_shape(self): + loss = BarlowTwinsLoss(lambd=5e-3) + with self.assertRaises(ValueError): + loss(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + + def test_ill_batch_size(self): + loss = BarlowTwinsLoss(lambd=5e-3) + with self.assertRaises(ValueError): + loss(torch.ones((1, 2)), torch.ones((1, 2))) + + def test_with_cuda(self): + loss = BarlowTwinsLoss(lambd=5e-3) + i = torch.ones((2, 10)) + j = torch.ones((2, 10)) + if torch.cuda.is_available(): + i = i.cuda() + j = j.cuda() + output = loss(i, j) + 
np.testing.assert_allclose(output.detach().cpu().numpy(), 10.0, atol=1e-4, rtol=1e-4) + + def check_warning_raised(self): + with self.assertWarns(Warning): + BarlowTwinsLoss(lambd=5e-3, batch_size=1) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_bundle_workflow.py b/tests/test_bundle_workflow.py index f7da37acef..9a276b577f 100644 --- a/tests/test_bundle_workflow.py +++ b/tests/test_bundle_workflow.py @@ -35,6 +35,8 @@ TEST_CASE_3 = [os.path.join(os.path.dirname(__file__), "testing_data", "config_fl_train.json")] +TEST_CASE_NON_CONFIG_WRONG_LOG = [None, "logging.conf", "Cannot find the logging config file: logging.conf."] + class TestBundleWorkflow(unittest.TestCase): @@ -103,6 +105,16 @@ def test_inference_config(self, config_file): ) self._test_inferer(inferer) + # test property path + inferer = ConfigWorkflow( + config_file=config_file, + properties_path=os.path.join(os.path.dirname(__file__), "testing_data", "fl_infer_properties.json"), + logging_file=os.path.join(os.path.dirname(__file__), "testing_data", "logging.conf"), + **override, + ) + self._test_inferer(inferer) + self.assertEqual(inferer.workflow_type, None) + @parameterized.expand([TEST_CASE_3]) def test_train_config(self, config_file): # test standard MONAI model-zoo config workflow @@ -144,8 +156,14 @@ def test_train_config(self, config_file): def test_non_config(self): # test user defined python style workflow inferer = NonConfigWorkflow(self.filename, self.data_dir) + self.assertEqual(inferer.meta_file, None) self._test_inferer(inferer) + @parameterized.expand([TEST_CASE_NON_CONFIG_WRONG_LOG]) + def test_non_config_wrong_log_cases(self, meta_file, logging_file, expected_error): + with self.assertRaisesRegex(FileNotFoundError, expected_error): + NonConfigWorkflow(self.filename, self.data_dir, meta_file, logging_file) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_clip_intensity_percentiles.py b/tests/test_clip_intensity_percentiles.py new file mode 100644 index 0000000000..82471e25ce --- /dev/null +++ b/tests/test_clip_intensity_percentiles.py @@ -0,0 +1,185 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
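# The tests below compare ClipIntensityPercentiles against direct use of the utility
# functions it builds on; for instance (illustrative sketch), two-sided hard clipping
# at the 5th/95th percentiles is expected to reproduce:
#
#     lower, upper = percentile(im, (5, 95))
#     expected = clip(im, lower, upper)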
+from __future__ import annotations + +import unittest + +import torch +from parameterized import parameterized + +from monai.transforms import ClipIntensityPercentiles +from monai.transforms.utils import soft_clip +from monai.transforms.utils_pytorch_numpy_unification import clip, percentile +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose + + +class TestClipIntensityPercentiles2D(NumpyImageTestCase2D): + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_two_sided(self, p): + hard_clipper = ClipIntensityPercentiles(upper=95, lower=5) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (5, 95)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_high(self, p): + hard_clipper = ClipIntensityPercentiles(upper=95, lower=None) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (0, 95)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_low(self, p): + hard_clipper = ClipIntensityPercentiles(upper=None, lower=5) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (5, 100)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_two_sided(self, p): + soft_clipper = ClipIntensityPercentiles(upper=95, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + lower, upper = percentile(im, (5, 95)) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_high(self, p): + soft_clipper = ClipIntensityPercentiles(upper=95, lower=None, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + upper = percentile(im, 95) + expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) + # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=5e-5, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_low(self, p): + soft_clipper = ClipIntensityPercentiles(upper=None, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + lower = percentile(im, 5) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_channel_wise(self, p): + clipper = ClipIntensityPercentiles(upper=95, lower=5, channel_wise=True) + im = p(self.imt) + result = clipper(im) + for i, c in enumerate(im): + lower, upper = percentile(c, (5, 95)) + expected = clip(c, lower, upper) + 
assert_allclose(result[i], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + def test_ill_sharpness_factor(self): + with self.assertRaises(ValueError): + ClipIntensityPercentiles(upper=95, lower=5, sharpness_factor=0.0) + + def test_ill_lower_percentile(self): + with self.assertRaises(ValueError): + ClipIntensityPercentiles(upper=None, lower=-1) + + def test_ill_upper_percentile(self): + with self.assertRaises(ValueError): + ClipIntensityPercentiles(upper=101, lower=None) + + def test_ill_percentiles(self): + with self.assertRaises(ValueError): + ClipIntensityPercentiles(upper=95, lower=96) + + def test_ill_both_none(self): + with self.assertRaises(ValueError): + ClipIntensityPercentiles(upper=None, lower=None) + + +class TestClipIntensityPercentiles3D(NumpyImageTestCase3D): + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_two_sided(self, p): + hard_clipper = ClipIntensityPercentiles(upper=95, lower=5) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (5, 95)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_high(self, p): + hard_clipper = ClipIntensityPercentiles(upper=95, lower=None) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (0, 95)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_low(self, p): + hard_clipper = ClipIntensityPercentiles(upper=None, lower=5) + im = p(self.imt) + result = hard_clipper(im) + lower, upper = percentile(im, (5, 100)) + expected = clip(im, lower, upper) + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_two_sided(self, p): + soft_clipper = ClipIntensityPercentiles(upper=95, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + lower, upper = percentile(im, (5, 95)) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_high(self, p): + soft_clipper = ClipIntensityPercentiles(upper=95, lower=None, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + upper = percentile(im, 95) + expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) + # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=5e-5, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_low(self, p): + soft_clipper = ClipIntensityPercentiles(upper=None, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper(im) + lower = percentile(im, 5) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, 
atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_channel_wise(self, p): + clipper = ClipIntensityPercentiles(upper=95, lower=5, channel_wise=True) + im = p(self.imt) + result = clipper(im) + for i, c in enumerate(im): + lower, upper = percentile(c, (5, 95)) + expected = clip(c, lower, upper) + assert_allclose(result[i], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_clip_intensity_percentilesd.py b/tests/test_clip_intensity_percentilesd.py new file mode 100644 index 0000000000..2b49383182 --- /dev/null +++ b/tests/test_clip_intensity_percentilesd.py @@ -0,0 +1,205 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import unittest + +import torch +from parameterized import parameterized + +from monai.transforms import ClipIntensityPercentilesd +from monai.transforms.utils import soft_clip +from monai.transforms.utils_pytorch_numpy_unification import clip, percentile +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose + + +class TestClipIntensityPercentilesd2D(NumpyImageTestCase2D): + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_two_sided(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, (5, 95)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_high(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, (0, 95)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_low(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, (5, 100)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_two_sided(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + lower, upper = percentile(im, (5, 95)) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + + 
@parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_high(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + upper = percentile(im, 95) + expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) + # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=5e-5, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_low(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + lower = percentile(im, 5) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_channel_wise(self, p): + key = "img" + clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, channel_wise=True) + im = p(self.imt) + result = clipper({key: im}) + for i, c in enumerate(im): + lower, upper = percentile(c, (5, 95)) + expected = clip(c, lower, upper) + assert_allclose(result[key][i], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + def test_ill_sharpness_factor(self): + key = "img" + with self.assertRaises(ValueError): + ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, sharpness_factor=0.0) + + def test_ill_lower_percentile(self): + key = "img" + with self.assertRaises(ValueError): + ClipIntensityPercentilesd(keys=[key], upper=None, lower=-1) + + def test_ill_upper_percentile(self): + key = "img" + with self.assertRaises(ValueError): + ClipIntensityPercentilesd(keys=[key], upper=101, lower=None) + + def test_ill_percentiles(self): + key = "img" + with self.assertRaises(ValueError): + ClipIntensityPercentilesd(keys=[key], upper=95, lower=96) + + def test_ill_both_none(self): + key = "img" + with self.assertRaises(ValueError): + ClipIntensityPercentilesd(keys=[key], upper=None, lower=None) + + +class TestClipIntensityPercentilesd3D(NumpyImageTestCase3D): + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_two_sided(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, (5, 95)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_high(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, (0, 95)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_hard_clipping_one_sided_low(self, p): + key = "img" + hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5) + im = p(self.imt) + result = hard_clipper({key: im}) + lower, upper = percentile(im, 
(5, 100)) + expected = clip(im, lower, upper) + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_two_sided(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + lower, upper = percentile(im, (5, 95)) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_high(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + upper = percentile(im, 95) + expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) + # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=5e-5, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_soft_clipping_one_sided_low(self, p): + key = "img" + soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5, sharpness_factor=1.0) + im = p(self.imt) + result = soft_clipper({key: im}) + lower = percentile(im, 5) + expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) + # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + + @parameterized.expand([[p] for p in TEST_NDARRAYS]) + def test_channel_wise(self, p): + key = "img" + clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, channel_wise=True) + im = p(self.imt) + result = clipper({key: im}) + for i, c in enumerate(im): + lower, upper = percentile(c, (5, 95)) + expected = clip(c, lower, upper) + assert_allclose(result[key][i], p(expected), type_test="tensor", rtol=1e-7, atol=0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_conjugate_gradient.py b/tests/test_conjugate_gradient.py new file mode 100644 index 0000000000..64efe3b168 --- /dev/null +++ b/tests/test_conjugate_gradient.py @@ -0,0 +1,56 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
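# These tests exercise the ConjugateGradient layer as a linear-system solver: for a
# symmetric matrix A (supplied as the operator a_op) and a measurement y, the layer is
# expected to iterate towards x satisfying A @ x = y, so its output is checked against
# torch.linalg.solve(a_mat, y) for both a real-valued and a complex-valued system.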
+ +from __future__ import annotations + +import unittest + +import torch + +from monai.networks.layers import ConjugateGradient + + +class TestConjugateGradient(unittest.TestCase): + + def test_real_valued_inverse(self): + """Test ConjugateGradient with real-valued input: when the input is real + value, the output should be the inverse of the matrix.""" + a_dim = 3 + a_mat = torch.tensor([[1, 2, 3], [2, 1, 2], [3, 2, 1]], dtype=torch.float) + + def a_op(x): + return a_mat @ x + + cg_solver = ConjugateGradient(a_op, num_iter=100) + # define the measurement + y = torch.tensor([1, 2, 3], dtype=torch.float) + # solve for x + x = cg_solver(torch.zeros(a_dim), y) + x_ref = torch.linalg.solve(a_mat, y) + # assert torch.allclose(x, x_ref, atol=1e-6), 'CG solver failed to converge to reference solution' + self.assertTrue(torch.allclose(x, x_ref, atol=1e-6)) + + def test_complex_valued_inverse(self): + a_dim = 3 + a_mat = torch.tensor([[1, 2, 3], [2, 1, 2], [3, 2, 1]], dtype=torch.complex64) + + def a_op(x): + return a_mat @ x + + cg_solver = ConjugateGradient(a_op, num_iter=100) + y = torch.tensor([1, 2, 3], dtype=torch.complex64) + x = cg_solver(torch.zeros(a_dim, dtype=torch.complex64), y) + x_ref = torch.linalg.solve(a_mat, y) + self.assertTrue(torch.allclose(x, x_ref, atol=1e-6)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_convert_to_onnx.py b/tests/test_convert_to_onnx.py index 398d260c52..798c510800 100644 --- a/tests/test_convert_to_onnx.py +++ b/tests/test_convert_to_onnx.py @@ -12,6 +12,7 @@ from __future__ import annotations import itertools +import platform import unittest import torch @@ -29,6 +30,12 @@ TESTS = list(itertools.product(TORCH_DEVICE_OPTIONS, [True, False], [True, False])) TESTS_ORT = list(itertools.product(TORCH_DEVICE_OPTIONS, [True])) +ON_AARCH64 = platform.machine() == "aarch64" +if ON_AARCH64: + rtol, atol = 1e-1, 1e-2 +else: + rtol, atol = 1e-3, 1e-4 + onnx, _ = optional_import("onnx") @@ -56,8 +63,8 @@ def test_unet(self, device, use_trace, use_ort): device=device, use_ort=use_ort, use_trace=use_trace, - rtol=1e-3, - atol=1e-4, + rtol=rtol, + atol=atol, ) else: # https://github.com/pytorch/pytorch/blob/release/1.9/torch/onnx/__init__.py#L182 @@ -72,8 +79,8 @@ def test_unet(self, device, use_trace, use_ort): device=device, use_ort=use_ort, use_trace=use_trace, - rtol=1e-3, - atol=1e-4, + rtol=rtol, + atol=atol, ) self.assertTrue(isinstance(onnx_model, onnx.ModelProto)) @@ -107,8 +114,8 @@ def test_seg_res_net(self, device, use_ort): device=device, use_ort=use_ort, use_trace=True, - rtol=1e-3, - atol=1e-4, + rtol=rtol, + atol=atol, ) self.assertTrue(isinstance(onnx_model, onnx.ModelProto)) diff --git a/tests/test_dice_ce_loss.py b/tests/test_dice_ce_loss.py index 225618ed2c..97c7ae5050 100644 --- a/tests/test_dice_ce_loss.py +++ b/tests/test_dice_ce_loss.py @@ -93,10 +93,20 @@ def test_result(self, input_param, input_data, expected_val): result = diceceloss(**input_data) np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, atol=1e-4, rtol=1e-4) - # def test_ill_shape(self): - # loss = DiceCELoss() - # with self.assertRaisesRegex(ValueError, ""): - # loss(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + def test_ill_shape(self): + loss = DiceCELoss() + with self.assertRaises(AssertionError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 2, 5))) + + def test_ill_shape2(self): + loss = DiceCELoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + + def 
test_ill_shape3(self): + loss = DiceCELoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 3, 4, 4)), torch.ones((1, 2, 4, 4))) # def test_ill_reduction(self): # with self.assertRaisesRegex(ValueError, ""): diff --git a/tests/test_dice_focal_loss.py b/tests/test_dice_focal_loss.py index 13899da003..814a174762 100644 --- a/tests/test_dice_focal_loss.py +++ b/tests/test_dice_focal_loss.py @@ -69,8 +69,18 @@ def test_result_no_onehot_no_bg(self, size, onehot): def test_ill_shape(self): loss = DiceFocalLoss() - with self.assertRaisesRegex(ValueError, ""): - loss(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + with self.assertRaises(AssertionError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 2, 5))) + + def test_ill_shape2(self): + loss = DiceFocalLoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + + def test_ill_shape3(self): + loss = DiceFocalLoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 3, 4, 4)), torch.ones((1, 2, 4, 4))) def test_ill_lambda(self): with self.assertRaisesRegex(ValueError, ""): diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py index b0137ae245..f3c982056c 100644 --- a/tests/test_dynunet.py +++ b/tests/test_dynunet.py @@ -11,6 +11,7 @@ from __future__ import annotations +import platform import unittest from typing import Any, Sequence @@ -24,6 +25,12 @@ InstanceNorm3dNVFuser, _ = optional_import("apex.normalization", name="InstanceNorm3dNVFuser") +ON_AARCH64 = platform.machine() == "aarch64" +if ON_AARCH64: + rtol, atol = 1e-2, 1e-2 +else: + rtol, atol = 1e-4, 1e-4 + device = "cuda" if torch.cuda.is_available() else "cpu" strides: Sequence[Sequence[int] | int] @@ -159,7 +166,7 @@ def test_consistency(self, input_param, input_shape, _): with eval_mode(net_fuser): result_fuser = net_fuser(input_tensor) - assert_allclose(result, result_fuser, rtol=1e-4, atol=1e-4) + assert_allclose(result, result_fuser, rtol=rtol, atol=atol) class TestDynUNetDeepSupervision(unittest.TestCase): diff --git a/tests/test_focal_loss.py b/tests/test_focal_loss.py index de8d625058..0bb8a078ae 100644 --- a/tests/test_focal_loss.py +++ b/tests/test_focal_loss.py @@ -132,7 +132,7 @@ def test_consistency_with_cross_entropy_2d_no_reduction(self): error = np.abs(a - b) max_error = np.maximum(error, max_error) - assert np.allclose(max_error, 0) + assert np.allclose(max_error, 0, atol=1e-6) def test_consistency_with_cross_entropy_2d_onehot_label(self): """For gamma=0 the focal loss reduces to the cross entropy loss""" diff --git a/tests/test_generalized_dice_focal_loss.py b/tests/test_generalized_dice_focal_loss.py index 8a0a80865e..65252611ca 100644 --- a/tests/test_generalized_dice_focal_loss.py +++ b/tests/test_generalized_dice_focal_loss.py @@ -59,8 +59,18 @@ def test_result_no_onehot_no_bg(self): def test_ill_shape(self): loss = GeneralizedDiceFocalLoss() - with self.assertRaisesRegex(ValueError, ""): - loss(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + with self.assertRaises(AssertionError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 2, 5))) + + def test_ill_shape2(self): + loss = GeneralizedDiceFocalLoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + + def test_ill_shape3(self): + loss = GeneralizedDiceFocalLoss() + with self.assertRaises(ValueError): + loss.forward(torch.ones((1, 3, 4, 4)), torch.ones((1, 2, 4, 4))) def test_ill_lambda(self): with self.assertRaisesRegex(ValueError, ""): diff --git 
a/tests/test_patchembedding.py b/tests/test_patchembedding.py index f8610d9214..d059145033 100644 --- a/tests/test_patchembedding.py +++ b/tests/test_patchembedding.py @@ -93,6 +93,32 @@ def test_shape(self, input_param, input_shape, expected_shape): result = net(torch.randn(input_shape)) self.assertEqual(result.shape, expected_shape) + def test_sincos_pos_embed(self): + net = PatchEmbeddingBlock( + in_channels=1, + img_size=(32, 32, 32), + patch_size=(8, 8, 8), + hidden_size=96, + num_heads=8, + pos_embed_type="sincos", + dropout_rate=0.5, + ) + + self.assertEqual(net.position_embeddings.requires_grad, False) + + def test_learnable_pos_embed(self): + net = PatchEmbeddingBlock( + in_channels=1, + img_size=(32, 32, 32), + patch_size=(8, 8, 8), + hidden_size=96, + num_heads=8, + pos_embed_type="learnable", + dropout_rate=0.5, + ) + + self.assertEqual(net.position_embeddings.requires_grad, True) + def test_ill_arg(self): with self.assertRaises(ValueError): PatchEmbeddingBlock( diff --git a/tests/test_perceptual_loss.py b/tests/test_perceptual_loss.py index ba204af697..02232e6f8d 100644 --- a/tests/test_perceptual_loss.py +++ b/tests/test_perceptual_loss.py @@ -40,6 +40,11 @@ (2, 1, 64, 64, 64), (2, 1, 64, 64, 64), ], + [ + {"spatial_dims": 3, "network_type": "medicalnet_resnet50_23datasets", "is_fake_3d": False}, + (2, 1, 64, 64, 64), + (2, 1, 64, 64, 64), + ], [ {"spatial_dims": 3, "network_type": "resnet50", "is_fake_3d": True, "pretrained": True, "fake_3d_ratio": 0.2}, (2, 1, 64, 64, 64), diff --git a/tests/test_rand_affine.py b/tests/test_rand_affine.py index f37f7827bb..23e3fd148c 100644 --- a/tests/test_rand_affine.py +++ b/tests/test_rand_affine.py @@ -147,7 +147,7 @@ def test_rand_affine(self, input_param, input_data, expected_val): g.set_random_state(123) result = g(**input_data) g.rand_affine_grid.affine = torch.eye(4, dtype=torch.float64) # reset affine - test_resampler_lazy(g, result, input_param, input_data, seed=123) + test_resampler_lazy(g, result, input_param, input_data, seed=123, rtol=_rtol) if input_param.get("cache_grid", False): self.assertTrue(g._cached_grid is not None) assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4, type_test="tensor") diff --git a/tests/test_rand_affined.py b/tests/test_rand_affined.py index 20c50954e2..32fde8dc0f 100644 --- a/tests/test_rand_affined.py +++ b/tests/test_rand_affined.py @@ -234,7 +234,9 @@ def test_rand_affined(self, input_param, input_data, expected_val, track_meta): lazy_init_param["keys"], lazy_init_param["mode"] = key, mode resampler = RandAffined(**lazy_init_param).set_random_state(123) expected_output = resampler(**call_param) - test_resampler_lazy(resampler, expected_output, lazy_init_param, call_param, seed=123, output_key=key) + test_resampler_lazy( + resampler, expected_output, lazy_init_param, call_param, seed=123, output_key=key, rtol=_rtol + ) resampler.lazy = False if input_param.get("cache_grid", False): diff --git a/tests/test_rand_gaussian_noise.py b/tests/test_rand_gaussian_noise.py index a56e54fe31..233b4dd1b6 100644 --- a/tests/test_rand_gaussian_noise.py +++ b/tests/test_rand_gaussian_noise.py @@ -22,22 +22,24 @@ TESTS = [] for p in TEST_NDARRAYS: - TESTS.append(("test_zero_mean", p, 0, 0.1)) - TESTS.append(("test_non_zero_mean", p, 1, 0.5)) + TESTS.append(("test_zero_mean", p, 0, 0.1, True)) + TESTS.append(("test_non_zero_mean", p, 1, 0.5, True)) + TESTS.append(("test_no_sample_std", p, 1, 0.5, False)) class TestRandGaussianNoise(NumpyImageTestCase2D): @parameterized.expand(TESTS) - def 
test_correct_results(self, _, im_type, mean, std): + def test_correct_results(self, _, im_type, mean, std, sample_std): seed = 0 - gaussian_fn = RandGaussianNoise(prob=1.0, mean=mean, std=std) + gaussian_fn = RandGaussianNoise(prob=1.0, mean=mean, std=std, sample_std=sample_std) gaussian_fn.set_random_state(seed) im = im_type(self.imt) noised = gaussian_fn(im) np.random.seed(seed) np.random.random() - expected = self.imt + np.random.normal(mean, np.random.uniform(0, std), size=self.imt.shape) + _std = np.random.uniform(0, std) if sample_std else std + expected = self.imt + np.random.normal(mean, _std, size=self.imt.shape) if isinstance(noised, torch.Tensor): noised = noised.cpu() np.testing.assert_allclose(expected, noised, atol=1e-5) diff --git a/tests/test_rand_gaussian_noised.py b/tests/test_rand_gaussian_noised.py index bcbed98b5a..e3df196be2 100644 --- a/tests/test_rand_gaussian_noised.py +++ b/tests/test_rand_gaussian_noised.py @@ -22,8 +22,9 @@ TESTS = [] for p in TEST_NDARRAYS: - TESTS.append(["test_zero_mean", p, ["img1", "img2"], 0, 0.1]) - TESTS.append(["test_non_zero_mean", p, ["img1", "img2"], 1, 0.5]) + TESTS.append(["test_zero_mean", p, ["img1", "img2"], 0, 0.1, True]) + TESTS.append(["test_non_zero_mean", p, ["img1", "img2"], 1, 0.5, True]) + TESTS.append(["test_no_sample_std", p, ["img1", "img2"], 1, 0.5, False]) seed = 0 @@ -31,15 +32,18 @@ class TestRandGaussianNoised(NumpyImageTestCase2D): @parameterized.expand(TESTS) - def test_correct_results(self, _, im_type, keys, mean, std): - gaussian_fn = RandGaussianNoised(keys=keys, prob=1.0, mean=mean, std=std, dtype=np.float64) + def test_correct_results(self, _, im_type, keys, mean, std, sample_std): + gaussian_fn = RandGaussianNoised( + keys=keys, prob=1.0, mean=mean, std=std, dtype=np.float64, sample_std=sample_std + ) gaussian_fn.set_random_state(seed) im = im_type(self.imt) noised = gaussian_fn({k: im for k in keys}) np.random.seed(seed) # simulate the randomize() of transform np.random.random() - noise = np.random.normal(mean, np.random.uniform(0, std), size=self.imt.shape) + _std = np.random.uniform(0, std) if sample_std else std + noise = np.random.normal(mean, _std, size=self.imt.shape) for k in keys: expected = self.imt + noise if isinstance(noised[k], torch.Tensor): diff --git a/tests/test_rand_gibbs_noise.py b/tests/test_rand_gibbs_noise.py index 4befeffbe2..5ef249a1f4 100644 --- a/tests/test_rand_gibbs_noise.py +++ b/tests/test_rand_gibbs_noise.py @@ -90,6 +90,15 @@ def test_alpha(self, im_shape, input_type): self.assertGreaterEqual(t.sampled_alpha, 0.5) self.assertLessEqual(t.sampled_alpha, 0.51) + @parameterized.expand(TEST_CASES) + def test_alpha_single_value(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) + alpha = 0.01 + t = RandGibbsNoise(1.0, alpha) + _ = t(deepcopy(im)) + self.assertGreaterEqual(t.sampled_alpha, 0) + self.assertLessEqual(t.sampled_alpha, 0.01) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_rand_gibbs_noised.py b/tests/test_rand_gibbs_noised.py index 6580189af6..382290dd39 100644 --- a/tests/test_rand_gibbs_noised.py +++ b/tests/test_rand_gibbs_noised.py @@ -105,6 +105,14 @@ def test_alpha(self, im_shape, input_type): _ = t(deepcopy(data)) self.assertTrue(0.5 <= t.rand_gibbs_noise.sampled_alpha <= 0.51) + @parameterized.expand(TEST_CASES) + def test_alpha_single_value(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) + alpha = 0.01 + t = RandGibbsNoised(KEYS, 1.0, alpha) + _ = t(deepcopy(data)) + self.assertTrue(0 <= 
t.rand_gibbs_noise.sampled_alpha <= 0.01) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_regularization.py b/tests/test_regularization.py new file mode 100644 index 0000000000..4df60b9808 --- /dev/null +++ b/tests/test_regularization.py @@ -0,0 +1,112 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import unittest + +import torch + +from monai.transforms import CutMix, CutMixd, CutOut, MixUp, MixUpd +from monai.utils import set_determinism + + +class TestMixup(unittest.TestCase): + + def setUp(self) -> None: + set_determinism(seed=0) + + def tearDown(self) -> None: + set_determinism(None) + + def test_mixup(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + sample = torch.rand(*shape, dtype=torch.float32) + mixup = MixUp(6, 1.0) + output = mixup(sample) + self.assertEqual(output.shape, sample.shape) + self.assertTrue(any(not torch.allclose(sample, mixup(sample)) for _ in range(10))) + + with self.assertRaises(ValueError): + MixUp(6, -0.5) + + mixup = MixUp(6, 0.5) + for dims in [2, 3]: + with self.assertRaises(ValueError): + shape = (5, 3) + (32,) * dims + sample = torch.rand(*shape, dtype=torch.float32) + mixup(sample) + + def test_mixupd(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + t = torch.rand(*shape, dtype=torch.float32) + sample = {"a": t, "b": t} + mixup = MixUpd(["a", "b"], 6) + output = mixup(sample) + self.assertTrue(torch.allclose(output["a"], output["b"])) + + with self.assertRaises(ValueError): + MixUpd(["k1", "k2"], 6, -0.5) + + +class TestCutMix(unittest.TestCase): + + def setUp(self) -> None: + set_determinism(seed=0) + + def tearDown(self) -> None: + set_determinism(None) + + def test_cutmix(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + sample = torch.rand(*shape, dtype=torch.float32) + cutmix = CutMix(6, 1.0) + output = cutmix(sample) + self.assertEqual(output.shape, sample.shape) + self.assertTrue(any(not torch.allclose(sample, cutmix(sample)) for _ in range(10))) + + def test_cutmixd(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + t = torch.rand(*shape, dtype=torch.float32) + label = torch.randint(0, 1, shape) + sample = {"a": t, "b": t, "lbl1": label, "lbl2": label} + cutmix = CutMixd(["a", "b"], 6, label_keys=("lbl1", "lbl2")) + output = cutmix(sample) + # croppings are different on each application + self.assertTrue(not torch.allclose(output["a"], output["b"])) + # but mixing of labels is not affected by it + self.assertTrue(torch.allclose(output["lbl1"], output["lbl2"])) + + +class TestCutOut(unittest.TestCase): + + def setUp(self) -> None: + set_determinism(seed=0) + + def tearDown(self) -> None: + set_determinism(None) + + def test_cutout(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + sample = torch.rand(*shape, dtype=torch.float32) + cutout = CutOut(6, 1.0) + output = cutout(sample) + self.assertEqual(output.shape, sample.shape) + self.assertTrue(any(not torch.allclose(sample, cutout(sample)) for _ 
in range(10))) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_resize.py b/tests/test_resize.py index 65b33ea649..d4c57e2742 100644 --- a/tests/test_resize.py +++ b/tests/test_resize.py @@ -46,6 +46,7 @@ class TestResize(NumpyImageTestCase2D): + def test_invalid_inputs(self): with self.assertRaises(ValueError): resize = Resize(spatial_size=(128, 128, 3), mode="order") diff --git a/tests/test_resized.py b/tests/test_resized.py index d62f29ab5c..243a4e6622 100644 --- a/tests/test_resized.py +++ b/tests/test_resized.py @@ -66,6 +66,7 @@ @SkipIfAtLeastPyTorchVersion((2, 2, 0)) # https://github.com/Project-MONAI/MONAI/issues/7445 class TestResized(NumpyImageTestCase2D): + def test_invalid_inputs(self): with self.assertRaises(ValueError): resize = Resized(keys="img", spatial_size=(128, 128, 3), mode="order") diff --git a/tests/test_set_visible_devices.py b/tests/test_set_visible_devices.py index 7860656b3d..b4f44957a2 100644 --- a/tests/test_set_visible_devices.py +++ b/tests/test_set_visible_devices.py @@ -14,7 +14,7 @@ import os import unittest -from tests.utils import skip_if_no_cuda +from tests.utils import SkipIfAtLeastPyTorchVersion, skip_if_no_cuda class TestVisibleDevices(unittest.TestCase): @@ -25,6 +25,7 @@ def run_process_and_get_exit_code(code_to_execute): return int(bin(value).replace("0b", "").rjust(16, "0")[:8], 2) @skip_if_no_cuda + @SkipIfAtLeastPyTorchVersion((2, 2, 1)) def test_visible_devices(self): num_gpus_before = self.run_process_and_get_exit_code( 'python -c "import os; import torch; ' diff --git a/tests/test_soft_clip.py b/tests/test_soft_clip.py new file mode 100644 index 0000000000..de5122e982 --- /dev/null +++ b/tests/test_soft_clip.py @@ -0,0 +1,125 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import unittest + +import numpy as np +import torch +from parameterized import parameterized + +from monai.transforms.utils import soft_clip + +TEST_CASES = [ + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 10}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([2.0000, 2.0000, 2.0693, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 7.9307, 8.0000]), + }, + ], + [ + {"minv": 2, "maxv": None, "sharpness_factor": 10}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([2.0000, 2.0000, 2.0693, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000]), + }, + ], + [ + {"minv": None, "maxv": 7, "sharpness_factor": 10}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([0.0000, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 6.9307, 7.0000, 7.0000]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 1.0}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([2.1266, 2.3124, 2.6907, 3.3065, 4.1088, 5.0000, 5.8912, 6.6935, 7.3093, 7.6877]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 3.0}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([2.0008, 2.0162, 2.2310, 3.0162, 4.0008, 5.0000, 5.9992, 6.9838, 7.7690, 7.9838]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 5.0}, + { + "input": torch.arange(10).float(), + "clipped": torch.tensor([2.0000, 2.0013, 2.1386, 3.0013, 4.0000, 5.0000, 6.0000, 6.9987, 7.8614, 7.9987]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 10}, + { + "input": np.arange(10).astype(np.float32), + "clipped": np.array([2.0000, 2.0000, 2.0693, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 7.9307, 8.0000]), + }, + ], + [ + {"minv": 2, "maxv": None, "sharpness_factor": 10}, + { + "input": np.arange(10).astype(float), + "clipped": np.array([2.0000, 2.0000, 2.0693, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000]), + }, + ], + [ + {"minv": None, "maxv": 7, "sharpness_factor": 10}, + { + "input": np.arange(10).astype(float), + "clipped": np.array([0.0000, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 6.9307, 7.0000, 7.0000]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 1.0}, + { + "input": np.arange(10).astype(float), + "clipped": np.array([2.1266, 2.3124, 2.6907, 3.3065, 4.1088, 5.0000, 5.8912, 6.6935, 7.3093, 7.6877]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 3.0}, + { + "input": np.arange(10).astype(float), + "clipped": np.array([2.0008, 2.0162, 2.2310, 3.0162, 4.0008, 5.0000, 5.9992, 6.9838, 7.7690, 7.9838]), + }, + ], + [ + {"minv": 2, "maxv": 8, "sharpness_factor": 5.0}, + { + "input": np.arange(10).astype(float), + "clipped": np.array([2.0000, 2.0013, 2.1386, 3.0013, 4.0000, 5.0000, 6.0000, 6.9987, 7.8614, 7.9987]), + }, + ], +] + + +class TestSoftClip(unittest.TestCase): + + @parameterized.expand(TEST_CASES) + def test_result(self, input_param, input_data): + outputs = soft_clip(input_data["input"], **input_param) + expected_val = input_data["clipped"] + if isinstance(outputs, torch.Tensor): + np.testing.assert_allclose( + outputs.detach().cpu().numpy(), expected_val.detach().cpu().numpy(), atol=1e-4, rtol=1e-4 + ) + else: + np.testing.assert_allclose(outputs, expected_val, atol=1e-4, rtol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_spatial_resampled.py b/tests/test_spatial_resampled.py index 541015cc34..d5c86258d7 100644 --- a/tests/test_spatial_resampled.py +++ b/tests/test_spatial_resampled.py @@ -11,6 +11,7 @@ from __future__ import 
annotations +import platform import unittest import numpy as np @@ -23,6 +24,12 @@ from tests.lazy_transforms_utils import test_resampler_lazy from tests.utils import TEST_DEVICES, assert_allclose +ON_AARCH64 = platform.machine() == "aarch64" +if ON_AARCH64: + rtol, atol = 1e-1, 1e-2 +else: + rtol, atol = 1e-3, 1e-4 + TESTS = [] destinations_3d = [ @@ -104,7 +111,7 @@ def test_flips_inverse(self, img, device, dst_affine, kwargs, expected_output): # check lazy lazy_xform = SpatialResampled(**init_param) - test_resampler_lazy(lazy_xform, output_data, init_param, call_param, output_key="img") + test_resampler_lazy(lazy_xform, output_data, init_param, call_param, output_key="img", rtol=rtol, atol=atol) # check inverse inverted = xform.inverse(output_data)["img"] diff --git a/tests/test_sure_loss.py b/tests/test_sure_loss.py new file mode 100644 index 0000000000..903f9bd2ca --- /dev/null +++ b/tests/test_sure_loss.py @@ -0,0 +1,72 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import unittest + +import torch + +from monai.losses import SURELoss + + +class TestSURELoss(unittest.TestCase): + + def test_real_value(self): + """Test SURELoss with real-valued input: when the input is real value, the loss should be 0.0.""" + sure_loss_real = SURELoss(perturb_noise=torch.zeros(2, 1, 128, 128), eps=0.1) + + def operator(x): + return x + + y_pseudo_gt = torch.randn(2, 1, 128, 128) + x = torch.randn(2, 1, 128, 128) + loss = sure_loss_real(operator, x, y_pseudo_gt, complex_input=False) + self.assertAlmostEqual(loss.item(), 0.0) + + def test_complex_value(self): + """Test SURELoss with complex-valued input: when the input is complex value, the loss should be 0.0.""" + + def operator(x): + return x + + sure_loss_complex = SURELoss(perturb_noise=torch.zeros(2, 2, 128, 128), eps=0.1) + y_pseudo_gt = torch.randn(2, 2, 128, 128) + x = torch.randn(2, 2, 128, 128) + loss = sure_loss_complex(operator, x, y_pseudo_gt, complex_input=True) + self.assertAlmostEqual(loss.item(), 0.0) + + def test_complex_general_input(self): + """Test SURELoss with complex-valued input: when the input is general complex value, the loss should be 0.0.""" + + def operator(x): + return x + + perturb_noise_real = torch.randn(2, 1, 128, 128) + perturb_noise_complex = torch.zeros(2, 2, 128, 128) + perturb_noise_complex[:, 0, :, :] = perturb_noise_real.squeeze() + y_pseudo_gt_real = torch.randn(2, 1, 128, 128) + y_pseudo_gt_complex = torch.zeros(2, 2, 128, 128) + y_pseudo_gt_complex[:, 0, :, :] = y_pseudo_gt_real.squeeze() + x_real = torch.randn(2, 1, 128, 128) + x_complex = torch.zeros(2, 2, 128, 128) + x_complex[:, 0, :, :] = x_real.squeeze() + + sure_loss_real = SURELoss(perturb_noise=perturb_noise_real, eps=0.1) + sure_loss_complex = SURELoss(perturb_noise=perturb_noise_complex, eps=0.1) + + loss_real = sure_loss_real(operator, x_real, y_pseudo_gt_real, complex_input=False) + loss_complex = sure_loss_complex(operator, x_complex, y_pseudo_gt_complex, complex_input=True) + 
self.assertAlmostEqual(loss_real.item(), loss_complex.abs().item(), places=6) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/testing_data/fl_infer_properties.json b/tests/testing_data/fl_infer_properties.json new file mode 100644 index 0000000000..72e97cd2c6 --- /dev/null +++ b/tests/testing_data/fl_infer_properties.json @@ -0,0 +1,67 @@ +{ + "bundle_root": { + "description": "root path of the bundle.", + "required": true, + "id": "bundle_root" + }, + "device": { + "description": "target device to execute the bundle workflow.", + "required": true, + "id": "device" + }, + "dataset_dir": { + "description": "directory path of the dataset.", + "required": true, + "id": "dataset_dir" + }, + "dataset": { + "description": "PyTorch dataset object for the inference / evaluation logic.", + "required": true, + "id": "dataset" + }, + "evaluator": { + "description": "inference / evaluation workflow engine.", + "required": true, + "id": "evaluator" + }, + "network_def": { + "description": "network module for the inference.", + "required": true, + "id": "network_def" + }, + "inferer": { + "description": "MONAI Inferer object to execute the model computation in inference.", + "required": true, + "id": "inferer" + }, + "dataset_data": { + "description": "data source for the inference / evaluation dataset.", + "required": false, + "id": "dataset::data", + "refer_id": null + }, + "handlers": { + "description": "event-handlers for the inference / evaluation logic.", + "required": false, + "id": "handlers", + "refer_id": "evaluator::val_handlers" + }, + "preprocessing": { + "description": "preprocessing for the input data.", + "required": false, + "id": "preprocessing", + "refer_id": "dataset::transform" + }, + "postprocessing": { + "description": "postprocessing for the model output data.", + "required": false, + "id": "postprocessing", + "refer_id": "evaluator::postprocessing" + }, + "key_metric": { + "description": "the key metric during evaluation.", + "required": false, + "id": "key_metric", + "refer_id": "evaluator::key_val_metric" + } +} diff --git a/tests/testing_data/integration_answers.py b/tests/testing_data/integration_answers.py index c0dd973418..e02b9ae995 100644 --- a/tests/testing_data/integration_answers.py +++ b/tests/testing_data/integration_answers.py @@ -600,6 +600,62 @@ ], } }, + { # test answers for 24.03 + "integration_segmentation_3d": { + "losses": [ + 0.5442982316017151, + 0.4741817444562912, + 0.4535954713821411, + 0.44163046181201937, + 0.4307525992393494, + 0.428487154841423, + ], + "best_metric": 0.9314384460449219, + "infer_metric": 0.9315622448921204, + "output_sums": [ + 0.14268704426414708, + 0.1528672845845743, + 0.1521782248125706, + 0.14028769128068194, + 0.1889830671664784, + 0.16999075690664475, + 0.14736282992708227, + 0.16877952654821815, + 0.15779597155181269, + 0.17987829927082263, + 0.16320253928314676, + 0.16854299322173155, + 0.14497470986956967, + 0.11437140546369519, + 0.1624117412960871, + 0.20156009294443875, + 0.1764654154256958, + 0.0982348259217418, + 0.1942436068604293, + 0.20359421536407518, + 0.19661953116976483, + 0.2088326101468625, + 0.16273043545239807, + 0.1326107887439663, + 0.1489245275752285, + 0.143107476635514, + 0.23189027677929547, + 0.1613818424566088, + 0.14889532196775188, + 0.10332622984492143, + 0.11940054688302351, + 0.13040496302762658, + 0.11472123087193181, + 0.15307044007394474, + 0.16371989575844717, + 0.1942898223272055, + 0.2230120930471398, + 0.1814679187634795, + 0.19069496508164732, + 0.07537197031940022, 
+ ], + } + }, ]