From fc6e8e7409765ae1df118b246046a174238e3e2a Mon Sep 17 00:00:00 2001
From: ver217 <lhx0217@gmail.com>
Date: Thu, 4 May 2023 17:21:59 +0800
Subject: [PATCH 1/3] [booster] add dp plugin base

---
 colossalai/booster/plugin/dp_plugin_base.py | 72 +++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 colossalai/booster/plugin/dp_plugin_base.py

diff --git a/colossalai/booster/plugin/dp_plugin_base.py b/colossalai/booster/plugin/dp_plugin_base.py
new file mode 100644
index 000000000000..4021b31754b4
--- /dev/null
+++ b/colossalai/booster/plugin/dp_plugin_base.py
@@ -0,0 +1,72 @@
+import random
+
+import numpy as np
+import torch
+import torch.distributed as dist
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+
+from .plugin_base import Plugin
+
+
+class DPPluginBase(Plugin):
+    """This is a base class for all DP plugins. It sets up world size and rank, and provides data loader creation.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        assert dist.is_initialized(
+        ), 'torch.distributed is not initialized, please use colossalai.launch to create the distributed environment'
+        self.rank = dist.get_rank()
+        self.world_size = dist.get_world_size()
+
+    def prepare_train_dataloader(self,
+                                 dataset,
+                                 batch_size,
+                                 shuffle=False,
+                                 seed=1024,
+                                 drop_last=False,
+                                 pin_memory=False,
+                                 num_workers=0,
+                                 **kwargs):
+        r"""
+        Prepare a dataloader for distributed training. The dataloader will be wrapped by
+        `torch.utils.data.DataLoader` and `torch.utils.data.DistributedSampler`.
+
+        Note:
+            1. Evaluation datasets should not be passed to this function.
+
+        Args:
+            dataset (`torch.utils.data.Dataset`): The dataset to be loaded.
+            shuffle (bool, optional): Whether to shuffle the dataset. Defaults to False.
+            seed (int, optional): Random worker seed for sampling, defaults to 1024.
+            add_sampler: Whether to add ``DistributedDataParallelSampler`` to the dataset. Defaults to True.
+            drop_last (bool, optional): Set to True to drop the last incomplete batch, if the dataset size
+                is not divisible by the batch size. If False and the size of dataset is not divisible by
+                the batch size, then the last batch will be smaller, defaults to False.
+            pin_memory (bool, optional): Whether to pin memory address in CPU memory. Defaults to False.
+            num_workers (int, optional): Number of worker threads for this dataloader. Defaults to 0.
+            kwargs (dict): optional parameters for ``torch.utils.data.DataLoader``, more details could be found in
+                    `DataLoader <https://pytorch.org/docs/stable/_modules/torch/utils/data/dataloader.html#DataLoader>`_.
+
+        Returns:
+            :class:`torch.utils.data.DataLoader`: A DataLoader used for training or testing.
+        """
+        _kwargs = kwargs.copy()
+        sampler = DistributedSampler(dataset, num_replicas=self.world_size, rank=self.rank, shuffle=shuffle)
+
+        # Deterministic dataloader
+        def seed_worker(worker_id):
+            worker_seed = seed
+            np.random.seed(worker_seed)
+            torch.manual_seed(worker_seed)
+            random.seed(worker_seed)
+
+        return DataLoader(dataset,
+                          batch_size=batch_size,
+                          sampler=sampler,
+                          worker_init_fn=seed_worker,
+                          drop_last=drop_last,
+                          pin_memory=pin_memory,
+                          num_workers=num_workers,
+                          **_kwargs)

From e1486d9d06f7c6e7f1372039dbdcf0d53b5467c4 Mon Sep 17 00:00:00 2001
From: ver217 <lhx0217@gmail.com>
Date: Thu, 4 May 2023 17:26:43 +0800
Subject: [PATCH 2/3] [booster] inherit dp plugin base

---
 colossalai/booster/plugin/gemini_plugin.py    | 65 +-----------------
 .../booster/plugin/low_level_zero_plugin.py   | 66 +------------------
 colossalai/booster/plugin/torch_ddp_plugin.py | 66 +------------------
 3 files changed, 9 insertions(+), 188 deletions(-)

diff --git a/colossalai/booster/plugin/gemini_plugin.py b/colossalai/booster/plugin/gemini_plugin.py
index deda00d8a7b3..523a562ea5ea 100644
--- a/colossalai/booster/plugin/gemini_plugin.py
+++ b/colossalai/booster/plugin/gemini_plugin.py
@@ -1,16 +1,12 @@
-import random
 import warnings
 from typing import Callable, List, Optional, Tuple, Union
 
-import numpy as np
 import torch
-import torch.distributed as dist
 import torch.nn as nn
 from torch import Tensor
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
 from torch.utils.data import DataLoader
-from torch.utils.data.distributed import DistributedSampler
 
 from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO
 from colossalai.checkpoint_io.utils import save_state_dict
@@ -20,7 +16,7 @@
 from colossalai.zero import GeminiDDP, zero_model_wrapper, zero_optim_wrapper
 from colossalai.zero.gemini.memory_tracer import MemStats
 
-from .plugin_base import Plugin
+from .dp_plugin_base import DPPluginBase
 
 __all__ = ['GeminiPlugin']
 
@@ -104,7 +100,7 @@ def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None:
         raise NotImplementedError('Gemini does not support clip_grad_by_value')
 
 
-class GeminiPlugin(Plugin):
+class GeminiPlugin(DPPluginBase):
     """
     Plugin for Gemini.
 
@@ -173,11 +169,7 @@ def __init__(
         norm_type: float = 2.0,
         verbose: bool = False,
     ) -> None:
-
-        assert dist.is_initialized(
-        ), 'torch.distributed is not initialized, please use colossalai.launch to create the distributed environment'
-        self.rank = dist.get_rank()
-        self.world_size = dist.get_world_size()
+        super().__init__()
         self.gemini_config = dict(
             device=(device or get_current_device()),
             placement_policy=placement_policy,
@@ -216,57 +208,6 @@ def control_device(self) -> bool:
     def supported_devices(self) -> List[str]:
         return ['cuda']
 
-    def prepare_train_dataloader(self,
-                                 dataset,
-                                 batch_size,
-                                 shuffle=False,
-                                 seed=1024,
-                                 drop_last=False,
-                                 pin_memory=False,
-                                 num_workers=0,
-                                 **kwargs):
-        r"""
-        Prepare a dataloader for distributed training. The dataloader will be wrapped by
-        `torch.utils.data.DataLoader` and `torch.utils.data.DistributedSampler`.
-
-        Note:
-            1. Evaluation datasets should not be passed to this function.
-
-        Args:
-            dataset (`torch.utils.data.Dataset`): The dataset to be loaded.
-            shuffle (bool, optional): Whether to shuffle the dataset. Defaults to False.
-            seed (int, optional): Random worker seed for sampling, defaults to 1024.
-            add_sampler: Whether to add ``DistributedDataParallelSampler`` to the dataset. Defaults to True.
-            drop_last (bool, optional): Set to True to drop the last incomplete batch, if the dataset size
-                is not divisible by the batch size. If False and the size of dataset is not divisible by
-                the batch size, then the last batch will be smaller, defaults to False.
-            pin_memory (bool, optional): Whether to pin memory address in CPU memory. Defaults to False.
-            num_workers (int, optional): Number of worker threads for this dataloader. Defaults to 0.
-            kwargs (dict): optional parameters for ``torch.utils.data.DataLoader``, more details could be found in
-                    `DataLoader <https://pytorch.org/docs/stable/_modules/torch/utils/data/dataloader.html#DataLoader>`_.
-
-        Returns:
-            :class:`torch.utils.data.DataLoader`: A DataLoader used for training or testing.
-        """
-        _kwargs = kwargs.copy()
-        sampler = DistributedSampler(dataset, num_replicas=self.world_size, rank=self.rank, shuffle=shuffle)
-
-        # Deterministic dataloader
-        def seed_worker(worker_id):
-            worker_seed = seed
-            np.random.seed(worker_seed)
-            torch.manual_seed(worker_seed)
-            random.seed(worker_seed)
-
-        return DataLoader(dataset,
-                          batch_size=batch_size,
-                          sampler=sampler,
-                          worker_init_fn=seed_worker,
-                          drop_last=drop_last,
-                          pin_memory=pin_memory,
-                          num_workers=num_workers,
-                          **_kwargs)
-
     def configure(
         self,
         model: nn.Module,
diff --git a/colossalai/booster/plugin/low_level_zero_plugin.py b/colossalai/booster/plugin/low_level_zero_plugin.py
index 969c430bd317..828d8b27422f 100644
--- a/colossalai/booster/plugin/low_level_zero_plugin.py
+++ b/colossalai/booster/plugin/low_level_zero_plugin.py
@@ -1,24 +1,20 @@
-import random
 import warnings
 from typing import Callable, List, Optional, Tuple, Union
 
-import numpy as np
 import torch
-import torch.distributed as dist
 import torch.nn as nn
 from torch import Tensor
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
 from torch.utils._pytree import tree_map
 from torch.utils.data import DataLoader
-from torch.utils.data.distributed import DistributedSampler
 
 from colossalai.checkpoint_io import CheckpointIO
 from colossalai.interface import ModelWrapper, OptimizerWrapper
 from colossalai.utils import get_current_device
 from colossalai.zero import zero_model_wrapper, zero_optim_wrapper
 
-from .plugin_base import Plugin
+from .dp_plugin_base import DPPluginBase
 from .torch_ddp_plugin import TorchDDPCheckpointIO
 
 __all__ = ['LowLevelZeroPlugin']
@@ -88,7 +84,7 @@ def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None:
         raise NotImplementedError('LowLevelZero does not support clip_grad_by_value')
 
 
-class LowLevelZeroPlugin(Plugin):
+class LowLevelZeroPlugin(DPPluginBase):
     """
     Plugin for low level zero.
 
@@ -142,15 +138,10 @@ def __init__(
         cpu_offload: bool = False,
         verbose: bool = False,
     ) -> None:
-
-        assert dist.is_initialized(
-        ), 'torch.distributed is not initialized, please use colossalai.launch to create the distributed environment'
+        super().__init__()
         assert stage in (1, 2), f'LowLevelZeroPlugin only supports stage 1/2 training'
         assert precision in ('fp16', 'fp32'), f'LowLevelZeroPlugin only supports fp16/fp32 training'
 
-        self.rank = dist.get_rank()
-        self.world_size = dist.get_world_size()
-
         self.stage = stage
         self.precision = precision
         self.zero_optim_config = dict(reduce_bucket_size=reduce_bucket_size_in_m * 1024 * 1024,
@@ -183,57 +174,6 @@ def control_device(self) -> bool:
     def supported_devices(self) -> List[str]:
         return ['cuda']
 
-    def prepare_train_dataloader(self,
-                                 dataset,
-                                 batch_size,
-                                 shuffle=False,
-                                 seed=1024,
-                                 drop_last=False,
-                                 pin_memory=False,
-                                 num_workers=0,
-                                 **kwargs):
-        r"""
-        Prepare a dataloader for distributed training. The dataloader will be wrapped by
-        `torch.utils.data.DataLoader` and `torch.utils.data.DistributedSampler`.
-
-        Note:
-            1. Evaluation datasets should not be passed to this function.
-
-        Args:
-            dataset (`torch.utils.data.Dataset`): The dataset to be loaded.
-            shuffle (bool, optional): Whether to shuffle the dataset. Defaults to False.
-            seed (int, optional): Random worker seed for sampling, defaults to 1024.
-            add_sampler: Whether to add ``DistributedDataParallelSampler`` to the dataset. Defaults to True.
-            drop_last (bool, optional): Set to True to drop the last incomplete batch, if the dataset size
-                is not divisible by the batch size. If False and the size of dataset is not divisible by
-                the batch size, then the last batch will be smaller, defaults to False.
-            pin_memory (bool, optional): Whether to pin memory address in CPU memory. Defaults to False.
-            num_workers (int, optional): Number of worker threads for this dataloader. Defaults to 0.
-            kwargs (dict): optional parameters for ``torch.utils.data.DataLoader``, more details could be found in
-                    `DataLoader <https://pytorch.org/docs/stable/_modules/torch/utils/data/dataloader.html#DataLoader>`_.
-
-        Returns:
-            :class:`torch.utils.data.DataLoader`: A DataLoader used for training or testing.
-        """
-        _kwargs = kwargs.copy()
-        sampler = DistributedSampler(dataset, num_replicas=self.world_size, rank=self.rank, shuffle=shuffle)
-
-        # Deterministic dataloader
-        def seed_worker(worker_id):
-            worker_seed = seed
-            np.random.seed(worker_seed)
-            torch.manual_seed(worker_seed)
-            random.seed(worker_seed)
-
-        return DataLoader(dataset,
-                          batch_size=batch_size,
-                          sampler=sampler,
-                          worker_init_fn=seed_worker,
-                          drop_last=drop_last,
-                          pin_memory=pin_memory,
-                          num_workers=num_workers,
-                          **_kwargs)
-
     def configure(
         self,
         model: nn.Module,
diff --git a/colossalai/booster/plugin/torch_ddp_plugin.py b/colossalai/booster/plugin/torch_ddp_plugin.py
index c5e310c7e769..d30d266c0048 100644
--- a/colossalai/booster/plugin/torch_ddp_plugin.py
+++ b/colossalai/booster/plugin/torch_ddp_plugin.py
@@ -1,21 +1,16 @@
-import random
 from typing import Callable, List, Tuple, Union
 
-import numpy as np
-import torch
-import torch.distributed as dist
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
 from torch.utils.data import DataLoader
-from torch.utils.data.distributed import DistributedSampler
 
 from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO
 from colossalai.cluster import DistCoordinator
 from colossalai.interface import ModelWrapper, OptimizerWrapper
 
-from .plugin_base import Plugin
+from .dp_plugin_base import DPPluginBase
 
 __all__ = ['TorchDDPPlugin']
 
@@ -66,7 +61,7 @@ def unwrap(self):
         return self.module.module
 
 
-class TorchDDPPlugin(Plugin):
+class TorchDDPPlugin(DPPluginBase):
     """
     Plugin for PyTorch DDP.
 
@@ -97,11 +92,7 @@ def __init__(self,
                  check_reduction: bool = False,
                  gradient_as_bucket_view: bool = False,
                  static_graph: bool = False) -> None:
-
-        assert dist.is_initialized(
-        ), 'torch.distributed is not initialized, please use colossalai.launch to create the distributed environment'
-        self.rank = dist.get_rank()
-        self.world_size = dist.get_world_size()
+        super().__init__()
         self.ddp_kwargs = dict(broadcast_buffers=broadcast_buffers,
                                bucket_cap_mb=bucket_cap_mb,
                                find_unused_parameters=find_unused_parameters,
@@ -124,57 +115,6 @@ def control_device(self) -> bool:
     def supported_devices(self) -> List[str]:
         return ['cuda']
 
-    def prepare_train_dataloader(self,
-                                 dataset,
-                                 batch_size,
-                                 shuffle=False,
-                                 seed=1024,
-                                 drop_last=False,
-                                 pin_memory=False,
-                                 num_workers=0,
-                                 **kwargs):
-        r"""
-        Prepare a dataloader for distributed training. The dataloader will be wrapped by
-        `torch.utils.data.DataLoader` and `torch.utils.data.DistributedSampler`.
-
-        Note:
-            1. Evaluation datasets should not be passed to this function.
-
-        Args:
-            dataset (`torch.utils.data.Dataset`): The dataset to be loaded.
-            shuffle (bool, optional): Whether to shuffle the dataset. Defaults to False.
-            seed (int, optional): Random worker seed for sampling, defaults to 1024.
-            add_sampler: Whether to add ``DistributedDataParallelSampler`` to the dataset. Defaults to True.
-            drop_last (bool, optional): Set to True to drop the last incomplete batch, if the dataset size
-                is not divisible by the batch size. If False and the size of dataset is not divisible by
-                the batch size, then the last batch will be smaller, defaults to False.
-            pin_memory (bool, optional): Whether to pin memory address in CPU memory. Defaults to False.
-            num_workers (int, optional): Number of worker threads for this dataloader. Defaults to 0.
-            kwargs (dict): optional parameters for ``torch.utils.data.DataLoader``, more details could be found in
-                    `DataLoader <https://pytorch.org/docs/stable/_modules/torch/utils/data/dataloader.html#DataLoader>`_.
-
-        Returns:
-            :class:`torch.utils.data.DataLoader`: A DataLoader used for training or testing.
-        """
-        _kwargs = kwargs.copy()
-        sampler = DistributedSampler(dataset, num_replicas=self.world_size, rank=self.rank, shuffle=shuffle)
-
-        # Deterministic dataloader
-        def seed_worker(worker_id):
-            worker_seed = seed
-            np.random.seed(worker_seed)
-            torch.manual_seed(worker_seed)
-            random.seed(worker_seed)
-
-        return DataLoader(dataset,
-                          batch_size=batch_size,
-                          sampler=sampler,
-                          worker_init_fn=seed_worker,
-                          drop_last=drop_last,
-                          pin_memory=pin_memory,
-                          num_workers=num_workers,
-                          **_kwargs)
-
     def configure(
         self,
         model: nn.Module,

From 1f72d31d4bd459e1fdf318b6b81f7c9089276e97 Mon Sep 17 00:00:00 2001
From: ver217 <lhx0217@gmail.com>
Date: Thu, 4 May 2023 18:27:59 +0800
Subject: [PATCH 3/3] [booster] refactor unit tests

---
 .../test_plugin/test_dp_plugin_base.py        | 85 +++++++++++++++++++
 .../test_plugin/test_gemini_plugin.py         | 25 ------
 .../test_plugin/test_low_level_zero_plugin.py | 24 ------
 .../test_plugin/test_torch_ddp_plugin.py      | 48 -----------
 4 files changed, 85 insertions(+), 97 deletions(-)
 create mode 100644 tests/test_booster/test_plugin/test_dp_plugin_base.py

diff --git a/tests/test_booster/test_plugin/test_dp_plugin_base.py b/tests/test_booster/test_plugin/test_dp_plugin_base.py
new file mode 100644
index 000000000000..a2b94ba6ca81
--- /dev/null
+++ b/tests/test_booster/test_plugin/test_dp_plugin_base.py
@@ -0,0 +1,85 @@
+from typing import Callable, List, Tuple, Union
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
+from torch.utils.data import DataLoader, TensorDataset
+
+import colossalai
+from colossalai.booster.plugin.dp_plugin_base import DPPluginBase
+from colossalai.checkpoint_io import CheckpointIO
+from colossalai.interface import OptimizerWrapper
+from colossalai.testing import rerun_if_address_is_in_use, spawn
+
+
+class DPPluginWrapper(DPPluginBase):
+    """This is a wrapper class for testing DP plugin initialization and dataloader creation.
+    """
+
+    def configure(
+        self,
+        model: nn.Module,
+        optimizer: Optimizer,
+        criterion: Callable = None,
+        dataloader: DataLoader = None,
+        lr_scheduler: LRScheduler = None,
+    ) -> Tuple[Union[nn.Module, OptimizerWrapper, LRScheduler, DataLoader]]:
+        pass
+
+    def control_checkpoint_io(self) -> bool:
+        pass
+
+    def control_device(self) -> bool:
+        pass
+
+    def control_precision(self) -> bool:
+        pass
+
+    def get_checkpoint_io(self) -> CheckpointIO:
+        pass
+
+    def support_no_sync(self) -> bool:
+        pass
+
+    def supported_devices(self) -> List[str]:
+        pass
+
+    def supported_precisions(self) -> List[str]:
+        pass
+
+
+def check_dataloader_sharding():
+    plugin = DPPluginWrapper()
+
+    # create a custom dasetset with 0 to 10
+    dataset = TensorDataset(torch.arange(0, 10))
+    train_dataloader = plugin.prepare_train_dataloader(dataset, batch_size=2)
+
+    # get the first batch of data
+    batch = next(iter(train_dataloader))[0].cuda()
+    is_rank_0 = dist.get_rank() == 0
+
+    if is_rank_0:
+        batch_to_compare = batch.clone()
+    else:
+        batch_to_compare = batch
+    # pass to the rank 1 value to rank 0
+    dist.broadcast(batch_to_compare, src=1)
+
+    # compare on rank 0
+    if is_rank_0:
+        assert not torch.equal(batch,
+                               batch_to_compare), 'Same number was found across ranks but expected it to be different'
+
+
+def run_dist(rank, world_size, port):
+    # init dist env
+    colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
+    check_dataloader_sharding()
+
+
+@rerun_if_address_is_in_use()
+def test_dp_plugin_dataloader():
+    spawn(run_dist, 2)
diff --git a/tests/test_booster/test_plugin/test_gemini_plugin.py b/tests/test_booster/test_plugin/test_gemini_plugin.py
index 985d7989fc9d..c7b3676fb478 100644
--- a/tests/test_booster/test_plugin/test_gemini_plugin.py
+++ b/tests/test_booster/test_plugin/test_gemini_plugin.py
@@ -117,34 +117,9 @@ def check_gemini_plugin(init_method: str = 'none', early_stop: bool = True):
     assert len(failed_info) == 0, '\n'.join([f'{k}: {v}' for k, v in failed_info.items()])
 
 
-def check_dataloader_sharding():
-    plugin = GeminiPlugin()
-
-    # create a custom dasetset with 0 to 10
-    dataset = torch.utils.data.TensorDataset(torch.arange(0, 10))
-    train_dataloader = plugin.prepare_train_dataloader(dataset, batch_size=2)
-
-    # get the first batch of data
-    batch = next(iter(train_dataloader))[0].cuda()
-    is_rank_0 = dist.get_rank() == 0
-
-    if is_rank_0:
-        batch_to_compare = batch.clone()
-    else:
-        batch_to_compare = batch
-    # pass to the rank 1 value to rank 0
-    dist.broadcast(batch_to_compare, src=1)
-
-    # compare on rank 0
-    if is_rank_0:
-        assert not torch.equal(batch,
-                               batch_to_compare), 'Same number was found across ranks but expected it to be different'
-
-
 def run_dist(rank, world_size, port, early_stop: bool = True):
     # init dist env
     colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
-    check_dataloader_sharding()
     check_gemini_plugin(early_stop=early_stop)
 
 
diff --git a/tests/test_booster/test_plugin/test_low_level_zero_plugin.py b/tests/test_booster/test_plugin/test_low_level_zero_plugin.py
index e24196a14917..d84b96f77a75 100644
--- a/tests/test_booster/test_plugin/test_low_level_zero_plugin.py
+++ b/tests/test_booster/test_plugin/test_low_level_zero_plugin.py
@@ -83,30 +83,6 @@ def check_low_level_zero_plugin(stage: int, early_stop: bool = True):
     assert len(failed_info) == 0, '\n'.join([f'{k}: {v}' for k, v in failed_info.items()])
 
 
-def check_dataloader_sharding():
-    plugin = LowLevelZeroPlugin()
-
-    # create a custom dasetset with 0 to 10
-    dataset = torch.utils.data.TensorDataset(torch.arange(0, 10))
-    train_dataloader = plugin.prepare_train_dataloader(dataset, batch_size=2)
-
-    # get the first batch of data
-    batch = next(iter(train_dataloader))[0].cuda()
-    is_rank_0 = dist.get_rank() == 0
-
-    if is_rank_0:
-        batch_to_compare = batch.clone()
-    else:
-        batch_to_compare = batch
-    # pass to the rank 1 value to rank 0
-    dist.broadcast(batch_to_compare, src=1)
-
-    # compare on rank 0
-    if is_rank_0:
-        assert not torch.equal(batch,
-                               batch_to_compare), 'Same number was found across ranks but expected it to be different'
-
-
 def run_dist(rank, world_size, port, early_stop: bool = True):
     # init dist env
     colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
diff --git a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
index 5354eae01d40..30c4db12309f 100644
--- a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
+++ b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
@@ -44,57 +44,9 @@ def check_torch_ddp_plugin():
         torch.cuda.empty_cache()
 
 
-def check_dataloader_sharding():
-    plugin = TorchDDPPlugin()
-
-    # create a custom dasetset with 0 to 10
-    dataset = torch.utils.data.TensorDataset(torch.arange(0, 10))
-    train_dataloader = plugin.prepare_train_dataloader(dataset, batch_size=2)
-
-    # get the first batch of data
-    batch = next(iter(train_dataloader))[0].cuda()
-    is_rank_0 = dist.get_rank() == 0
-
-    if is_rank_0:
-        batch_to_compare = batch.clone()
-    else:
-        batch_to_compare = batch
-    # pass to the rank 1 value to rank 0
-    dist.broadcast(batch_to_compare, src=1)
-
-    # compare on rank 0
-    if is_rank_0:
-        assert not torch.equal(batch,
-                               batch_to_compare), 'Same number was found across ranks but expected it to be different'
-
-
-def check_checkpoint_save_and_load():
-    model_fn, data_gen_fn, output_transform_fn, _ = model_zoo['timm_resnet']
-
-    plugin = TorchDDPPlugin()
-    booster = Booster(plugin=plugin)
-
-    model = model_fn()
-    optimizer = SGD(model.parameters(), lr=1e-3)
-    criterion = lambda x: x.mean()
-    data = data_gen_fn()
-
-    data = {k: v.to('cuda') if torch.is_tensor(v) or 'Tensor' in v.__class__.__name__ else v for k, v in data.items()}
-
-    model, optimizer, criterion, _, _ = booster.boost(model, optimizer, criterion)
-
-    output = model(**data)
-    output = output_transform_fn(output)
-    output_key = list(output.keys())[0]
-    loss = criterion(output[output_key])
-
-    booster.backward(loss, optimizer)
-
-
 def run_dist(rank, world_size, port):
     # init dist env
     colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
-    check_dataloader_sharding()
     check_torch_ddp_plugin()