From ad755dcf46a434e9aacf58d68760600e2b39c95e Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 1 Feb 2021 20:28:22 +0800 Subject: [PATCH 1/6] [DLMED] fix evenly divisible issue in AUC metric Signed-off-by: Nic Ma --- monai/handlers/roc_auc.py | 24 ++++++++++++++++++++++++ tests/test_handler_rocauc_dist.py | 6 +++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py index dbca70bf25..b759db4144 100644 --- a/monai/handlers/roc_auc.py +++ b/monai/handlers/roc_auc.py @@ -13,9 +13,11 @@ import torch +from monai.handlers.utils import evenly_divisible_all_gather from monai.metrics import compute_roc_auc from monai.utils import Average, exact_version, optional_import +idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") EpochMetric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "EpochMetric") @@ -77,3 +79,25 @@ def _compute_fn(pred, label): check_compute_fn=False, device=device, ) + + def compute(self) -> float: + _prediction_tensor = torch.cat(self._predictions, dim=0) + _target_tensor = torch.cat(self._targets, dim=0) + + ws = idist.get_world_size() + if ws > 1 and not self._is_reduced: + # All gather across all processes + _prediction_tensor = evenly_divisible_all_gather(_prediction_tensor) + _target_tensor = evenly_divisible_all_gather(_target_tensor) + self._is_reduced = True + + result = 0.0 + if idist.get_rank() == 0: + # Run compute_fn on zero rank only + result = self.compute_fn(_prediction_tensor, _target_tensor) + + if ws > 1: + # broadcast result to all processes + result = idist.broadcast(result, src=0) + + return result.item() if torch.is_tensor(result) else result diff --git a/tests/test_handler_rocauc_dist.py b/tests/test_handler_rocauc_dist.py index 825b172064..c5cf44162c 100644 --- a/tests/test_handler_rocauc_dist.py +++ b/tests/test_handler_rocauc_dist.py @@ -31,12 +31,12 @@ def test_compute(self): auc_metric.update([y_pred, y]) if dist.get_rank() == 
1: - y_pred = torch.tensor([[0.2, 0.1], [0.1, 0.5]], device=device) - y = torch.tensor([[0], [1]], device=device) + y_pred = torch.tensor([[0.2, 0.1], [0.1, 0.5], [0.3, 0.4]], device=device) + y = torch.tensor([[0], [1], [1]], device=device) auc_metric.update([y_pred, y]) result = auc_metric.compute() - np.testing.assert_allclose(0.75, result) + np.testing.assert_allclose(0.66667, result, rtol=1e-4) if __name__ == "__main__": From 1f329318d5fc530fde446f72b0e6904b7f201922 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 1 Feb 2021 22:33:16 +0800 Subject: [PATCH 2/6] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- monai/handlers/roc_auc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py index b759db4144..7ae53ef9f8 100644 --- a/monai/handlers/roc_auc.py +++ b/monai/handlers/roc_auc.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Optional, Union +from typing import Callable, Optional, Union, Any import torch @@ -73,6 +73,7 @@ def _compute_fn(pred, label): average=Average(average), ) + self._is_reduced: bool = False super().__init__( compute_fn=_compute_fn, output_transform=output_transform, @@ -80,7 +81,7 @@ def _compute_fn(pred, label): device=device, ) - def compute(self) -> float: + def compute(self) -> Any: _prediction_tensor = torch.cat(self._predictions, dim=0) _target_tensor = torch.cat(self._targets, dim=0) @@ -91,7 +92,7 @@ def compute(self) -> float: _target_tensor = evenly_divisible_all_gather(_target_tensor) self._is_reduced = True - result = 0.0 + result: torch.Tensor = torch.zeros(1) if idist.get_rank() == 0: # Run compute_fn on zero rank only result = self.compute_fn(_prediction_tensor, _target_tensor) From 6bf4020ebe28ea030091ee4f4c48040d19c432f5 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Mon, 1 Feb 2021 14:37:35 +0000 Subject: [PATCH 3/6] [MONAI] python code 
formatting Signed-off-by: monai-bot --- monai/handlers/roc_auc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py index 7ae53ef9f8..2273b9ee89 100644 --- a/monai/handlers/roc_auc.py +++ b/monai/handlers/roc_auc.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Optional, Union, Any +from typing import Any, Callable, Optional, Union import torch From f4daa7c8c77cbefec69625be0357a83901f7d9ec Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 3 Feb 2021 22:24:31 +0800 Subject: [PATCH 4/6] [DLMED] fix a grammar typo Signed-off-by: Nic Ma --- monai/data/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 11a5682de6..d2a3ca4a53 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -572,8 +572,8 @@ class SmartCacheDataset(CacheDataset): 4. Call `shutdown()` when training ends. Note: - This replacement will not work if set the `multiprocessing_context` of DataLoader to `spawn` - or on windows(the default multiprocessing method is `spawn`) and set `num_workers` greater than 0 . + This replacement will not work if setting the `multiprocessing_context` of DataLoader to `spawn` + or on Windows (the default multiprocessing method is `spawn`) and setting `num_workers` greater than 0.
""" From c60e7ce0d62e55d485c932483905de992bfa9964 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 3 Feb 2021 23:22:22 +0800 Subject: [PATCH 5/6] [DLMED] add doc string Signed-off-by: Nic Ma --- monai/handlers/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 2165ad8860..0fbb0126c2 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -61,6 +61,9 @@ def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: Args: data: source tensor to pad and execute all_gather in distributed data parallel. + + Note: + The input data on different ranks must have exactly the same `dtype`. """ if not isinstance(data, torch.Tensor): From 8cbd10a78577ed8dcde9dbc28fe0b53e5c4ed64f Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 3 Feb 2021 15:33:08 +0000 Subject: [PATCH 6/6] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 0fbb0126c2..d0179e7f49 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -61,7 +61,7 @@ def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: Args: data: source tensor to pad and execute all_gather in distributed data parallel. - + Note: The input data on different ranks must have exactly the same `dtype`.