From 24713dae33b7fbc969908264dba4d117c9e536cd Mon Sep 17 00:00:00 2001 From: Benjamin Lefaudeux Date: Mon, 1 Feb 2021 12:09:31 -0800 Subject: [PATCH 1/4] removing the cpu test, not too interesting anyway --- tests/nn/data_parallel/test_sharded_ddp.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/nn/data_parallel/test_sharded_ddp.py b/tests/nn/data_parallel/test_sharded_ddp.py index 2277067a7..3364ba579 100644 --- a/tests/nn/data_parallel/test_sharded_ddp.py +++ b/tests/nn/data_parallel/test_sharded_ddp.py @@ -112,14 +112,11 @@ def run_test(backend, device, world_size=2): mp.spawn(run_one_step, args=(world_size, backend, device, temp_file_name), nprocs=world_size, join=True) -def test_step_on_cpu(): - run_test(backend=dist.Backend.GLOO, device=torch.device("cpu"), world_size=4) - - @skip_if_no_cuda @skip_if_single_gpu -def test_step_on_gpu(): +def test_step(): run_test(backend=dist.Backend.NCCL, device=torch.device("cuda")) + run_test(backend=dist.Backend.GLOO, device=torch.device("cuda")) def run_ddp_parity(rank, world_size, backend, temp_file_name): From 2dabbcc0f44a0d1c0920340d23c8cc7a54976059 Mon Sep 17 00:00:00 2001 From: Benjamin Lefaudeux Date: Mon, 1 Feb 2021 20:19:06 +0000 Subject: [PATCH 2/4] removing gloo for now --- tests/nn/data_parallel/test_sharded_ddp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/nn/data_parallel/test_sharded_ddp.py b/tests/nn/data_parallel/test_sharded_ddp.py index 3364ba579..7962d7ddc 100644 --- a/tests/nn/data_parallel/test_sharded_ddp.py +++ b/tests/nn/data_parallel/test_sharded_ddp.py @@ -116,7 +116,6 @@ def run_test(backend, device, world_size=2): @skip_if_single_gpu def test_step(): run_test(backend=dist.Backend.NCCL, device=torch.device("cuda")) - run_test(backend=dist.Backend.GLOO, device=torch.device("cuda")) def run_ddp_parity(rank, world_size, backend, temp_file_name): From f37fef218f73b81c9e7a99e6c85543d82b1c10a1 Mon Sep 17 00:00:00 2001 From: Benjamin Lefaudeux Date: Tue, 2 Feb 2021 01:14:11 +0000 Subject: [PATCH 3/4] review, thanks Min --- fairscale/utils/testing.py | 3 +++ tests/nn/data_parallel/test_sharded_ddp.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fairscale/utils/testing.py b/fairscale/utils/testing.py index d902383ba..55fad55f0 100644 --- a/fairscale/utils/testing.py +++ b/fairscale/utils/testing.py @@ -31,6 +31,7 @@ import multiprocessing import os import random +import sys import tempfile from typing import Any, Callable, Dict, List, Optional, Tuple @@ -53,6 +54,8 @@ not torch.cuda.is_available() or torch.cuda.device_count() < 2, reason="multiple GPUs required" ) +skip_if_py38 = pytest.mark.skipif(sys.version_info.major == 3 and sys.version_info.minor == 8) + _, filename_mpi = tempfile.mkstemp() diff --git a/tests/nn/data_parallel/test_sharded_ddp.py b/tests/nn/data_parallel/test_sharded_ddp.py index 7962d7ddc..b70baf3d1 100644 --- a/tests/nn/data_parallel/test_sharded_ddp.py +++ b/tests/nn/data_parallel/test_sharded_ddp.py @@ -21,7 +21,7 @@ from fairscale.nn.data_parallel import ShardedDataParallel from fairscale.optim import OSS -from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_single_gpu +from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_py38, skip_if_single_gpu def run_one_step(rank, world_size, backend, device, temp_file_name): @@ -114,10 +114,15 @@ def run_test(backend, device, world_size=2): @skip_if_no_cuda @skip_if_single_gpu -def test_step(): +def test_step_gpu(): run_test(backend=dist.Backend.NCCL, device=torch.device("cuda")) +@skip_if_py38 +def test_step_cpu(): + run_test(backend=dist.Backend.GLOO, device=torch.device("cpu")) + + def run_ddp_parity(rank, world_size, backend, temp_file_name): url = "file://" + temp_file_name dist.init_process_group(init_method=url, backend=backend, rank=rank, world_size=world_size) From 08beb607d5fe4f85de853b943a56799ce4a86ef4 Mon Sep 17 00:00:00 2001 From: Benjamin Lefaudeux Date: Tue, 2 Feb 2021 01:19:48 +0000 Subject: [PATCH 4/4] now with a reason --- fairscale/utils/testing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fairscale/utils/testing.py b/fairscale/utils/testing.py index 55fad55f0..85e06c6a3 100644 --- a/fairscale/utils/testing.py +++ b/fairscale/utils/testing.py @@ -54,7 +54,9 @@ not torch.cuda.is_available() or torch.cuda.device_count() < 2, reason="multiple GPUs required" ) -skip_if_py38 = pytest.mark.skipif(sys.version_info.major == 3 and sys.version_info.minor == 8) +skip_if_py38 = pytest.mark.skipif( + sys.version_info.major == 3 and sys.version_info.minor == 8, reason="Python3.8 is skipped" +) _, filename_mpi = tempfile.mkstemp()