From 21a636b7fcf5f1bb404f4d1b35ec75dc7e05155b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Nov 2022 19:41:41 -0500 Subject: [PATCH 1/4] skip `get_gpus` subprocess when TF is cpu only When I benchmarked deepmd-kit on my machine, I found that `get_gpus` takes about 2s and is quite slow. In #905, a subprocess was added to detect the available GPUs. As benchmarked in #2121, importing tensorflow is quite slow. I don't have a better idea that avoids calling a subprocess, but we can skip the call entirely when TensorFlow is not built with GPU support. The tests on GitHub Actions will also benefit. Attached selected profiling: > ncalls tottime percall cumtime percall filename:lineno(function) > 1 0.000 0.000 2.141 2.141 local.py:15(get_gpus) > 1 0.000 0.000 2.133 2.133 subprocess.py:1090(communicate) --- deepmd/cluster/local.py | 4 ++++ source/tests/test_cluster.py | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/deepmd/cluster/local.py b/deepmd/cluster/local.py index 6fe454a9a2..69af55040d 100644 --- a/deepmd/cluster/local.py +++ b/deepmd/cluster/local.py @@ -21,6 +21,10 @@ def get_gpus(): Optional[List[int]] List of available GPU IDs. Otherwise, None. 
""" + if (not tf.test.is_built_with_cuda() and + not (hasattr(tf.test, 'is_built_with_rocm') and tf.test.is_built_with_rocm())): + # TF is built with CPU only, skip expensive subprocess call + return None test_cmd = 'from tensorflow.python.client import device_lib; ' \ 'devices = device_lib.list_local_devices(); ' \ 'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' \ diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py index 01e128b401..1aa700d1c7 100644 --- a/source/tests/test_cluster.py +++ b/source/tests/test_cluster.py @@ -23,25 +23,37 @@ def returncode(self): class TestGPU(unittest.TestCase): @mock.patch('subprocess.Popen') - def test_none(self, mock_Popen): + @mock.patch('tf.test.is_built_with_cuda') + def test_none(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'0', b'') + mock_is_built_with_cuda.return_value = True gpus = local.get_gpus() self.assertIsNone(gpus) @mock.patch('subprocess.Popen') - def test_valid(self, mock_Popen): + @mock.patch('tf.test.is_built_with_cuda') + def test_valid(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'2', b'') + mock_is_built_with_cuda.return_value = True gpus = local.get_gpus() self.assertEqual(gpus, [0, 1]) @mock.patch('subprocess.Popen') - def test_error(self, mock_Popen): + @mock.patch('tf.test.is_built_with_cuda') + def test_error(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = \ FakePopen(stderr=b'!', returncode=1) + mock_is_built_with_cuda.return_value = True with self.assertRaises(RuntimeError) as cm: _ = local.get_gpus() self.assertIn('Failed to detect', str(cm.exception)) + @mock.patch('tf.test.is_built_with_cuda') + def test_cpu(self, mock_is_built_with_cuda): + mock_is_built_with_cuda.return_value = False + gpus = local.get_gpus() + self.assertIsNone(gpus) + class TestLocal(unittest.TestCase): 
@mock.patch('socket.gethostname') From 8e55bcca4ff1ad2e69bd21d8fb24f1a403c4724b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Nov 2022 19:54:48 -0500 Subject: [PATCH 2/4] fix mock.patch --- source/tests/test_cluster.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py index 1aa700d1c7..562fa87ab4 100644 --- a/source/tests/test_cluster.py +++ b/source/tests/test_cluster.py @@ -1,6 +1,7 @@ import unittest from deepmd.cluster import local, slurm +from deepmd.env import tf from unittest import mock @@ -23,7 +24,7 @@ def returncode(self): class TestGPU(unittest.TestCase): @mock.patch('subprocess.Popen') - @mock.patch('tf.test.is_built_with_cuda') + @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') def test_none(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'0', b'') mock_is_built_with_cuda.return_value = True @@ -31,7 +32,7 @@ def test_none(self, mock_Popen, mock_is_built_with_cuda): self.assertIsNone(gpus) @mock.patch('subprocess.Popen') - @mock.patch('tf.test.is_built_with_cuda') + @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') def test_valid(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'2', b'') mock_is_built_with_cuda.return_value = True @@ -39,7 +40,7 @@ def test_valid(self, mock_Popen, mock_is_built_with_cuda): self.assertEqual(gpus, [0, 1]) @mock.patch('subprocess.Popen') - @mock.patch('tf.test.is_built_with_cuda') + @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') def test_error(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = \ FakePopen(stderr=b'!', returncode=1) @@ -48,7 +49,7 @@ def test_error(self, mock_Popen, mock_is_built_with_cuda): _ = local.get_gpus() self.assertIn('Failed to detect', str(cm.exception)) - @mock.patch('tf.test.is_built_with_cuda') + 
@mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') def test_cpu(self, mock_is_built_with_cuda): mock_is_built_with_cuda.return_value = False gpus = local.get_gpus() From e3aed0132685e79ec9b5aab106e73115c4cb3b09 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Nov 2022 20:07:51 -0500 Subject: [PATCH 3/4] fix patch --- source/tests/test_cluster.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py index 562fa87ab4..f096914a36 100644 --- a/source/tests/test_cluster.py +++ b/source/tests/test_cluster.py @@ -23,24 +23,24 @@ def returncode(self): class TestGPU(unittest.TestCase): - @mock.patch('subprocess.Popen') @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') + @mock.patch('subprocess.Popen') def test_none(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'0', b'') mock_is_built_with_cuda.return_value = True gpus = local.get_gpus() self.assertIsNone(gpus) - @mock.patch('subprocess.Popen') @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') + @mock.patch('subprocess.Popen') def test_valid(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = FakePopen(b'2', b'') mock_is_built_with_cuda.return_value = True gpus = local.get_gpus() self.assertEqual(gpus, [0, 1]) - @mock.patch('subprocess.Popen') @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') + @mock.patch('subprocess.Popen') def test_error(self, mock_Popen, mock_is_built_with_cuda): mock_Popen.return_value.__enter__.return_value = \ FakePopen(stderr=b'!', returncode=1) From d5451508a4dd4cd6407947071b51a382c841067e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Nov 2022 20:56:05 -0500 Subject: [PATCH 4/4] patch rocm Signed-off-by: Jinzhe Zeng --- source/tests/test_cluster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/tests/test_cluster.py 
b/source/tests/test_cluster.py index f096914a36..d67b572bb1 100644 --- a/source/tests/test_cluster.py +++ b/source/tests/test_cluster.py @@ -49,9 +49,11 @@ def test_error(self, mock_Popen, mock_is_built_with_cuda): _ = local.get_gpus() self.assertIn('Failed to detect', str(cm.exception)) + @mock.patch('tensorflow.compat.v1.test.is_built_with_rocm', create=True) @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda') - def test_cpu(self, mock_is_built_with_cuda): + def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm): mock_is_built_with_cuda.return_value = False + mock_is_built_with_rocm.return_value = False gpus = local.get_gpus() self.assertIsNone(gpus)