diff --git a/deepmd/cluster/local.py b/deepmd/cluster/local.py
index 6fe454a9a2..69af55040d 100644
--- a/deepmd/cluster/local.py
+++ b/deepmd/cluster/local.py
@@ -21,6 +21,10 @@ def get_gpus():
     Optional[List[int]]
         List of available GPU IDs. Otherwise, None.
     """
+    if (not tf.test.is_built_with_cuda() and 
+        not (hasattr(tf.test, 'is_built_with_rocm') and tf.test.is_built_with_rocm())):
+        # TF is built with CPU only, skip expensive subprocess call
+        return None
     test_cmd = 'from tensorflow.python.client import device_lib; ' \
                'devices = device_lib.list_local_devices(); ' \
                'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' \
diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py
index 01e128b401..d67b572bb1 100644
--- a/source/tests/test_cluster.py
+++ b/source/tests/test_cluster.py
@@ -1,6 +1,7 @@
 import unittest
 
 from deepmd.cluster import local, slurm
+from deepmd.env import tf
 from unittest import mock
 
 
@@ -22,26 +23,50 @@ def returncode(self):
 
 
 class TestGPU(unittest.TestCase):
+    """Tests for ``local.get_gpus`` with TF build checks and Popen mocked."""
+    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
     @mock.patch('subprocess.Popen')
-    def test_none(self, mock_Popen):
+    def test_none(self, mock_Popen, mock_is_built_with_cuda):
+        """CUDA build reported, ``FakePopen(b'0', b'')``: expect ``None``."""
         mock_Popen.return_value.__enter__.return_value = FakePopen(b'0', b'')
+        mock_is_built_with_cuda.return_value = True
         gpus = local.get_gpus()
         self.assertIsNone(gpus)
 
+    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
     @mock.patch('subprocess.Popen')
-    def test_valid(self, mock_Popen):
+    def test_valid(self, mock_Popen, mock_is_built_with_cuda):
+        """CUDA build reported, ``FakePopen(b'2', b'')``: expect ``[0, 1]``."""
         mock_Popen.return_value.__enter__.return_value = FakePopen(b'2', b'')
+        mock_is_built_with_cuda.return_value = True
         gpus = local.get_gpus()
         self.assertEqual(gpus, [0, 1])
 
+    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
     @mock.patch('subprocess.Popen')
-    def test_error(self, mock_Popen):
+    def test_error(self, mock_Popen, mock_is_built_with_cuda):
+        """CUDA build reported, return code 1: expect ``RuntimeError``."""
         mock_Popen.return_value.__enter__.return_value = \
             FakePopen(stderr=b'!', returncode=1)
+        mock_is_built_with_cuda.return_value = True
         with self.assertRaises(RuntimeError) as cm:
             _ = local.get_gpus()
-            self.assertIn('Failed to detect', str(cm.exception))
+        # Check the message after the ``with`` block: statements that follow
+        # the raising call inside the block are never executed.
+        self.assertIn('Failed to detect', str(cm.exception))
 
+    @mock.patch('subprocess.Popen')
+    @mock.patch('tensorflow.compat.v1.test.is_built_with_rocm', create=True)
+    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
+    def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm,
+                 mock_Popen):
+        """No CUDA/ROCm build: expect ``None`` and no subprocess spawned."""
+        mock_is_built_with_cuda.return_value = False
+        mock_is_built_with_rocm.return_value = False
+        gpus = local.get_gpus()
+        self.assertIsNone(gpus)
+        mock_Popen.assert_not_called()
+
 
 class TestLocal(unittest.TestCase):
     @mock.patch('socket.gethostname')