From cb46972ddefb971aaa53387032a926d9b9c4bf9b Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Thu, 25 Sep 2025 15:41:21 -0600 Subject: [PATCH 1/2] Improve CUDA detection In Docker 19.03+ the NVIDIA Container Toolkit can integrate via an OCI prestart hook instead of requiring it to be listed as a separate runtime. Signed-off-by: Emily Casey --- pkg/gpu/gpu.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/gpu/gpu.go b/pkg/gpu/gpu.go index fd48802f..a4f1a663 100644 --- a/pkg/gpu/gpu.go +++ b/pkg/gpu/gpu.go @@ -2,6 +2,7 @@ package gpu import ( "context" + "os/exec" "github.com/docker/docker/client" ) @@ -25,5 +26,11 @@ func ProbeGPUSupport(ctx context.Context, dockerClient client.SystemAPIClient) ( if _, hasNvidia := info.Runtimes["nvidia"]; hasNvidia { return GPUSupportCUDA, nil } + + // If nvidia runtime is not listed, try searching for nvidia-container-runtime on PATH + if _, err := exec.LookPath("nvidia-container-runtime"); err == nil { + return GPUSupportCUDA, nil + } + return GPUSupportNone, nil } From 6dfffc58132797a1a560178ae0acad9adefd4f12 Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Thu, 25 Sep 2025 16:13:05 -0600 Subject: [PATCH 2/2] Only specify nvidia runtime if one exists Docker may support CUDA GPUs without an explicit nvidia runtime. Signed-off-by: Emily Casey --- pkg/gpu/gpu.go | 26 +++++++++++++++++++------- pkg/standalone/containers.go | 4 +++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/pkg/gpu/gpu.go b/pkg/gpu/gpu.go index a4f1a663..b1e1e9fd 100644 --- a/pkg/gpu/gpu.go +++ b/pkg/gpu/gpu.go @@ -19,18 +19,30 @@ const ( // ProbeGPUSupport determines whether or not the Docker engine has GPU support. 
func ProbeGPUSupport(ctx context.Context, dockerClient client.SystemAPIClient) (GPUSupport, error) { - info, err := dockerClient.Info(ctx) - if err != nil { - return GPUSupportNone, err - } - if _, hasNvidia := info.Runtimes["nvidia"]; hasNvidia { + // First search for nvidia-container-runtime on PATH + if _, err := exec.LookPath("nvidia-container-runtime"); err == nil { return GPUSupportCUDA, nil } - // If nvidia runtime is not listed, try searching for nvidia-container-runtime on PATH - if _, err := exec.LookPath("nvidia-container-runtime"); err == nil { + // Next look for explicitly configured nvidia runtime. This is not required in Docker 19.03+ but + // may be configured on some systems + hasNvidia, err := HasNVIDIARuntime(ctx, dockerClient) + if err != nil { + return GPUSupportNone, err + } + if hasNvidia { return GPUSupportCUDA, nil } return GPUSupportNone, nil } + +// HasNVIDIARuntime determines whether there is an nvidia runtime available +func HasNVIDIARuntime(ctx context.Context, dockerClient client.SystemAPIClient) (bool, error) { + info, err := dockerClient.Info(ctx) + if err != nil { + return false, err + } + _, hasNvidia := info.Runtimes["nvidia"] + return hasNvidia, nil +} diff --git a/pkg/standalone/containers.go b/pkg/standalone/containers.go index 78759e30..a5671215 100644 --- a/pkg/standalone/containers.go +++ b/pkg/standalone/containers.go @@ -268,7 +268,9 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client, nat.Port(portStr + "/tcp"): portBindings, } if gpu == gpupkg.GPUSupportCUDA { - hostConfig.Runtime = "nvidia" + if ok, err := gpupkg.HasNVIDIARuntime(ctx, dockerClient); err == nil && ok { + hostConfig.Runtime = "nvidia" + } hostConfig.DeviceRequests = []container.DeviceRequest{{Count: -1, Capabilities: [][]string{{"gpu"}}}} }