From 7f1278d3e8a4626d437acb78a7ca7239f4c28f22 Mon Sep 17 00:00:00 2001 From: CrazyMax <1951866+crazy-max@users.noreply.github.com> Date: Wed, 26 Mar 2025 11:07:20 +0100 Subject: [PATCH 1/2] contrib: support non PCI-based nvidia GPUs for WSL Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com> --- contrib/cdisetup/nvidia/nvidia.go | 68 +++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/contrib/cdisetup/nvidia/nvidia.go b/contrib/cdisetup/nvidia/nvidia.go index 9a4d32bab84c..fec166ffb674 100644 --- a/contrib/cdisetup/nvidia/nvidia.go +++ b/contrib/cdisetup/nvidia/nvidia.go @@ -39,8 +39,7 @@ type setup struct{} var _ cdidevices.Setup = &setup{} func (s *setup) Validate() error { - _, err := readVersion() - if err == nil { + if _, err := readVersion(); err == nil { return nil } b, err := hasNvidiaDevices() @@ -94,8 +93,11 @@ func (s *setup) Run(ctx context.Context) (err error) { } var needsDriver bool - - if _, err := os.Stat("/proc/driver/nvidia"); err != nil { + if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" { + if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err != nil { + needsDriver = true + } + } else if _, err := os.Stat("/proc/driver/nvidia"); err != nil { needsDriver = true } @@ -119,16 +121,22 @@ func (s *setup) Run(ctx context.Context) (err error) { }) } - version, err := readVersion() - if err != nil && !needsDriver { - return errors.Wrapf(err, "failed to read NVIDIA driver version") - } - if version == "" { - version = defaultVersion - } - v1, _, ok := strings.Cut(version, ".") - if !ok { - return errors.Errorf("failed to parse NVIDIA driver version %q", version) + var dv string + if !hasWSLGPU() { + version, err := readVersion() + if err != nil && !needsDriver { + return errors.Wrapf(err, "failed to read NVIDIA driver version") + } + if version == "" { + version = defaultVersion + } + var ok bool + dv, _, ok = strings.Cut(version, ".") + if !ok { + return errors.Errorf("failed to parse NVIDIA driver version %q", version) + } + } else if needsDriver { + return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs") } if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil { @@ -174,11 +182,11 @@ func (s *setup) Run(ctx context.Context) (err error) { return err } - if needsDriver { + if needsDriver && dv != "" { // this pretty much never works, is it even worth having? // better approach could be to try to create another chroot/container that is built with same kernel packages as the host // could nvidia-headless-no-dkms- be reusable - if err := run(ctx, []string{"apt-get", "install", "-y", "nvidia-driver-" + v1}, pw, dgst); err != nil { + if err := run(ctx, []string{"apt-get", "install", "-y", "nvidia-driver-" + dv}, pw, dgst); err != nil { return err } _, err := os.Stat("/proc/driver/nvidia") @@ -187,13 +195,19 @@ func (s *setup) Run(ctx context.Context) (err error) { } } - if err := run(ctx, []string{"apt-get", "install", "-y", "--no-install-recommends", - "libnvidia-compute-" + v1, - "libnvidia-extra-" + v1, - "libnvidia-gl-" + v1, - "nvidia-utils-" + v1, + pkgs := []string{ "nvidia-container-toolkit-base", - }, pw, dgst); err != nil { + } + if dv != "" { + pkgs = append(pkgs, []string{ + "libnvidia-compute-" + dv, + "libnvidia-extra-" + dv, + "libnvidia-gl-" + dv, + "nvidia-utils-" + dv, + }...) + } + + if err := run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst); err != nil { return err } @@ -268,6 +282,10 @@ func hasNvidiaDevices() (bool, error) { } } + if !found { + found = hasWSLGPU() + } + return found, nil } @@ -302,3 +320,9 @@ func isDebianOrUbuntu() (bool, error) { return id == "debian" || id == "ubuntu", nil } + +func hasWSLGPU() bool { + // WSL-specific GPU mapping that doesn't expose PCI info. + _, err := os.Stat("/dev/dxg") + return err == nil +} From f85a66c6a553a70c7f49ad60257a7174aa64b38c Mon Sep 17 00:00:00 2001 From: CrazyMax <1951866+crazy-max@users.noreply.github.com> Date: Thu, 27 Mar 2025 15:28:11 +0100 Subject: [PATCH 2/2] contrib: check if nvidia drivers are already installed Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com> --- contrib/cdisetup/nvidia/nvidia.go | 167 +++++++++++++++--------------- 1 file changed, 81 insertions(+), 86 deletions(-) diff --git a/contrib/cdisetup/nvidia/nvidia.go b/contrib/cdisetup/nvidia/nvidia.go index fec166ffb674..88b70b4196cb 100644 --- a/contrib/cdisetup/nvidia/nvidia.go +++ b/contrib/cdisetup/nvidia/nvidia.go @@ -25,10 +25,13 @@ import ( // This is example of experimental on-demand setup of a CDI devices. // This code is not currently shipping with BuildKit and will probably change. -const ( - cdiKind = "nvidia.com/gpu" - defaultVersion = "570.0" -) +const cdiKind = "nvidia.com/gpu" + +// https://github.com/ollama/ollama/blob/b816ff86c923e0290f58f2275e831fc17c29ba37/discover/gpu_linux.go#L33-L43 +var libcudaGlobs = []string{ + "/usr/lib/*-linux-gnu/libcuda.so*", + "/usr/lib/wsl/drivers/*/libcuda.so*", +} func init() { cdidevices.Register(cdiKind, &setup{}) @@ -92,51 +95,32 @@ func (s *setup) Run(ctx context.Context) (err error) { return errors.Errorf("NVIDIA setup is currently only supported on Debian/Ubuntu") } - var needsDriver bool - if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" { - if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err != nil { - needsDriver = true + needsDriver := true + if _, err := os.Stat("/proc/driver/nvidia"); err == nil { + needsDriver = false + } else if nvidiaSmi, err := exec.LookPath("nvidia-smi"); err == nil && nvidiaSmi != "" { + if err := run(ctx, []string{nvidiaSmi, "-L"}, pw, dgst); err == nil { + needsDriver = false } - } else if _, err := os.Stat("/proc/driver/nvidia"); err != nil { - needsDriver = true - } - - var arch string - switch runtime.GOARCH { - case "amd64": - arch = "x86_64" - case "arm64": - arch = "sbsa" - // for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb - } - - if arch == "" { - return errors.Errorf("unsupported architecture: %s", runtime.GOARCH) } - if needsDriver { - pw.Write(identity.NewID(), client.VertexWarning{ - Vertex: dgst, - Short: []byte("NVIDIA Drivers not found. Installing prebuilt drivers is not recommended"), - }) + if hasWSLGPU() { + return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs") + } + return errors.Errorf("NVIDIA drivers are required. Try loading NVIDIA kernel module with \"modprobe nvidia\" command") } var dv string - if !hasWSLGPU() { + if !hasLibsInstalled() && !hasWSLGPU() { version, err := readVersion() - if err != nil && !needsDriver { + if err != nil { return errors.Wrapf(err, "failed to read NVIDIA driver version") } - if version == "" { - version = defaultVersion - } var ok bool dv, _, ok = strings.Cut(version, ".") if !ok { return errors.Errorf("failed to parse NVIDIA driver version %q", version) } - } else if needsDriver { - return errors.Errorf("NVIDIA drivers are required for WSL with non PCI-based GPUs") } if err := run(ctx, []string{"apt-get", "update"}, pw, dgst); err != nil { @@ -147,9 +131,58 @@ func (s *setup) Run(ctx context.Context) (err error) { return err } + if err := installPackages(ctx, dv, pw, dgst); err != nil { + return err + } + + if err := os.MkdirAll("/etc/cdi", 0700); err != nil { + return errors.Wrapf(err, "failed to create /etc/cdi") + } + + buf := &bytes.Buffer{} + + cmd := exec.CommandContext(ctx, "nvidia-ctk", "cdi", "generate") + cmd.Stdout = buf + cmd.Stderr = newStream(pw, 2, dgst) + if err := cmd.Run(); err != nil { + return errors.Wrapf(err, "failed to generate CDI spec") + } + + if len(buf.Bytes()) == 0 { + return errors.Errorf("nvidia-ctk output is empty") + } + + if err := os.WriteFile("/etc/cdi/nvidia.yaml", buf.Bytes(), 0644); err != nil { + return errors.Wrapf(err, "failed to write /etc/cdi/nvidia.yaml") + } + + return nil +} + +func run(ctx context.Context, args []string, pw progress.Writer, dgst digest.Digest) error { + fmt.Fprintf(newStream(pw, 2, dgst), "> %s\n", strings.Join(args, " ")) + cmd := exec.CommandContext(ctx, args[0], args[1:]...) //nolint:gosec + cmd.Stderr = newStream(pw, 2, dgst) + cmd.Stdout = newStream(pw, 1, dgst) + return cmd.Run() +} + +func installPackages(ctx context.Context, dv string, pw progress.Writer, dgst digest.Digest) error { const aptDistro = "ubuntu2404" - aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/" + var arch string + switch runtime.GOARCH { + case "amd64": + arch = "x86_64" + case "arm64": + arch = "sbsa" + // for non-sbsa could use https://nvidia.github.io/libnvidia-container/stable/deb + } + if arch == "" { + return errors.Errorf("unsupported architecture: %s", runtime.GOARCH) + } + + aptURL := "https://developer.download.nvidia.com/compute/cuda/repos/" + aptDistro + "/" + arch + "/" keyTarget := "/usr/share/keyrings/nvidia-cuda-keyring.gpg" if _, err := os.Stat(keyTarget); err != nil { @@ -182,22 +215,7 @@ func (s *setup) Run(ctx context.Context) (err error) { return err } - if needsDriver && dv != "" { - // this pretty much never works, is it even worth having? - // better approach could be to try to create another chroot/container that is built with same kernel packages as the host - // could nvidia-headless-no-dkms- be reusable - if err := run(ctx, []string{"apt-get", "install", "-y", "nvidia-driver-" + dv}, pw, dgst); err != nil { - return err - } - _, err := os.Stat("/proc/driver/nvidia") - if err != nil { - return errors.Wrapf(err, "failed to install NVIDIA kernel module. Please install NVIDIA drivers manually") - } - } - - pkgs := []string{ - "nvidia-container-toolkit-base", - } + pkgs := []string{"nvidia-container-toolkit-base"} if dv != "" { pkgs = append(pkgs, []string{ "libnvidia-compute-" + dv, @@ -207,40 +225,7 @@ func (s *setup) Run(ctx context.Context) (err error) { }...) } - if err := run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst); err != nil { - return err - } - - if err := os.MkdirAll("/etc/cdi", 0700); err != nil { - return errors.Wrapf(err, "failed to create /etc/cdi") - } - - buf := &bytes.Buffer{} - - cmd := exec.CommandContext(ctx, "nvidia-ctk", "cdi", "generate") - cmd.Stdout = buf - cmd.Stderr = newStream(pw, 2, dgst) - if err := cmd.Run(); err != nil { - return errors.Wrapf(err, "failed to generate CDI spec") - } - - if len(buf.Bytes()) == 0 { - return errors.Errorf("nvidia-ctk output is empty") - } - - if err := os.WriteFile("/etc/cdi/nvidia.yaml", buf.Bytes(), 0644); err != nil { - return errors.Wrapf(err, "failed to write /etc/cdi/nvidia.yaml") - } - - return nil -} - -func run(ctx context.Context, args []string, pw progress.Writer, dgst digest.Digest) error { - fmt.Fprintf(newStream(pw, 2, dgst), "> %s\n", strings.Join(args, " ")) - cmd := exec.CommandContext(ctx, args[0], args[1:]...) //nolint:gosec - cmd.Stderr = newStream(pw, 2, dgst) - cmd.Stdout = newStream(pw, 1, dgst) - return cmd.Run() + return run(ctx, append([]string{"apt-get", "install", "-y", "--no-install-recommends"}, pkgs...), pw, dgst) } func readVersion() (string, error) { @@ -326,3 +311,13 @@ func hasWSLGPU() bool { _, err := os.Stat("/dev/dxg") return err == nil } + +func hasLibsInstalled() bool { + // Check for libcuda in the standard locations to confirm NVIDIA GPU drivers + for _, p := range libcudaGlobs { + if matches, err := filepath.Glob(p); err == nil && len(matches) > 0 { + return true + } + } + return false +}