Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .hadolint.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Copyright (c) 2025, NVIDIA CORPORATION.
# Copyright (c) 2025-2026, NVIDIA CORPORATION.

# Rule codes listed under 'ignored' are the hadolint checks switched off by this config.
# Each code is preceded by the text of the warning it suppresses.
ignored:
# warning: Pin versions in apt get install.
- DL3008
# warning: Pin versions in pip.
- DL3013
# warning: Specify version with `dnf install -y <package>-<version>`
- DL3041
# warning: Avoid use of cache directory with pip.
- DL3042
163 changes: 162 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ARG CUDA_VER=notset
# Distro name and version are declared separately and joined into LINUX_VER.
ARG LINUX_DISTRO=ubuntu
ARG LINUX_DISTRO_VER=22.04
ARG LINUX_VER=${LINUX_DISTRO}${LINUX_DISTRO_VER}
# MINIFORGE_VER is used as the tag of the condaforge/miniforge3 image in a FROM line further down.
# 'notset' is a placeholder; presumably a real tag must be supplied with --build-arg — confirm.
ARG MINIFORGE_VER=notset
ARG PYTHON_VER=notset
ARG RAPIDS_VER=26.02

Expand Down Expand Up @@ -56,8 +57,168 @@ apt-get purge -y --auto-remove \
rm -rf /var/lib/apt/lists/*
EOF

# --- begin 'rapidsai/miniforge-cuda' --- #
# NOTE(review): these ARG lines come before the next FROM, so they appear to be scoped to whatever
# stage precedes them rather than to 'miniforge-upstream' — confirm they are needed at this position.
ARG CUDA_VER=notset
ARG LINUX_VER=notset
ARG PYTHON_VER=notset
ARG MINIFORGE_VER=notset

# Upstream miniforge image; its /opt/conda is copied into the 'miniforge-cuda' stage later in this file.
FROM condaforge/miniforge3:${MINIFORGE_VER} AS miniforge-upstream

# Put conda's executables first on PATH.
ENV PATH=/opt/conda/bin:$PATH

# Run all following RUN steps through bash with errexit (-e), nounset (-u) and pipefail enabled.
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# Install gha-tools
# The release tarball is unpacked straight into /usr/local/bin. wget is installed only for the
# download and purged again within the same RUN step.
RUN <<EOF
# try 'apt-get update' at most 5 times, waiting 10 seconds between attempts
attempt=1
while ! apt-get update -y; do
    if (( attempt >= 5 )); then
        break
    fi
    attempt=$(( attempt + 1 ))
    sleep 10
done

apt-get install -y --no-install-recommends wget

# stream the tarball to tar instead of writing it to disk
wget -q https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - |
    tar -xz -C /usr/local/bin

apt-get purge -y wget && apt-get autoremove -y
rm -rf /var/lib/apt/lists/*
EOF

# Update every package in the upstream image's 'base' conda environment.
RUN <<EOF
# Ensure new files/dirs have group write permissions
umask 002

# Example of pinned package in case you require an override
# echo '<PACKAGE_NAME>==<VERSION>' >> /opt/conda/conda-meta/pinned

# update everything before other environment changes, to ensure mixing
# an older conda with newer packages still works well
# (rapids-mamba-retry is presumably one of the gha-tools unpacked into /usr/local/bin earlier in this stage — confirm)
rapids-mamba-retry update --all -y -n base
EOF

################################ build miniforge-cuda using updated miniforge-upstream from above ###############################

# CUDA base image for the requested CUDA version and distro; conda is added on top of it in this stage.
FROM nvidia/cuda:${CUDA_VER}-base-${LINUX_VER} AS miniforge-cuda

# Re-declared inside the stage so that the RUN steps of this stage can read them.
# NOTE(review): an in-stage default of 'notset' presumably takes precedence over the global default,
# so LINUX_VER would be 'notset' here unless it is passed with --build-arg — confirm the build always passes it.
ARG CUDA_VER=notset
ARG LINUX_VER=notset
ARG PYTHON_VER=notset
# Declared as ARG rather than ENV, so it applies to the build only.
ARG DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/conda/bin:$PATH
# Makes the requested Python version available to later RUN steps as PYTHON_VERSION.
ENV PYTHON_VERSION=${PYTHON_VER}

# Run all following RUN steps through bash with errexit (-e), nounset (-u) and pipefail enabled.
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# Set apt policy configurations
# On Ubuntu images, raise apt's retry count and its http/https timeouts.
# `dnf` already defaults to 10 retries, so nothing is written for other distros.
RUN <<EOF
if [[ "${LINUX_VER}" == "ubuntu"* ]]; then
    printf '%s\n' 'APT::Update::Error-Mode "any";' > /etc/apt/apt.conf.d/warnings-as-errors
    printf '%s\n' 'APT::Acquire::Retries "10";' > /etc/apt/apt.conf.d/retries
    printf '%s\n' 'APT::Acquire::https::Timeout "240";' > /etc/apt/apt.conf.d/https-timeout
    printf '%s\n' 'APT::Acquire::http::Timeout "240";' > /etc/apt/apt.conf.d/http-timeout
fi
EOF

# Install gha-tools
# The release tarball is unpacked straight into /usr/local/bin. wget is installed only for the
# download and purged again within the same RUN step.
# NOTE(review): this step calls apt-get without checking LINUX_VER, unlike the distro-specific
# steps around it — confirm only apt-based images are built from this stage.
RUN <<EOF
# try 'apt-get update' at most 5 times, waiting 10 seconds between attempts
attempt=1
while ! apt-get update -y; do
    if (( attempt >= 5 )); then
        break
    fi
    attempt=$(( attempt + 1 ))
    sleep 10
done

apt-get install -y --no-install-recommends wget

# stream the tarball to tar instead of writing it to disk
wget -q https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - |
    tar -xz -C /usr/local/bin

apt-get purge -y wget && apt-get autoremove -y
rm -rf /var/lib/apt/lists/*
EOF

# Create a conda group and assign it as root's primary group
# (root's primary group is set back to 'root' further down, once the conda environment is in place)
RUN <<EOF
groupadd conda
usermod -g conda root
EOF

# Ownership & permissions based on https://docs.anaconda.com/anaconda/install/multi-user/#multi-user-anaconda-installation-on-linux
# Copies the conda installation out of the 'miniforge-upstream' stage, owned by root:conda with mode 770.
COPY --from=miniforge-upstream --chown=root:conda --chmod=770 /opt/conda /opt/conda

# Ensure new files are created with group write access & setgid. See https://unix.stackexchange.com/a/12845
RUN chmod g+ws /opt/conda

# Install the requested Python version into conda's 'base' environment, then remove files that are not needed.
RUN <<EOF
# Ensure new files/dirs have group write permissions
umask 002

# install expected Python version
# PYTHON_VERSION is assumed to look like "<major>.<minor>"; the minor part is used in shell arithmetic below
PYTHON_MAJOR_VERSION=${PYTHON_VERSION%%.*}
PYTHON_MINOR_VERSION=${PYTHON_VERSION#*.}
# exclusive upper bound for the version constraint: "<major>.<minor + 1>.0a0"
PYTHON_UPPER_BOUND="${PYTHON_MAJOR_VERSION}.$((PYTHON_MINOR_VERSION+1)).0a0"
# zero-pad the minor version to two digits so the string comparison below orders single-digit minors correctly
PYTHON_MINOR_PADDED=$(printf "%02d" "$PYTHON_MINOR_VERSION")
PYTHON_VERSION_PADDED="${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_PADDED}"
# 'shellcheck' is unhappy with the use of '>' to compare decimals here, but it works as expected for the 'bash' version in these
# images, and installing 'bc' or using a Python interpreter seem heavy for this purpose.
#
# shellcheck disable=SC2072
if [[ "$PYTHON_VERSION_PADDED" > "3.12" ]]; then
# newer than 3.12: the build-string pattern ends in "cp<major><minor>"
PYTHON_ABI_TAG="cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}"
else
# 3.12 and older: the build-string pattern ends in "cpython"
PYTHON_ABI_TAG="cpython"
fi
# constrain python to [PYTHON_VERSION, PYTHON_UPPER_BOUND) with a build string matching "*_<PYTHON_ABI_TAG>"
rapids-mamba-retry install -y -n base "python>=${PYTHON_VERSION},<${PYTHON_UPPER_BOUND}=*_${PYTHON_ABI_TAG}"
rapids-mamba-retry update --all -y -n base
# on rockylinux, install findutils before the 'find' calls below
if [[ "$LINUX_VER" == "rockylinux"* ]]; then
dnf install -y findutils
dnf clean all
fi
# delete static libraries and compiled bytecode from the conda installation
find /opt/conda -follow -type f -name '*.a' -delete
find /opt/conda -follow -type f -name '*.pyc' -delete
# clean up conda's caches
# NOTE(review): this line used to be labelled "recreate missing libstdc++ symlinks", but nothing in this step creates symlinks
conda clean -aiptfy
EOF

# Reassign root's primary group to root
RUN usermod -g root root

# ensure conda environment is always activated
RUN <<EOF
# make conda's shell setup script available under /etc/profile.d
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh

# append the activation line to the skeleton .bashrc first, then to the current user's .bashrc
for bashrc in /etc/skel/.bashrc ~/.bashrc; do
    echo ". /opt/conda/etc/profile.d/conda.sh; conda activate base" >> "${bashrc}"
done
EOF

# tzdata is needed by the ORC library used by pyarrow, because it provides /etc/localtime
# On Ubuntu 24.04 and newer, we also need tzdata-legacy
#
# Ubuntu: refresh the package index, upgrade installed packages, install the tzdata package(s),
#         then remove the apt package lists in the same layer.
# Rocky Linux: update installed packages and clean the dnf caches.
# Any other LINUX_VER value fails the build.
RUN <<EOF
case "${LINUX_VER}" in
    "ubuntu"*)
        PACKAGES_TO_INSTALL=(
            tzdata
        )

        # VERSION_ID in /etc/os-release is a quoted value; take the text between the quotes
        os_version=$(grep 'VERSION_ID' /etc/os-release | cut -d '"' -f 2)
        # 'shellcheck' is unhappy with the use of '>' to compare decimals here, but it works as expected for the 'bash' version in these
        # images, and installing 'bc' or using a Python interpreter seem heavy for this purpose.
        #
        # shellcheck disable=SC2072
        if [[ "${os_version}" > "24.04" ]] || [[ "${os_version}" == "24.04" ]]; then
            PACKAGES_TO_INSTALL+=(tzdata-legacy)
        fi

        rapids-retry apt-get update -y
        apt-get upgrade -y
        apt-get install -y --no-install-recommends \
            "${PACKAGES_TO_INSTALL[@]}"

        # The glob has to stay outside quotes: a quoted '*' is taken literally, so 'rm' would look
        # for a file named '*' and leave the package lists in the image layer.
        rm -rf /var/lib/apt/lists/*
        ;;
    "rockylinux"*)
        dnf update -y
        dnf clean all
        ;;
    *)
        echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1
        ;;
esac
EOF

# --- end 'rapidsai/miniforge-cuda' --- #

# Base image
FROM rapidsai/miniforge-cuda:${RAPIDS_VER}-cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER} AS base
FROM miniforge-cuda AS base
ARG CUDA_VER=notset
ARG PYTHON_VER=notset
ARG RAPIDS_VER=notset
Expand Down
3 changes: 2 additions & 1 deletion cuvs-bench/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# syntax=docker/dockerfile:1
# Copyright (c) 2024-2026, NVIDIA CORPORATION.

ARG MINIFORGE_VER=notset
ARG PYTHON_VER=notset
ARG RAPIDS_VER=26.02

FROM condaforge/miniforge3:24.11.3-2 AS cuvs-bench-cpu
FROM condaforge/miniforge3:${MINIFORGE_VER} AS cuvs-bench-cpu
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling out that this is technically a breaking change, because this updates the condaforge/miniforge3 image used as the base for rapidsai/cuvs-bench-cpu from 24.11.3-2 to 25.9.1-0.

I think that's acceptable, especially because the builds do a rapids-mamba-retry update --all -y -n base a few lines down, but just calling it out.

I tested this using the command below, with a CPU-only algorithm and the smallest dataset listed at https://github.com/rapidsai/cuvs/blob/bae4cdbd0003c1572c0043541ff9826a2628762a/docs/source/cuvs_bench/index.rst

IMAGE_URI="docker.io/rapidsai/staging:docker-cuvs-bench-cpu-836-26.02a-py3.10-amd64@sha256:9026429656d09df7e2f59e75d6c6c4db0a112251378cce7577c45e8547404346"

docker run --rm -it \
    "${IMAGE_URI}" \
     "--dataset fashion-mnist-784-euclidean" \
     "--normalize" \
     "--algorithms hnswlib --batch-size 10 -k 10" \
     ""

And it seemed to run ok.

image

ARG PYTHON_VER=notset
ARG RAPIDS_VER=notset

Expand Down
157 changes: 156 additions & 1 deletion cuvs-bench/gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,165 @@

# CUDA_VER and LINUX_VER together select the nvidia/cuda base image tag used further down.
ARG CUDA_VER=notset
ARG LINUX_VER=notset
# MINIFORGE_VER is used as the tag of the condaforge/miniforge3 image in a FROM line further down.
# 'notset' is a placeholder; presumably a real tag must be supplied with --build-arg — confirm.
ARG MINIFORGE_VER=notset
ARG PYTHON_VER=notset
ARG RAPIDS_VER=26.02

FROM rapidsai/miniforge-cuda:${RAPIDS_VER}-cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER} AS cuvs-bench
# --- begin 'rapidsai/miniforge-cuda' --- #
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a lot of boilerplate to include twice. We're sure this is better than having a common miniforge-cuda image and using that?

Copy link
Copy Markdown
Member Author

@jameslamb jameslamb Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really really think it is worth it.

Being able to focus https://github.com/rapidsai/ci-imgs only on RAPIDS CI and not need to think about how changes there will affect users on Brev, Databricks, Sagemaker, etc. is well worth the duplication.

Also these images get published to NGC and go through more rigorous compliance and security scanning than the CI images, so it's helpful to know we can make changes in https://github.com/rapidsai/ci-imgs that won't endanger our ability to release these on-time.

It'll be a lot less boilerplate in a follow-up PR where I remove the things that are unnecessary in this context (for example, we don't build RockyLinux images here so anything about that OS can be cut out). I intentionally made this PR almost a straight copy-paste of the Dockerfiles so we could have high confidence this wasn't changing the resulting images too much.

I think there's also significant opportunity to move some of the identical code into scripts in shared context, instead of having them repeated across the Dockerfiles.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! Thanks for the context.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pun intended? 😛

# Upstream miniforge image; its /opt/conda is copied into the 'miniforge-cuda' stage later in this file.
FROM condaforge/miniforge3:${MINIFORGE_VER} AS miniforge-upstream

# Put conda's executables first on PATH.
ENV PATH=/opt/conda/bin:$PATH

# Run all following RUN steps through bash with errexit (-e), nounset (-u) and pipefail enabled.
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# Install gha-tools
# The release tarball is unpacked straight into /usr/local/bin. wget is installed only for the
# download and purged again within the same RUN step.
RUN <<EOF
# try 'apt-get update' at most 5 times, waiting 10 seconds between attempts
attempt=1
while ! apt-get update -y; do
    if (( attempt >= 5 )); then
        break
    fi
    attempt=$(( attempt + 1 ))
    sleep 10
done

apt-get install -y --no-install-recommends wget

# stream the tarball to tar instead of writing it to disk
wget -q https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - |
    tar -xz -C /usr/local/bin

apt-get purge -y wget && apt-get autoremove -y
rm -rf /var/lib/apt/lists/*
EOF

# Update every package in the upstream image's 'base' conda environment.
RUN <<EOF
# Ensure new files/dirs have group write permissions
umask 002

# Example of pinned package in case you require an override
# echo '<PACKAGE_NAME>==<VERSION>' >> /opt/conda/conda-meta/pinned

# update everything before other environment changes, to ensure mixing
# an older conda with newer packages still works well
# (rapids-mamba-retry is presumably one of the gha-tools unpacked into /usr/local/bin earlier in this stage — confirm)
rapids-mamba-retry update --all -y -n base
EOF

################################ build miniforge-cuda using updated miniforge-upstream from above ###############################

# CUDA base image for the requested CUDA version and distro; conda is added on top of it in this stage.
FROM nvidia/cuda:${CUDA_VER}-base-${LINUX_VER} AS miniforge-cuda

# Re-declared inside the stage so that the RUN steps of this stage can read them.
ARG CUDA_VER=notset
ARG LINUX_VER=notset
ARG PYTHON_VER=notset
# Declared as ARG rather than ENV, so it applies to the build only.
ARG DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/conda/bin:$PATH
# Makes the requested Python version available to later RUN steps as PYTHON_VERSION.
ENV PYTHON_VERSION=${PYTHON_VER}

# Run all following RUN steps through bash with errexit (-e), nounset (-u) and pipefail enabled.
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# Set apt policy configurations
# On Ubuntu images, raise apt's retry count and its http/https timeouts.
# `dnf` already defaults to 10 retries, so nothing is written for other distros.
RUN <<EOF
if [[ "${LINUX_VER}" == "ubuntu"* ]]; then
    printf '%s\n' 'APT::Update::Error-Mode "any";' > /etc/apt/apt.conf.d/warnings-as-errors
    printf '%s\n' 'APT::Acquire::Retries "10";' > /etc/apt/apt.conf.d/retries
    printf '%s\n' 'APT::Acquire::https::Timeout "240";' > /etc/apt/apt.conf.d/https-timeout
    printf '%s\n' 'APT::Acquire::http::Timeout "240";' > /etc/apt/apt.conf.d/http-timeout
fi
EOF

# Install gha-tools
# The release tarball is unpacked straight into /usr/local/bin. wget is installed only for the
# download and purged again within the same RUN step.
# NOTE(review): this step calls apt-get without checking LINUX_VER, unlike the distro-specific
# steps around it — confirm only apt-based images are built from this stage.
RUN <<EOF
# try 'apt-get update' at most 5 times, waiting 10 seconds between attempts
attempt=1
while ! apt-get update -y; do
    if (( attempt >= 5 )); then
        break
    fi
    attempt=$(( attempt + 1 ))
    sleep 10
done

apt-get install -y --no-install-recommends wget

# stream the tarball to tar instead of writing it to disk
wget -q https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - |
    tar -xz -C /usr/local/bin

apt-get purge -y wget && apt-get autoremove -y
rm -rf /var/lib/apt/lists/*
EOF

# Create a conda group and assign it as root's primary group
# (root's primary group is set back to 'root' further down, once the conda environment is in place)
RUN <<EOF
groupadd conda
usermod -g conda root
EOF

# Ownership & permissions based on https://docs.anaconda.com/anaconda/install/multi-user/#multi-user-anaconda-installation-on-linux
# Copies the conda installation out of the 'miniforge-upstream' stage, owned by root:conda with mode 770.
COPY --from=miniforge-upstream --chown=root:conda --chmod=770 /opt/conda /opt/conda

# Ensure new files are created with group write access & setgid. See https://unix.stackexchange.com/a/12845
RUN chmod g+ws /opt/conda

# Install the requested Python version into conda's 'base' environment, then remove files that are not needed.
RUN <<EOF
# Ensure new files/dirs have group write permissions
umask 002

# install expected Python version
# PYTHON_VERSION is assumed to look like "<major>.<minor>"; the minor part is used in shell arithmetic below
PYTHON_MAJOR_VERSION=${PYTHON_VERSION%%.*}
PYTHON_MINOR_VERSION=${PYTHON_VERSION#*.}
# exclusive upper bound for the version constraint: "<major>.<minor + 1>.0a0"
PYTHON_UPPER_BOUND="${PYTHON_MAJOR_VERSION}.$((PYTHON_MINOR_VERSION+1)).0a0"
# zero-pad the minor version to two digits so the string comparison below orders single-digit minors correctly
PYTHON_MINOR_PADDED=$(printf "%02d" "$PYTHON_MINOR_VERSION")
PYTHON_VERSION_PADDED="${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_PADDED}"
# 'shellcheck' is unhappy with the use of '>' to compare decimals here, but it works as expected for the 'bash' version in these
# images, and installing 'bc' or using a Python interpreter seem heavy for this purpose.
#
# shellcheck disable=SC2072
if [[ "$PYTHON_VERSION_PADDED" > "3.12" ]]; then
# newer than 3.12: the build-string pattern ends in "cp<major><minor>"
PYTHON_ABI_TAG="cp${PYTHON_MAJOR_VERSION}${PYTHON_MINOR_VERSION}"
else
# 3.12 and older: the build-string pattern ends in "cpython"
PYTHON_ABI_TAG="cpython"
fi
# constrain python to [PYTHON_VERSION, PYTHON_UPPER_BOUND) with a build string matching "*_<PYTHON_ABI_TAG>"
rapids-mamba-retry install -y -n base "python>=${PYTHON_VERSION},<${PYTHON_UPPER_BOUND}=*_${PYTHON_ABI_TAG}"
rapids-mamba-retry update --all -y -n base
# on rockylinux, install findutils before the 'find' calls below
if [[ "$LINUX_VER" == "rockylinux"* ]]; then
dnf install -y findutils
dnf clean all
fi
# delete static libraries and compiled bytecode from the conda installation
find /opt/conda -follow -type f -name '*.a' -delete
find /opt/conda -follow -type f -name '*.pyc' -delete
# clean up conda's caches
# NOTE(review): this line used to be labelled "recreate missing libstdc++ symlinks", but nothing in this step creates symlinks
conda clean -aiptfy
EOF

# Reassign root's primary group to root
RUN usermod -g root root

# ensure conda environment is always activated
RUN <<EOF
# make conda's shell setup script available under /etc/profile.d
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh

# append the activation line to the skeleton .bashrc first, then to the current user's .bashrc
for bashrc in /etc/skel/.bashrc ~/.bashrc; do
    echo ". /opt/conda/etc/profile.d/conda.sh; conda activate base" >> "${bashrc}"
done
EOF

# tzdata is needed by the ORC library used by pyarrow, because it provides /etc/localtime
# On Ubuntu 24.04 and newer, we also need tzdata-legacy
#
# Ubuntu: refresh the package index, upgrade installed packages, install the tzdata package(s),
#         then remove the apt package lists in the same layer.
# Rocky Linux: update installed packages and clean the dnf caches.
# Any other LINUX_VER value fails the build.
RUN <<EOF
case "${LINUX_VER}" in
    "ubuntu"*)
        PACKAGES_TO_INSTALL=(
            tzdata
        )

        # VERSION_ID in /etc/os-release is a quoted value; take the text between the quotes
        os_version=$(grep 'VERSION_ID' /etc/os-release | cut -d '"' -f 2)
        # 'shellcheck' is unhappy with the use of '>' to compare decimals here, but it works as expected for the 'bash' version in these
        # images, and installing 'bc' or using a Python interpreter seem heavy for this purpose.
        #
        # shellcheck disable=SC2072
        if [[ "${os_version}" > "24.04" ]] || [[ "${os_version}" == "24.04" ]]; then
            PACKAGES_TO_INSTALL+=(tzdata-legacy)
        fi

        rapids-retry apt-get update -y
        apt-get upgrade -y
        apt-get install -y --no-install-recommends \
            "${PACKAGES_TO_INSTALL[@]}"

        # The glob has to stay outside quotes: a quoted '*' is taken literally, so 'rm' would look
        # for a file named '*' and leave the package lists in the image layer.
        rm -rf /var/lib/apt/lists/*
        ;;
    "rockylinux"*)
        dnf update -y
        dnf clean all
        ;;
    *)
        echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1
        ;;
esac
EOF

# --- end 'rapidsai/miniforge-cuda' --- #

FROM miniforge-cuda AS cuvs-bench
ARG CUDA_VER=notset
ARG RAPIDS_VER=notset

Expand Down
2 changes: 2 additions & 0 deletions versions.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# NOTE(review): the key name matches the MINIFORGE_VER build ARG that selects the condaforge/miniforge3
# tag in the Dockerfiles; presumably the build passes this value through — confirm.
# renovate: datasource=docker depName=condaforge/miniforge3 versioning=docker
MINIFORGE_VER: 25.9.1-0
# renovate: datasource=github-releases depName=mikefarah/yq
YQ_VER: 4.49.2
Loading