diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
new file mode 100644
index 000000000..c9a7a314b
--- /dev/null
+++ b/.github/workflows/actions.yml
@@ -0,0 +1,35 @@
+name: Unit tests
+
+# run the test suite for pull requests targeting branches matching '**rc' or 'master'
+on:
+  pull_request:
+    branches:
+      - '**rc'
+      - 'master'
+
+jobs:
+  tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        gpu: ["amd", "nvidia"]
+        precision: ["double", "single"]
+    # the "<gpu>-gpu" label must match the --labels value the runner was registered with
+    runs-on: [self-hosted, "${{ matrix.gpu }}-gpu"]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3.3.0
+      - name: Configure
+        run: |
+          if [ "${{ matrix.gpu }}" = "nvidia" ]; then
+            FLAGS="-D Kokkos_ENABLE_CUDA=ON -D Kokkos_ARCH_AMPERE86=ON"
+          elif [ "${{ matrix.gpu }}" = "amd" ]; then
+            FLAGS="-D Kokkos_ENABLE_HIP=ON -D Kokkos_ARCH_AMD_GFX1100=ON"
+          fi
+          cmake -B build -D TESTS=ON -D output=ON -D precision=${{ matrix.precision }} $FLAGS
+      - name: Compile
+        run: |
+          cmake --build build -j $(nproc)
+      - name: Run tests
+        run: |
+          ctest --test-dir build --output-on-failure --verbose
diff --git a/dev/Dockerfile.cuda b/dev/Dockerfile.cuda
index dad5b0faf..be0051150 100755
--- a/dev/Dockerfile.cuda
+++ b/dev/Dockerfile.cuda
@@ -1,7 +1,6 @@
 # syntax = devthefuture/dockerfile-x
 
 FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
-MAINTAINER "@haykh"
 
 ENV CUDA_HOME=/usr/local/cuda
 ENV PATH=/usr/local/cuda/bin:$PATH
diff --git a/dev/Dockerfile.rocm b/dev/Dockerfile.rocm
index 739f2d856..8978b6994 100755
--- a/dev/Dockerfile.rocm
+++ b/dev/Dockerfile.rocm
@@ -1,7 +1,6 @@
 # syntax = devthefuture/dockerfile-x
 
 FROM rocm/rocm-terminal:latest
-MAINTAINER "@haykh"
 
 USER root
 ENV PATH=/opt/rocm/bin:$PATH
diff --git a/dev/runners/Dockerfile.runner.nvidia b/dev/runners/Dockerfile.runner.nvidia
new file mode 100644
index 000000000..d7925ec6a
--- /dev/null
+++ b/dev/runners/Dockerfile.runner.nvidia
@@ -0,0 +1,78 @@
+FROM nvidia/cuda:12.5.0-devel-ubuntu22.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV DISPLAY=host.docker.internal:0.0
+
+# upgrade
+RUN apt-get update && apt-get upgrade -y
+
+# cmake & build tools
+# (the package index is refreshed here so a cached "upgrade" layer cannot leave it stale)
+RUN apt-get update && \
+  apt-get remove -y --purge cmake && \
+  apt-get install -y sudo wget curl build-essential && \
+  wget "https://github.com/Kitware/CMake/releases/download/v3.29.6/cmake-3.29.6-linux-x86_64.tar.gz" -P /opt && \
+  tar xvf /opt/cmake-3.29.6-linux-x86_64.tar.gz -C /opt && \
+  rm /opt/cmake-3.29.6-linux-x86_64.tar.gz
+ENV PATH=/opt/cmake-3.29.6-linux-x86_64/bin:$PATH
+
+# adios2
+# (the job count is bounded by nproc: a bare `-j` lets make start an unlimited number of jobs)
+RUN apt-get update && apt-get install -y git libhdf5-dev && \
+  git clone https://github.com/ornladios/ADIOS2.git /opt/adios2-src && \
+  cd /opt/adios2-src && \
+  cmake -B build \
+  -D CMAKE_CXX_STANDARD=17 \
+  -D CMAKE_CXX_EXTENSIONS=OFF \
+  -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE \
+  -D BUILD_SHARED_LIBS=ON \
+  -D ADIOS2_USE_HDF5=ON \
+  -D ADIOS2_USE_Python=OFF \
+  -D ADIOS2_USE_Fortran=OFF \
+  -D ADIOS2_USE_ZeroMQ=OFF \
+  -D BUILD_TESTING=OFF \
+  -D ADIOS2_BUILD_EXAMPLES=OFF \
+  -D ADIOS2_USE_MPI=OFF \
+  -D ADIOS2_HAVE_HDF5_VOL=OFF \
+  -D CMAKE_INSTALL_PREFIX=/opt/adios2 && \
+  cmake --build build -j "$(nproc)" && \
+  cmake --install build && \
+  rm -rf /opt/adios2-src
+
+ENV CUDA_HOME=/usr/local/cuda
+ENV HDF5_ROOT=/usr
+ENV ADIOS2_DIR=/opt/adios2
+ENV PATH=/opt/adios2/bin:/usr/local/cuda/bin:$PATH
+
+# cleanup
+RUN apt-get clean && \
+  apt-get autoclean && \
+  apt-get autoremove -y && \
+  rm -rf /var/cache/apt/* && \
+  rm -rf /var/log/* && \
+  rm -rf /var/lib/apt/lists/*
+
+# runner account; its login shell is bash because zsh is never installed in this image
+ARG USER=runner
+RUN useradd -ms /bin/bash $USER && \
+  usermod -aG sudo $USER && \
+  echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+USER $USER
+ARG HOME=/home/$USER
+WORKDIR $HOME
+
+# gh runner
+# TOKEN is the runner registration token, supplied with `docker build --build-arg TOKEN=...`
+# NOTE(review): build args are visible in `docker history`; consider a BuildKit secret mount instead
+ARG TOKEN
+RUN mkdir actions-runner
+WORKDIR $HOME/actions-runner
+
+RUN curl -o actions-runner-linux-x64-2.317.0.tar.gz \
+  -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz && \
+  tar xzf ./actions-runner-linux-x64-2.317.0.tar.gz && \
+  sudo ./bin/installdependencies.sh && \
+  ./config.sh --unattended --url https://github.com/entity-toolkit/entity --token "$TOKEN" --labels nvidia-gpu
+
+ENTRYPOINT ["./run.sh"]
diff --git a/dev/runners/README.md b/dev/runners/README.md
new file mode 100644
index 000000000..526068bea
--- /dev/null
+++ b/dev/runners/README.md
@@ -0,0 +1,15 @@
+## Self-hosted runners
+
+GitHub can listen for repository changes and run so-called "actions" (e.g., tests) when a particular event is triggered (e.g., a pull request is created). To test Entity on GPUs, we provide Docker runner images with all the required compilers preinstalled, which can fetch the actions directly from GitHub and run them within the container environment.
+
+To do that, one needs to build an image from the corresponding `Dockerfile`, and then launch a Docker container which runs in the background, listening for commands and running any actions forwarded from GitHub.
+
+### NVIDIA GPUs
+
+```sh
+# 1. Create the image (TOKEN is the runner registration token for the repository)
+docker build -t ghrunner:nvidia -f Dockerfile.runner.nvidia --build-arg TOKEN=<registration-token> .
+# 2. Run a container from the image with GPU support
+#    ... (see wiki for instructions on NVIDIA runtime)
+docker run --runtime=nvidia --gpus=all -dt ghrunner:nvidia
+```