Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 26 additions & 18 deletions .github/workflows/gh-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:

env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BASE_IMAGE: rapidsai/devcontainers:23.06-cpp-cuda11.8-mambaforge-ubuntu22.04
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably update this to the 23.08 version of the devcontainer. I'm not sure whether we should do it here right now or in a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's do it in a separate PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A separate PR would be ideal. Keeping this conversation open so we don't forget to do this later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did this in #725

IMAGE_NAME: legate.core-${{ inputs.build-target }}
USE_CUDA: ${{ (inputs.build-target == 'cpu' && 'OFF') || 'ON' }}

Expand Down Expand Up @@ -43,44 +44,51 @@ jobs:
role-duration-seconds: 28800 # 8 hours
role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate

- name: Docker system prune
run: |
docker version
docker system prune --all --force

- name: Build docker image
run: |
echo BUILD_TARGET: ${{ inputs.build-target }}
echo USE_CUDA: ${{ env.USE_CUDA }}

docker build \
--build-arg AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }} \
--build-arg AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} \
--build-arg AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} \
--build-arg GITHUB_TOKEN=${{ env.GITHUB_TOKEN }} \
--build-arg USE_CUDA=${{ env.USE_CUDA }} \
--progress=plain \
--tag=$IMAGE_NAME:${{ inputs.sha }} \
--label "git-commit=${{ inputs.sha }}" \
-f continuous_integration/Dockerfile .
IMAGE_TAG=${{ env.IMAGE_NAME }}:${{ inputs.sha }}

continuous_integration/build-docker-image \
--base-image "$BASE_IMAGE" \
--image-tag "$IMAGE_TAG" \
--source-dir .

- name: Dump docker history of image before upload
run: |
IMAGE_TAG=${{ env.IMAGE_NAME }}:${{ inputs.sha }}
docker history $IMAGE_TAG

- name: Log in to container image registry
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin

- name: Push image
run: |
IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME
IMAGE_TAG=${{ env.IMAGE_NAME }}:${{ inputs.sha }}

IMAGE_ID=ghcr.io/${{ github.repository_owner }}

# Change all uppercase to lowercase
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')

VERSION=${{ inputs.sha }}
IMAGE_ID=$IMAGE_ID/$IMAGE_TAG

docker tag $IMAGE_NAME:$VERSION $IMAGE_ID:$VERSION
docker push $IMAGE_ID:$VERSION
docker tag $IMAGE_TAG $IMAGE_ID
docker push $IMAGE_ID

- name: Copy artifacts back to the host
run: |
IMAGE_TAG=${{ env.IMAGE_NAME }}:${{ inputs.sha }}
mkdir -p artifacts
docker run -v "$(pwd)/artifacts:/home/coder/.artifacts" --rm -t $IMAGE_NAME:${{ inputs.sha }} copy-artifacts
echo --------- DOCKER HISTORY START -----------
docker history $IMAGE_NAME:${{ inputs.sha }}
echo --------- DOCKER HISTORY END -----------
docker run -v "$(pwd)/artifacts:/home/coder/.artifacts" --rm -t $IMAGE_TAG copy-artifacts


- name: Display structure of workdir
run: ls -R
Expand Down
29 changes: 19 additions & 10 deletions continuous_integration/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
ARG BASE_IMAGE=rapidsai/devcontainers:23.06-cpp-cuda11.8-mambaforge-ubuntu22.04
ARG BASE_IMAGE
FROM ${BASE_IMAGE} as stage0

SHELL ["/bin/bash", "-c"]

ENV PYTHONDONTWRITEBYTECODE=1
ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-east"
Expand Down Expand Up @@ -28,16 +30,21 @@ COPY --chown=coder:coder . /home/coder/legate

RUN chmod a+x /home/coder/.local/bin/* && \
mkdir -p /tmp/out && \
chown -R coder:coder /tmp/out && \
chown -R coder:coder /home/coder/.artifacts
chown -R coder:coder /tmp/out

#---------------------------------------------------
FROM stage0 as setup

RUN get-yaml-and-make-conda-env
USER coder
WORKDIR /home/coder

RUN set -x && . conda-utils && get_yaml_and_make_conda_env

#---------------------------------------------------
FROM setup as build
USER coder
WORKDIR /home/coder

ARG GITHUB_TOKEN
ENV GITHUB_TOKEN=${GITHUB_TOKEN}
ARG AWS_SESSION_TOKEN
Expand All @@ -47,13 +54,15 @@ ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
ARG AWS_SECRET_ACCESS_KEY
ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}

# If .creds exists copy it to /run/secrets
COPY --chown=coder:coder .cred[s] /run/secrets
COPY --chown=coder:coder .creds /run/secrets

RUN entrypoint build-all
RUN entrypoint build-legate-all

#---------------------------------------------------
FROM stage0 as final
COPY --from=build /tmp/out /tmp/out
COPY --from=build /tmp/conda-build /tmp/conda-build
COPY --from=build /tmp/env_yaml /tmp/env_yaml
USER coder
WORKDIR /home/coder

COPY --from=build --chown=coder:coder /tmp/out /tmp/out
COPY --from=build --chown=coder:coder /tmp/conda-build /tmp/conda-build
COPY --from=build --chown=coder:coder /tmp/env_yaml /tmp/env_yaml
39 changes: 39 additions & 0 deletions continuous_integration/build-docker-image
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Build the CI docker image from continuous_integration/Dockerfile.
#
# Usage:
#   build-docker-image --base-image <image> --image-tag <tag> --source-dir <dir>
#
# Environment (forwarded to `docker build` as --build-arg values; each must be
# set because the script runs with `set -u`):
#   AWS_SESSION_TOKEN, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
#   GITHUB_TOKEN, USE_CUDA
#
# NOTE(review): xtrace is enabled, so the expanded build args (including the
# credentials) are written to the trace output -- confirm the CI log masks them.

set -xuo pipefail

usage() {
  echo "Usage: $0 --base-image <image> --image-tag <tag> --source-dir <dir>" >&2
}

# Let getopt normalize the command line (this also accepts --opt=value).
# Its failure must be checked explicitly because `set -e` is not active yet.
if ! format=$(getopt -n "$0" -o '' -l "base-image:,image-tag:,source-dir:" -- "$@"); then
  usage
  exit 1
fi
eval set -- "$format"

# Seed from the environment so that `set -u` does not abort with an opaque
# "unbound variable" error when an option is omitted; the explicit check
# below reports the problem instead.
BASE_IMAGE="${BASE_IMAGE:-}"
IMAGE_TAG="${IMAGE_TAG:-}"
SOURCE_DIR="${SOURCE_DIR:-}"

# Read the argument values
while [ $# -gt 0 ]; do
  case "$1" in
    --base-image) BASE_IMAGE="$2"; shift 2 ;;
    --image-tag) IMAGE_TAG="$2"; shift 2 ;;
    --source-dir) SOURCE_DIR="$2"; shift 2 ;;
    --) shift; break ;;
    *) echo "Unexpected argument: $1" >&2; usage; exit 1 ;;
  esac
done

# A bad invocation must fail the calling CI step, so exit non-zero.
if [ -z "$BASE_IMAGE" ] || [ -z "$IMAGE_TAG" ] || [ -z "$SOURCE_DIR" ]; then
  echo "Wrong number of arguments passed." >&2
  usage
  exit 1
fi

set -e

# Avoid build errors due to a missing .creds folder
mkdir -p "$SOURCE_DIR/.creds"

docker build \
  --build-arg BASE_IMAGE="$BASE_IMAGE" \
  --build-arg AWS_SESSION_TOKEN="$AWS_SESSION_TOKEN" \
  --build-arg AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
  --build-arg AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
  --build-arg GITHUB_TOKEN="$GITHUB_TOKEN" \
  --build-arg USE_CUDA="$USE_CUDA" \
  --progress=plain \
  --tag="$IMAGE_TAG" \
  -f "$SOURCE_DIR/continuous_integration/Dockerfile" "$SOURCE_DIR"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash


build_all() {
build_legate_all() {
set -x
cd ~/;

Expand All @@ -14,4 +14,4 @@ build_all() {
build-legate-conda;
}

(build_all "$@");
(build_legate_all "$@");
19 changes: 11 additions & 8 deletions continuous_integration/home/coder/.local/bin/conda-utils
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ generate_yaml_file() {
}

find_yaml_file() {
pattern="/tmp/env_yaml/*.yaml"
files=( $pattern )
yaml_file="${files[0]}"
pattern="/tmp/env_yaml/*.yaml";
files=( $pattern );
yaml_file="${files[0]}";

if [ -z "${yaml_file:-}" ] || [ ! -f "$yaml_file" ]; then
return 1
return 1;
fi

return 0
return 0;
}

get_yaml_and_make_conda_env() {
Expand All @@ -58,10 +58,13 @@ get_yaml_and_make_conda_env() {
make_conda_env_from_yaml;
}

make_conda_env_using_legate_core() {
mamba create -n "${DEFAULT_CONDA_ENV:-legate}"
install_legate_core_with_war() {
# WAR: legate-core depends on a different version of numpy than what is already installed.
# The correct version will be installed when legate-core is installed below.
# See github issue: https://github.com/nv-legate/legate.core/issues/812
mamba uninstall -y -n "${DEFAULT_CONDA_ENV:-legate}" numpy;

mamba install -y -n "${DEFAULT_CONDA_ENV:-legate}" -c nvidia -c conda-forge -c /tmp/conda-build/legate_core legate-core
mamba install -y -n "${DEFAULT_CONDA_ENV:-legate}" -c nvidia -c conda-forge -c /tmp/conda-build/legate_core legate-core;
}

activate_conda_env() {
Expand Down
39 changes: 24 additions & 15 deletions continuous_integration/home/coder/.local/bin/entrypoint
Original file line number Diff line number Diff line change
@@ -1,7 +1,26 @@
#!/usr/bin/env bash

entrypoint() {
# Stop the sccache server (best effort) and then print its statistics.
# Returns the exit status of `sccache --show-stats`.
sccache_stop_server_and_show_stats() {
  # A failed stop is deliberately ignored; the stats are shown either way.
  sccache --stop-server || true
  sccache --show-stats
}

# Export the files under /run/secrets/ as environment variables, then source
# the devcontainer-utils helper scripts in order.
init_devcontainer() {
# disable xtrace and history
# Each flag holds the exit status of `grep -q`, so 0 means the option was
# enabled in SHELLOPTS before it is switched off below.
local xtrace_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'xtrace'; echo $?);
local history_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'history'; echo $?);
# stderr is discarded so the `set` call itself leaves no trace output.
{ set +xo history; } 2>/dev/null;
# For every regular file under /run/secrets/, export a variable named after
# the file's path relative to /run/secrets/ with the file's contents as value.
# NOTE(review): the contents pass through `eval` unquoted -- presumably the
# secrets never contain whitespace or shell metacharacters; confirm.
eval "export $(find /run/secrets/ -type f -exec bash -c 'echo ${0/\/run\/secrets\//}=$(<${0})' {} \;)";
# Re-enable only the options that were on before they were disabled above.
if [ "${history_enabled}" -eq "0" ]; then { set -o history; } 2>/dev/null; fi;
if [ "${xtrace_enabled}" -eq "0" ]; then { set -o xtrace; } 2>/dev/null; fi;

. devcontainer-utils-post-attach-command;

# Per the review discussion on this change: the credentials are generated on
# the fly and must be uploaded to AWS first; without this wait the checks
# below can race the upload and hit authentication errors.
sleep 10;
# Per the same discussion: vault-s3-test checks that the AWS credentials are
# valid, and vault-s3-export sets up the SCCACHE_* environment variables.
. devcontainer-utils-vault-s3-test;
. devcontainer-utils-vault-s3-export 0;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding of this is lacking. :) Why do we need this?

Copy link
Contributor Author

@sandeepd-nv sandeepd-nv Aug 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

devcontainer-utils-vault-s3-test

This tests if AWS credentials are valid.

devcontainer-utils-vault-s3-export

This sets up various SCCACHE_* env vars through ~/.aws/config or ~/.bashrc.

These things are required for enabling SCCACHE for "local repro" where AWS auth happens using a GH personal access token.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also add that the 10 second wait is essential. The credentials that we get are generated on the fly, and need to be uploaded to AWS. We hit authentication issues if we try to race against the upload process.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also done in the 23.08 devcontainers (and removed in #725).


entrypoint() {
set -x

echo AWS_REGION=${AWS_REGION:-}
Expand All @@ -11,22 +30,12 @@ entrypoint() {

mkdir -p /home/coder/.cache;

if [ -d /run/secrets ]; then
# disable xtrace and history
local xtrace_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'xtrace'; echo $?);
local history_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'history'; echo $?);
{ set +xo history; } 2>/dev/null;
eval "export $(find /run/secrets/ -type f -exec bash -c 'echo ${0/\/run\/secrets\//}=$(<${0})' {} \;)";
if [ "${history_enabled}" -eq "0" ]; then { set -o history; } 2>/dev/null; fi;
if [ "${xtrace_enabled}" -eq "0" ]; then { set -o xtrace; } 2>/dev/null; fi;

. devcontainer-utils-post-attach-command;
local secrets_dir=/run/secrets

sleep 10;
. devcontainer-utils-vault-s3-test;
. devcontainer-utils-vault-s3-export 0;
if [ -d "$secrets_dir" ] && [ "$(ls -A $secrets_dir)" ]; then
init_devcontainer
else
sccache --stop-server || true && sccache --show-stats;
sccache_stop_server_and_show_stats
fi

exec "$@";
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ run_test_or_analysis() {
set -x
cd ~/

make_conda_env_using_legate_core
install_legate_core_with_war;

activate_conda_env
activate_conda_env;

conda info
conda info;

set -xeuo pipefail

Expand Down