diff --git a/pkg/provisioner/templates/common.go b/pkg/provisioner/templates/common.go index d829ee2ac..2d882419c 100644 --- a/pkg/provisioner/templates/common.go +++ b/pkg/provisioner/templates/common.go @@ -30,25 +30,23 @@ export HOLODECK_ENVIRONMENT=true echo "APT::Get::AllowUnauthenticated 1;" | sudo tee /etc/apt/apt.conf.d/99allow-unauthenticated install_packages_with_retry() { - local packages=("$@") - local max_retries=5 - local retry_delay=5 - - for ((i=1; i<=$max_retries; i++)); do - echo "Attempt $i to install packages: ${packages[@]}" - - # Attempt to install packages - sudo apt-get install -y --no-install-recommends "${packages[@]}" - - # Check if the last command failed and the error is related to unsigned repository - if [ $? -ne 0 ] && grep -q 'NO_PUBKEY' <<< "$(tail -n 1 /var/lib/dpkg/status 2>/dev/null)"; then - echo "Error: Unsigned repository. Retrying in $retry_delay seconds..." - sleep $retry_delay - else - # Exit loop if installation is successful or the error is not related to unsigned repository - break - fi - done + local max_retries=5 retry_delay=5 + local packages=("$@") + + for ((i=1; i<=max_retries; i++)); do + echo "[$i/$max_retries] apt-get update" + if sudo apt-get -o Acquire::Retries=3 update; then + echo "[$i/$max_retries] installing: ${packages[*]}" + if sudo DEBIAN_FRONTEND=noninteractive \ + apt-get install -y --no-install-recommends "${packages[@]}"; then + return 0 # success + fi + fi + echo "Attempt $i failed; sleeping ${retry_delay}s" >&2 + sleep "$retry_delay" + done + echo "All ${max_retries} attempts failed" >&2 + return 1 } with_retry() { diff --git a/pkg/provisioner/templates/kubernetes.go b/pkg/provisioner/templates/kubernetes.go index a5239a7cc..79d2586db 100644 --- a/pkg/provisioner/templates/kubernetes.go +++ b/pkg/provisioner/templates/kubernetes.go @@ -117,7 +117,7 @@ with_retry 5 10s kubectl --kubeconfig $KUBECONFIG wait --for=condition=establish with_retry 10 15s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml # Wait for cluster to be ready -with_retry 5 10s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all +with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all # Make single-node cluster schedulable kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule- diff --git a/pkg/provisioner/templates/nv-driver.go b/pkg/provisioner/templates/nv-driver.go index c9daa1a2d..170754d92 100644 --- a/pkg/provisioner/templates/nv-driver.go +++ b/pkg/provisioner/templates/nv-driver.go @@ -28,15 +28,8 @@ import ( const NvDriverTemplate = ` # Install Dependencies with_retry 3 10s sudo apt-get update -install_packages_with_retry linux-headers-$(uname -r) gcc make -install_packages_with_retry apt-utils build-essential \ - ca-certificates \ - curl \ - kmod \ - file \ - libelf-dev \ - libglvnd-dev \ - pkg-config +install_packages_with_retry linux-headers-$(uname -r) +install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make install_packages_with_retry gcc-12 g++-12 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \ diff --git a/pkg/provisioner/templates/nv-driver_test.go b/pkg/provisioner/templates/nv-driver_test.go index b253f7635..1344873da 100644 --- a/pkg/provisioner/templates/nv-driver_test.go +++ b/pkg/provisioner/templates/nv-driver_test.go @@ -43,15 +43,8 @@ func TestNVDriverTemplate(t *testing.T) { # Install Dependencies with_retry 3 10s sudo apt-get update -install_packages_with_retry linux-headers-$(uname -r) gcc make -install_packages_with_retry apt-utils build-essential \ - ca-certificates \ - curl \ - kmod \ - file \ - libelf-dev \ - libglvnd-dev \ - pkg-config +install_packages_with_retry linux-headers-$(uname -r) +install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make install_packages_with_retry gcc-12 g++-12 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \ @@ -86,15 +79,8 @@ nvidia-smi expectedOutput: ` # Install Dependencies with_retry 3 10s sudo apt-get update -install_packages_with_retry linux-headers-$(uname -r) gcc make -install_packages_with_retry apt-utils build-essential \ - ca-certificates \ - curl \ - kmod \ - file \ - libelf-dev \ - libglvnd-dev \ - pkg-config +install_packages_with_retry linux-headers-$(uname -r) +install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make install_packages_with_retry gcc-12 g++-12 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \ @@ -130,15 +116,8 @@ nvidia-smi expectedOutput: ` # Install Dependencies with_retry 3 10s sudo apt-get update -install_packages_with_retry linux-headers-$(uname -r) gcc make -install_packages_with_retry apt-utils build-essential \ - ca-certificates \ - curl \ - kmod \ - file \ - libelf-dev \ - libglvnd-dev \ - pkg-config +install_packages_with_retry linux-headers-$(uname -r) +install_packages_with_retry apt-utils build-essential ca-certificates curl kmod file libelf-dev libglvnd-dev pkg-config make install_packages_with_retry gcc-12 g++-12 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 && \