From f9103a7ba093852bd41b482076950ebf40a3fd23 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 2 Jun 2025 11:51:40 +0200 Subject: [PATCH 1/3] Normalize retry/timeouts for kubernetes installation Signed-off-by: Carlos Eduardo Arango Gutierrez --- pkg/provisioner/provisioner.go | 4 ++-- pkg/provisioner/templates/kubernetes.go | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pkg/provisioner/provisioner.go b/pkg/provisioner/provisioner.go index e3aba35ad..db8bf3258 100644 --- a/pkg/provisioner/provisioner.go +++ b/pkg/provisioner/provisioner.go @@ -93,8 +93,8 @@ func (p *Provisioner) waitForNodeReboot() error { } // Wait for the node to come back up - maxRetries := 30 - retryInterval := 10 * time.Second + maxRetries := 10 + retryInterval := 30 * time.Second for i := 0; i < maxRetries; i++ { p.log.Info("Waiting for node to come back online...") diff --git a/pkg/provisioner/templates/kubernetes.go b/pkg/provisioner/templates/kubernetes.go index 4fd18755b..88804de09 100644 --- a/pkg/provisioner/templates/kubernetes.go +++ b/pkg/provisioner/templates/kubernetes.go @@ -101,25 +101,28 @@ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config sudo chown $(id -u):$(id -g) $HOME/.kube/config export KUBECONFIG="${HOME}/.kube/config" +# Wait explicitly for kube-apiserver availability +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG version + # Install Calico # based on https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml # Wait for Tigera operator to be ready -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s 
deployment/tigera-operator -n tigera-operator +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator # Wait for all necessary CRDs to be established -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/installations.operator.tigera.io +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/apiservers.operator.tigera.io +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG wait --for=condition=established --timeout=300s crd/tigerastatuses.operator.tigera.io # Apply custom resources with increased retry attempts -with_retry 10 20s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml +with_retry 10 30s kubectl --kubeconfig $KUBECONFIG apply -f https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml # Make single-node cluster schedulable -kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule- -kubectl label node --all node-role.kubernetes.io/worker= -kubectl label node --all nvidia.com/holodeck.managed=true +with_retry 10 30s kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule- +with_retry 10 30s kubectl label node --all node-role.kubernetes.io/worker= +with_retry 10 30s kubectl label node --all nvidia.com/holodeck.managed=true # Wait for cluster to be ready with_retry 10 30s kubectl --kubeconfig 
$KUBECONFIG wait --for=condition=ready --timeout=300s nodes --all From 4190ef6a15599376d522cd5da676248538137ea7 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 2 Jun 2025 13:18:53 +0200 Subject: [PATCH 2/3] fix kubernetes version setting on kubernetes template Signed-off-by: Carlos Eduardo Arango Gutierrez --- pkg/provisioner/templates/kubernetes.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/provisioner/templates/kubernetes.go b/pkg/provisioner/templates/kubernetes.go index 88804de09..54ca70f86 100644 --- a/pkg/provisioner/templates/kubernetes.go +++ b/pkg/provisioner/templates/kubernetes.go @@ -272,14 +272,16 @@ type KubeadmConfig struct { } func NewKubernetes(env v1alpha1.Environment) (*Kubernetes, error) { - kubernetes := &Kubernetes{ - Version: env.Spec.Kubernetes.KubernetesVersion, - } - // check if env.Spec.Kubernetes.KubernetesVersion is in the format of vX.Y.Z - // if not, set the default version - if !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") && env.Spec.Kubernetes.KubernetesInstaller != "microk8s" { - fmt.Printf("Kubernetes version %s is not in the format of vX.Y.Z, setting default version v1.32.1\n", env.Spec.Kubernetes.KubernetesVersion) + kubernetes := &Kubernetes{} + + // Normalize Kubernetes version using a switch statement + switch { + case env.Spec.Kubernetes.KubernetesVersion == "": kubernetes.Version = defaultKubernetesVersion + case !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") && env.Spec.Kubernetes.KubernetesInstaller != "microk8s": + kubernetes.Version = "v" + env.Spec.Kubernetes.KubernetesVersion + default: + kubernetes.Version = env.Spec.Kubernetes.KubernetesVersion } if env.Spec.Kubernetes.KubeletReleaseVersion != "" { kubernetes.KubeletReleaseVersion = env.Spec.Kubernetes.KubeletReleaseVersion From 13fa2be4aba2c95cc68a3cb05cab00a4e8de162f Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 2 Jun 2025 14:00:03 
+0200 Subject: [PATCH 3/3] Move microK8s version normalization to its template Signed-off-by: Carlos Eduardo Arango Gutierrez --- cmd/cli/.main.go.swp | Bin 0 -> 12288 bytes pkg/provisioner/templates/kubernetes.go | 13 ++++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 cmd/cli/.main.go.swp diff --git a/cmd/cli/.main.go.swp b/cmd/cli/.main.go.swp new file mode 100644 index 0000000000000000000000000000000000000000..45d5abb8c9208787854d961d509729aac414a2eb GIT binary patch literal 12288 zcmeI2Uu+yl9mgkaAvI81pb~HBmo&%#57C+K%Y){7H=a7igqw2ZhFmut z_8Ijn_x$9reou@#R$}E;JehRUb|vUbSMo|X^g~xTH!4ogtvJ4{^tcRcFFed%(N-u> zC~!9lJis2AKRu^MOj{@DJ@4MUo54ljLV-eoLV-eoLV-eoLV-eoLV-eoH@N~ybc{WT zIoy-a^MY?Tq~i{1LnYUIae|-vi$T-vQqSx49C!+}!3V)f@EYRe74UoTJMdF*8$1i10bd0=mTrOn=QCte_;1P&J-dEW zCZ*FeqhCsesw9-c4cQ@F(1}F~q))qSO8;6|B*$J)gHjmVRprGqj9q6$w7Y9Jc~1<7 z)D?b2=bW(DMuxly!Bb5>67TojulF;R~2JCxbZQ zfg{Qiz8WLTSql%eajC+fCjz{1gSI69AeCuagsfH z1y{s!oG~5Y-XpH=>z>~+(L&=Xus9}wnTv#OnOBf1&Q?n1|8{9 zNBV-*KT%!1yi#wC7E%Q6aH!4f?kXO5g6s=E_u}CYI-=p?%Gy$Oh1M=_G}fAW(70PI ziyhMwK^TMy*_Wg%E+MiIVA|kgp$y1GTujz1S=uOBpG?e!y%}>4p=fRe7U?VFO#a1c zYpbi(=JME6ds)Pel97&0c(z<`)z%xvuhCrWEupl&Xx*x>x8U)(MSGQp{x#1J+uWzn zScfEZC{Y9o6ElF3!k8g2cwqh zUG*i8#lGjc`Lpygjck!+te9~p_1u=i(AV*f$vIizn|bS|{5Iy?5=k1FDR*>g5Go{o z$#@hvn7w0XUM}&t3k4&TEnPtuDN(7=&3Rqer6A6P^@pnG=%}9wjChP{7hcAU|TpFjmaQ8c{Q+2X~K4k}FRa?|(S+rDbHCnUU zq0Io5CMG35o#JThQhSGZgl9NrEA;t5Kb3n*pe+e;YNvsc*iyY&io;Ga;7Y)ZwxmDZ zB#9O(mBC4E`@2by#DNgn#KlDix`Zyv=v~NfU_^uD|rf^MKF_PsLC-tftKS$4+rP2Ng zAE~6hu<}9H%-P)OS{Usp>2(oL&J4}Z%|9|rS)O4f*DtTFSM^%jL{*&KO-C(oEL2~w IT;&w|58&z8`v3p{ literal 0 HcmV?d00001 diff --git a/pkg/provisioner/templates/kubernetes.go b/pkg/provisioner/templates/kubernetes.go index 54ca70f86..127f71a32 100644 --- a/pkg/provisioner/templates/kubernetes.go +++ b/pkg/provisioner/templates/kubernetes.go @@ -171,11 +171,14 @@ echo "ssh -i ubuntu@${INSTANCE_ENDPOINT_HOST}" const microk8sTemplate = ` :
${INSTANCE_ENDPOINT_HOST:={{.K8sEndpointHost}}} +: ${K8S_VERSION:={{.Version}}} + +# Remove leading 'v' from version if present for microk8s snap channel +MICROK8S_VERSION="${K8S_VERSION#v}" # Install microk8s sudo apt-get update - -sudo snap install microk8s --classic --channel={{.Version}} +sudo snap install microk8s --classic --channel=${MICROK8S_VERSION} sudo microk8s enable gpu dashboard dns registry sudo usermod -a -G microk8s ubuntu mkdir -p ~/.kube @@ -184,7 +187,7 @@ sudo microk8s config > ~/.kube/config sudo chown -f -R ubuntu ~/.kube sudo snap alias microk8s.kubectl kubectl -echo "Microk8s {{.Version}} installed successfully" +echo "Microk8s ${MICROK8S_VERSION} installed successfully" echo "you can now access the cluster with:" echo "ssh -i ubuntu@${INSTANCE_ENDPOINT_HOST}" ` @@ -274,11 +277,11 @@ type KubeadmConfig struct { func NewKubernetes(env v1alpha1.Environment) (*Kubernetes, error) { kubernetes := &Kubernetes{} - // Normalize Kubernetes version using a switch statement + // Normalize Kubernetes version: always ensure it starts with 'v' switch { case env.Spec.Kubernetes.KubernetesVersion == "": kubernetes.Version = defaultKubernetesVersion - case !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") && env.Spec.Kubernetes.KubernetesInstaller != "microk8s": + case !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v"): kubernetes.Version = "v" + env.Spec.Kubernetes.KubernetesVersion default: kubernetes.Version = env.Spec.Kubernetes.KubernetesVersion