Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/container-toolkit.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ install_packages_with_retry nvidia-container-toolkit
# Configure container runtime
sudo nvidia-ctk runtime configure --runtime={{.ContainerRuntime}} --set-as-default --enable-cdi={{.EnableCDI}}
sudo systemctl restart {{.ContainerRuntime}}

# safely close the ssh connection
exit 0
`

type ContainerToolkit struct {
Expand Down
5 changes: 5 additions & 0 deletions pkg/provisioner/templates/container-toolkit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,9 @@ func TestContainerToolkit_Execute(t *testing.T) {
if !strings.Contains(out, "nvidia-ctk runtime configure --runtime=containerd --set-as-default --enable-cdi=true") {
t.Errorf("template output missing expected runtime config: %s", out)
}

// Test safe exit
if !strings.Contains(out, "exit 0") {
t.Errorf("template output missing safe exit: %s", out)
}
}
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ sudo ctr images pull docker.io/library/hello-world:latest
sudo ctr run --rm docker.io/library/hello-world:latest test

echo "Containerd installation completed successfully!"

# safely close the ssh connection
exit 0
`

type Containerd struct {
Expand Down
6 changes: 4 additions & 2 deletions pkg/provisioner/templates/containerd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,10 @@ func TestContainerd_Execute_SystemChecks(t *testing.T) {
if !strings.Contains(out, "TMP_DIR=$(mktemp -d)") {
t.Error("template output missing temporary directory creation")
}
if !strings.Contains(out, "rm -rf $TMP_DIR") {
t.Error("template output missing temporary directory cleanup")

// Test safe exit
if !strings.Contains(out, "exit 0") {
t.Error("template output missing safe exit")
}

// Test error handling
Expand Down
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/crio.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ apt install -y cri-o
# Start and enable Service
systemctl daemon-reload
systemctl start crio.service

# safely close the ssh connection
exit 0
`

type CriO struct {
Expand Down
5 changes: 5 additions & 0 deletions pkg/provisioner/templates/crio_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ func TestCriO_Execute(t *testing.T) {
if !strings.Contains(out, "systemctl start crio.service") {
t.Errorf("template output missing crio start: %s", out)
}

// Test safe exit
if !strings.Contains(out, "exit 0") {
t.Errorf("template output missing safe exit: %s", out)
}
}
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ sudo systemctl restart docker
# Post-installation steps for Linux
sudo usermod -aG docker $USER
newgrp docker

# safely close the ssh connection
exit 0
`

type Docker struct {
Expand Down
5 changes: 5 additions & 0 deletions pkg/provisioner/templates/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,9 @@ func TestDocker_Execute(t *testing.T) {
if !strings.Contains(out, "systemctl enable docker") {
t.Errorf("template output missing enable docker: %s", out)
}

// Test safe exit
if !strings.Contains(out, "exit 0") {
t.Errorf("template output missing safe exit: %s", out)
}
}
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ if [ "${CURRENT_KERNEL}" != "${KERNEL_VERSION}" ]; then
# Run the reboot command with nohup to avoid abrupt SSH closure issues
nohup sudo reboot &
fi

# safely close the ssh connection
exit 0
{{- end }}
`

Expand Down
4 changes: 4 additions & 0 deletions pkg/provisioner/templates/kernel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ func TestKernelTemplateContent(t *testing.T) {
name: "reboot command",
contains: "nohup sudo reboot",
},
{
name: "safe exit",
contains: "# safely close the ssh connection\nexit 0",
},
}

for _, tt := range tests {
Expand Down
9 changes: 9 additions & 0 deletions pkg/provisioner/templates/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ with_retry 5 10s kubectl --kubeconfig $KUBECONFIG create -f https://raw.githubus
kubectl taint nodes --all node-role.kubernetes.io/control-plane:NoSchedule-
kubectl label node --all node-role.kubernetes.io/worker=
kubectl label node --all nvidia.com/holodeck.managed=true

# safely close the ssh connection
exit 0
`

const KindTemplate = `
Expand Down Expand Up @@ -152,6 +155,9 @@ with_retry 3 10s kind create cluster --name holodeck $KIND_CONFIG --kubeconfig="
echo "KIND installed successfully"
echo "you can now access the cluster with:"
echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"

# safely close the ssh connection
exit 0
`

const microk8sTemplate = `
Expand All @@ -173,6 +179,9 @@ sudo snap alias microk8s.kubectl kubectl
echo "Microk8s {{.Version}} installed successfully"
echo "you can now access the cluster with:"
echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"

# safely close the ssh connection
exit 0
`

const kubeadmTemplate = `apiVersion: kubeadm.k8s.io/v1beta4
Expand Down
22 changes: 14 additions & 8 deletions pkg/provisioner/templates/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ func TestKubernetes_Execute(t *testing.T) {
wantErr bool
checkTemplate bool
expectedString string
checkSafeExit bool
}{
{
name: "kubeadm installer",
Expand All @@ -192,7 +193,8 @@ func TestKubernetes_Execute(t *testing.T) {
},
},
},
wantErr: false,
wantErr: false,
checkSafeExit: true,
},
{
name: "legacy kubeadm installer",
Expand All @@ -211,6 +213,7 @@ func TestKubernetes_Execute(t *testing.T) {
wantErr: false,
checkTemplate: true,
expectedString: "kubeadm init \\\n --kubernetes-version=${K8S_VERSION} \\\n --pod-network-cidr=192.168.0.0/16 \\\n --control-plane-endpoint=test-host:6443 \\\n --ignore-preflight-errors=all",
checkSafeExit: true,
},
{
name: "kind installer",
Expand All @@ -225,7 +228,8 @@ func TestKubernetes_Execute(t *testing.T) {
},
},
},
wantErr: false,
wantErr: false,
checkSafeExit: true,
},
{
name: "microk8s installer",
Expand All @@ -240,7 +244,8 @@ func TestKubernetes_Execute(t *testing.T) {
},
},
},
wantErr: false,
wantErr: false,
checkSafeExit: true,
},
{
name: "invalid installer",
Expand Down Expand Up @@ -276,13 +281,14 @@ func TestKubernetes_Execute(t *testing.T) {
return
}
assert.NoError(t, err)
assert.NotEmpty(t, buf.String())

out := buf.String()
if tt.checkTemplate {
// Check if the template contains the expected kubeadm init command
assert.Contains(t, buf.String(), tt.expectedString)
// Verify that it doesn't use the config file
assert.NotContains(t, buf.String(), "kubeadm init --config /etc/kubernetes/kubeadm-config.yaml")
assert.Contains(t, out, tt.expectedString)
}

if tt.checkSafeExit {
assert.Contains(t, out, "exit 0", "template output missing safe exit")
}
})
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/provisioner/templates/nv-driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ sudo nvidia-persistenced --persistence-mode

# Quick check to see if the driver is installed
nvidia-smi

# safely close the ssh connection
exit 0
`

type NvDriver v1alpha1.NVIDIADriver
Expand Down
16 changes: 15 additions & 1 deletion pkg/provisioner/templates/nv-driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package templates

import (
"bytes"
"strings"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -59,6 +60,9 @@ sudo nvidia-persistenced --persistence-mode

# Quick check to see if the driver is installed
nvidia-smi

# safely close the ssh connection
exit 0
`,
},
{
Expand Down Expand Up @@ -87,6 +91,9 @@ sudo nvidia-persistenced --persistence-mode

# Quick check to see if the driver is installed
nvidia-smi

# safely close the ssh connection
exit 0
`,
},
{
Expand Down Expand Up @@ -116,6 +123,9 @@ sudo nvidia-persistenced --persistence-mode

# Quick check to see if the driver is installed
nvidia-smi

# safely close the ssh connection
exit 0
`,
},
}
Expand All @@ -128,7 +138,11 @@ nvidia-smi
err := tc.driver.Execute(&output, v1alpha1.Environment{})
require.EqualValues(t, tc.expecteError, err)

require.EqualValues(t, tc.expectedOutput, output.String())
// Compare trimmed strings to avoid whitespace issues
require.EqualValues(t, strings.TrimSpace(tc.expectedOutput), strings.TrimSpace(output.String()))

// Test safe exit
require.Contains(t, output.String(), "exit 0", "template output missing safe exit")
})

}
Expand Down