Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,26 @@ jobs:
test:
name: Run tests
runs-on: ubuntu-latest
# TODO should we run on self-hosted?

continue-on-error: true
strategy:
fail-fast: false
matrix:
flavors:
- name: capms_dell_sonic
- name: capms_sonic
- name: kamaji

steps:
- name: Free disk space
# ubuntu-latest only has ~14GB free; kind + QEMU VMs + containerlab + Docker images exhaust it.
# Remove preinstalled SDKs/toolchains we don't need to recover ~10-12GB.
# apt-get clean removes cached .deb files (~few hundred MB).
run: |
sudo rm -rf /usr/local/lib/android /usr/share/dotnet /usr/share/swift /opt/ghc /usr/local/.ghcup /opt/hostedtoolcache/CodeQL
sudo apt-get clean
df -h

- name: Gain back workspace permissions # https://github.com/actions/checkout/issues/211
run: |
[ -d "${GITHUB_WORKSPACE}" ] && sudo chown -R $USER:$USER ${GITHUB_WORKSPACE}
Expand All @@ -46,6 +58,7 @@ jobs:

- name: Run integration tests
shell: bash
timeout-minutes: 150
run: |
eval $(make -C capi-lab --silent dev-env)
./capi-lab/test/ci-cleanup.sh
Expand Down
72 changes: 42 additions & 30 deletions capi-lab/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ANSIBLE_EXTRA_VARS_FILE=$(shell pwd)/mini-lab-overrides/extra-vars.yaml
KIND_EXPERIMENTAL_DOCKER_NETWORK=mini_lab_ext
KUBECONFIG := $(shell pwd)/mini-lab/.kubeconfig

MINI_LAB_FLAVOR := $(or $(MINI_LAB_FLAVOR),capms)
MINI_LAB_FLAVOR := $(or $(MINI_LAB_FLAVOR),capms_sonic)

CLUSTER_NAME ?= metal-test
KUBERNETES_VERSION ?= 1.33.5
Expand Down Expand Up @@ -35,7 +35,9 @@ SUBMODULE_SHA=$(shell git -C mini-lab rev-parse --short=8 HEAD)
MINI_LAB_VM_IMAGE := $(or $(MINI_LAB_VM_IMAGE),ghcr.io/metal-stack/mini-lab-vms:$(SUBMODULE_SHA))
MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:$(SUBMODULE_SHA))

ifeq ($(MINI_LAB_FLAVOR),capms)
ifeq ($(MINI_LAB_FLAVOR),capms_sonic)
DEPLOY_TARGET=deploy-kubeadm
else ifeq ($(MINI_LAB_FLAVOR),capms_dell_sonic)
DEPLOY_TARGET=deploy-kubeadm
else ifeq ($(MINI_LAB_FLAVOR),kamaji)
DEPLOY_TARGET=deploy-kamaji
Expand Down Expand Up @@ -101,54 +103,64 @@ control-plane-ip:
apply-sample-cluster:
$(eval CONTROL_PLANE_IP = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network ip list --name "$(CLUSTER_NAME)-vip" -o template --template '{{ .ipaddress }}'))
echo $(CLUSTER_NAME)
clusterctl generate cluster $(CLUSTER_NAME) \
--kubeconfig=$(KUBECONFIG) \
--worker-machine-count 1 \
--control-plane-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from ../config/clusterctl-templates/cluster-template-calico-lab.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) apply -f -
docker compose -f compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) \
clusterctl generate cluster $(CLUSTER_NAME) \
--worker-machine-count 1 \
--control-plane-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-calico-lab.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) apply -f -

.PHONY: delete-sample-cluster
delete-sample-cluster:
$(eval CONTROL_PLANE_IP = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network ip list --name "$(CLUSTER_NAME)-vip" -o template --template '{{ .ipaddress }}'))
clusterctl generate cluster $(CLUSTER_NAME) \
--kubeconfig=$(KUBECONFIG) \
--worker-machine-count 1 \
--control-plane-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from ../config/clusterctl-templates/cluster-template-calico-lab.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) delete -f -
docker compose -f compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) \
clusterctl generate cluster $(CLUSTER_NAME) \
--worker-machine-count 1 \
--control-plane-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-calico-lab.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) delete -f -

.PHONY: mtu-fix
mtu-fix:
cd mini-lab && ssh -F files/ssh/config leaf01 'ip link set dev vtep-1001 mtu 9100 && echo done'
cd mini-lab && ssh -F files/ssh/config leaf02 'ip link set dev vtep-1001 mtu 9100 && echo done'

.PHONY: sample-cluster-kubeconfig
sample-cluster-kubeconfig:
kubectl --kubeconfig=$(KUBECONFIG) get secret $(CLUSTER_NAME)-kubeconfig -o jsonpath='{.data.value}' | base64 -d > ../$(CLUSTER_NAME).kubeconfig
@echo "Sample cluster kubeconfig written to $(CLUSTER_NAME).kubeconfig"

.PHONY: sample-cluster-deploy-metal-ccm
sample-cluster-deploy-metal-ccm:
$(eval METAL_NODE_NETWORK_ID = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network list --project $(METAL_PROJECT_ID) -o json | jq -r '.[0].id'))
kubectl kustomize ../config/target-cluster/overlays/kubeadm | envsubst | kubectl --kubeconfig=../$(CLUSTER_NAME).kubeconfig apply -f -

.PHONY: create-kamaji-tenant
create-kamaji-tenant:
$(eval CONTROL_PLANE_IP = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network ip list --name "$(CLUSTER_NAME)-vip" -o template --template '{{ .ipaddress }}'))
$(eval METAL_NODE_NETWORK_ID = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network list --project $(METAL_PROJECT_ID) -o template --template '{{ (index . 0).id }}'))
$(eval METAL_NODE_NETWORK_ID = $(shell docker compose -f mini-lab/compose.yaml run $(DOCKER_COMPOSE_RUN_ARG) metalctl network list --project $(METAL_PROJECT_ID) -o json | jq -r '.[0].id'))
kubectl --kubeconfig=$(KUBECONFIG) create namespace $(TENANT_NAMESPACE) --dry-run=client -o yaml | kubectl --kubeconfig=$(KUBECONFIG) apply -f -
# let MetalLB assign the IP to the tenant cluster control plane service
envsubst < kamaji/metallb-tenant-pool.yaml | kubectl --kubeconfig=$(KUBECONFIG) apply -f -
docker compose -f compose.yaml -f compose.kamaji.yaml run $(DOCKER_COMPOSE_RUN_ARG) \
clusterctl generate cluster $(CLUSTER_NAME) \
--target-namespace $(TENANT_NAMESPACE) \
--worker-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-kamaji-tenant.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) apply -f -
clusterctl generate cluster $(CLUSTER_NAME) \
--target-namespace $(TENANT_NAMESPACE) \
--worker-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-kamaji-tenant.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) apply -f -

.PHONY: delete-kamaji-tenant
delete-kamaji-tenant:
docker compose -f compose.yaml -f compose.kamaji.yaml run $(DOCKER_COMPOSE_RUN_ARG) \
clusterctl generate cluster $(CLUSTER_NAME) \
--target-namespace $(TENANT_NAMESPACE) \
--worker-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-kamaji-tenant.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) delete -f -
clusterctl generate cluster $(CLUSTER_NAME) \
--target-namespace $(TENANT_NAMESPACE) \
--worker-machine-count 1 \
--kubernetes-version $(KUBERNETES_VERSION) \
--from /templates/cluster-template-kamaji-tenant.yaml \
| kubectl --kubeconfig=$(KUBECONFIG) delete -f -

.PHONY: kamaji-tenant-kubeconfig
kamaji-tenant-kubeconfig:
Expand All @@ -162,4 +174,4 @@ kamaji-tenant-deploy-calico:

.PHONY: kamaji-tenant-deploy-metal-ccm
kamaji-tenant-deploy-metal-ccm:
kustomize build ../config/target-cluster/overlays/kamaji | envsubst | kubectl --kubeconfig=../$(CLUSTER_NAME).kubeconfig apply -f -
kubectl kustomize ../config/target-cluster/overlays/kamaji | envsubst | kubectl --kubeconfig=../$(CLUSTER_NAME).kubeconfig apply -f -
18 changes: 0 additions & 18 deletions capi-lab/compose.kamaji.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,7 @@ services:
clusterctl:
image: registry.k8s.io/cluster-api/clusterctl:v1.12.3
environment:
- EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION=true
- METAL_API_HMAC=${METAL_API_HMAC}
- METAL_API_URL=${METAL_API_URL}
- METAL_API_HMAC_AUTH_TYPE=${METAL_API_HMAC_AUTH_TYPE}
- CLUSTER_NAME=${CLUSTER_NAME}
- TENANT_NAMESPACE=${TENANT_NAMESPACE}
- KUBERNETES_VERSION=${KUBERNETES_VERSION}
- CONTROL_PLANE_IP=${CONTROL_PLANE_IP}
- PODS_CIDR=${PODS_CIDR}
- SERVICES_CIDR=${SERVICES_CIDR}
- METAL_PARTITION=${METAL_PARTITION}
- METAL_PROJECT_ID=${METAL_PROJECT_ID}
- CONTROL_PLANE_MACHINE_IMAGE=${CONTROL_PLANE_MACHINE_IMAGE}
- CONTROL_PLANE_MACHINE_SIZE=${CONTROL_PLANE_MACHINE_SIZE}
- WORKER_MACHINE_IMAGE=${WORKER_MACHINE_IMAGE}
- WORKER_MACHINE_SIZE=${WORKER_MACHINE_SIZE}
- FIREWALL_MACHINE_IMAGE=${FIREWALL_MACHINE_IMAGE}
- FIREWALL_MACHINE_SIZE=${FIREWALL_MACHINE_SIZE}
- METAL_NODE_NETWORK_ID=${METAL_NODE_NETWORK_ID}
- FIREWALL_EXTERNAL_NETWORKS=${FIREWALL_EXTERNAL_NETWORKS}
volumes:
- ../config/clusterctl-templates:/templates:ro
18 changes: 17 additions & 1 deletion capi-lab/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,24 @@ services:
environment:
- EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION=true
- KUBECONFIG=/kubeconfig
image: registry.k8s.io/cluster-api/clusterctl:v1.9.5
- METAL_API_HMAC=${METAL_API_HMAC}
- METAL_API_URL=${METAL_API_URL}
- METAL_API_HMAC_AUTH_TYPE=${METAL_API_HMAC_AUTH_TYPE}
- CLUSTER_NAME=${CLUSTER_NAME}
- KUBERNETES_VERSION=${KUBERNETES_VERSION}
- CONTROL_PLANE_IP=${CONTROL_PLANE_IP}
- METAL_PARTITION=${METAL_PARTITION}
- METAL_PROJECT_ID=${METAL_PROJECT_ID}
- CONTROL_PLANE_MACHINE_IMAGE=${CONTROL_PLANE_MACHINE_IMAGE}
- CONTROL_PLANE_MACHINE_SIZE=${CONTROL_PLANE_MACHINE_SIZE}
- WORKER_MACHINE_IMAGE=${WORKER_MACHINE_IMAGE}
- WORKER_MACHINE_SIZE=${WORKER_MACHINE_SIZE}
- FIREWALL_MACHINE_IMAGE=${FIREWALL_MACHINE_IMAGE}
- FIREWALL_MACHINE_SIZE=${FIREWALL_MACHINE_SIZE}
- FIREWALL_EXTERNAL_NETWORKS=${FIREWALL_EXTERNAL_NETWORKS}
image: registry.k8s.io/cluster-api/clusterctl:v1.11.4
network_mode: host
user: root
volumes:
- ${KUBECONFIG}:/kubeconfig:ro
- ../config/clusterctl-templates:/templates:ro
130 changes: 126 additions & 4 deletions capi-lab/test/integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,141 @@ minWaiting=2
declare -i attempts=0
until [ "$waiting" -ge $minWaiting ]
do
if [ "$attempts" -ge 60 ]; then
if [ "$attempts" -ge 180 ]; then
echo "not enough machines in waiting state - timeout reached"
exit 1
fi
echo "$waiting/$minWaiting machines are waiting"
sleep 5
waiting=$(docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls | grep Waiting | wc -l)
attempts=$attempts+1
attempts+=1
done
echo "$waiting/$minWaiting machines are waiting"

make push-to-capi-lab

if [ "$MINI_LAB_FLAVOR" = "capms_dell_sonic" ] || [ "$MINI_LAB_FLAVOR" = "capms_sonic" ]; then

if [ "$MINI_LAB_FLAVOR" = "capms_dell_sonic" ]; then
echo "Starting capms dell sonic flavor tests"
else
echo "Starting capms sonic flavor tests"
fi

export CLUSTER_NAME=metal-test

echo "Creating control plane IP"
make -C capi-lab control-plane-ip

echo "Applying sample cluster"
make -C capi-lab apply-sample-cluster

echo "Waiting for cluster to be provisioned"
declare -i attempts=0
until kubectl --kubeconfig ${KUBECONFIG} get cluster ${CLUSTER_NAME} -o jsonpath='{.status.phase}' 2>/dev/null | grep -q "Provisioned"
do
if [ "$attempts" -ge 180 ]; then
echo "cluster was not provisioned - timeout reached"
kubectl --kubeconfig ${KUBECONFIG} get cluster ${CLUSTER_NAME} -o yaml || true
exit 1
fi
echo "cluster ${CLUSTER_NAME} is not yet provisioned"
sleep 5
attempts+=1
done
echo "Cluster ${CLUSTER_NAME} is provisioned"


echo "Waiting for firewall and control-plane to get to Phoned Home state"
phoned=$(docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls | grep Phoned | wc -l)
minPhoned=2
declare -i attempts=0
until [ "$phoned" -ge $minPhoned ]
do
if [ "$attempts" -ge 240 ]; then
echo "not enough machines phoned home - timeout reached"
exit 1
fi
echo "$phoned/$minPhoned machines have phoned home"
sleep 5
phoned=$(docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls | grep Phoned | wc -l)
attempts+=1
done
echo "$phoned/$minPhoned machines have phoned home"

if [ "$MINI_LAB_FLAVOR" = "capms_sonic" ]; then
echo "Applying mtu fix"
make -C capi-lab mtu-fix
fi

echo "Waiting for worker to get to Phoned Home state"
phoned=$(docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls | grep Phoned | wc -l)
minPhoned=3
declare -i attempts=0
until [ "$phoned" -ge $minPhoned ]
do
if [ "$attempts" -ge 480 ]; then
echo "not enough machines phoned home - timeout reached"
docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls || true
exit 1
fi
if [ $((attempts % 60)) -eq 0 ] && [ "$attempts" -gt 0 ]; then
echo "machine states after $attempts attempts:"
docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls || true
fi
echo "$phoned/$minPhoned machines have phoned home"
sleep 5
phoned=$(docker compose -f capi-lab/mini-lab/compose.yaml run --no-TTY --rm metalctl machine ls | grep Phoned | wc -l)
attempts+=1
done
echo "$phoned/$minPhoned machines have phoned home"

echo "Generating kubeconfig for sample cluster"
make -C capi-lab sample-cluster-kubeconfig

# TODO remove once we can reliably check for the nodes to be ready
kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get nodes || true
kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get pods -A || true

echo "Waiting for tenant API server to be reachable"
declare -i attempts=0
until kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig version >/dev/null 2>&1
do
if [ "$attempts" -ge 180 ]; then
echo "tenant API server not reachable - timeout reached"
kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig version || true
exit 1
fi
echo "tenant API server not reachable yet"
sleep 5
attempts+=1
done
echo "Tenant API server is reachable"

echo "Waiting for control-plane node and worker node to become Ready"
minReady=2
ready=0
declare -i attempts=0
until [ "$ready" -ge $minReady ]
do
if [ "$attempts" -ge 180 ]; then
echo "not enough nodes became Ready - timeout reached"
kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get nodes || true
exit 1
fi
echo "$ready/$minReady nodes are Ready"
sleep 5
ready=$(kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get nodes --no-headers 2>/dev/null | awk '{ print $2 }' | grep -c "^Ready$" || true)
attempts+=1
done
echo "$ready/$minReady nodes are Ready"

fi


if [ "$MINI_LAB_FLAVOR" = "kamaji" ]; then

echo "Starting kamaji tests"
echo "Starting kamaji flavor tests"

echo "Creating control plane IP"
export CLUSTER_NAME=kamaji-tenant-test
Expand All @@ -42,7 +161,7 @@ if [ "$MINI_LAB_FLAVOR" = "kamaji" ]; then
declare -i attempts=0
until [ "$phoned" -ge $minPhoned ]
do
if [ "$attempts" -ge 120 ]; then
if [ "$attempts" -ge 180 ]; then
echo "not enough machines phoned home - timeout reached"
exit 1
fi
Expand All @@ -67,6 +186,9 @@ if [ "$MINI_LAB_FLAVOR" = "kamaji" ]; then

echo "Checking if tenant cluster exists"
if kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get nodes | grep -e "Ready"; then
# Currently this also catches NotReady nodes, but that's good enough for now to verify
# that the node has joined.
# Only metal-ccm will be able to set the node to Ready but we do not go that far here
echo "Nodes have joined the cluster and are ready"
elif kubectl --kubeconfig ${CLUSTER_NAME}.kubeconfig get nodes | grep -e "No resources found"; then
echo "Nodes have not joined yet"
Expand Down
7 changes: 5 additions & 2 deletions config/clusterctl-templates/cluster-template-calico-lab.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ spec:
spec:
format: ignition
clusterConfiguration:
apiServer:
extraArgs: {}
controllerManager:
extraArgs:
cloud-provider: external
Expand Down Expand Up @@ -561,6 +559,11 @@ data:
- effect: NoSchedule
key: node.cloudprovider.kubernetes.io/uninitialized
value: "true"
- effect: NoSchedule
key: node.cluster.x-k8s.io/uninitialized
operator: Exists
- key: node.kubernetes.io/not-ready
operator: Exists
restartPolicy: Always
volumes:
- name: cloud-controller-manager
Expand Down
Loading