Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,15 +1,61 @@
#!/bin/bash
# Step script: optionally overrides the MCO/KCMO operator images, applies the
# FeatureGate configuration and waits for the resulting rollouts.
#
# Environment:
#   ARTIFACT_DIR - directory for generated files (defaults to /tmp)

# Strict mode: abort on errors, on unset variables and on failures anywhere in
# a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# /tmp/bin is appended (not prepended) to PATH, so an `oc` that is already
# available always takes precedence. Only download a client when none exists.
export PATH=$PATH:/tmp/bin
if ! command -v oc >/dev/null 2>&1; then
  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p /tmp/bin
  # -f: fail on HTTP errors instead of piping an error page into tar.
  curl -fsSL https://mirror.openshift.com/pub/openshift-v4/clients/oc/latest/linux/oc.tar.gz | tar xvzf - -C /tmp/bin/ oc
  chmod ug+x /tmp/bin/oc
fi

export ARTIFACT_DIR="${ARTIFACT_DIR:-/tmp}"

# Namespaces of the operators this script interacts with.
export CCM_NAMESPACE="openshift-cloud-controller-manager"
export KCMO_NAMESPACE="openshift-kube-controller-manager-operator"
export MCO_NAMESPACE="openshift-machine-config-operator"

# Marks the MCO and KCMO resources as unmanaged in the ClusterVersion object.
#
# Writes a JSON-patch document (expressed as YAML) to
# ${ARTIFACT_DIR}/override.yaml and applies it with `oc patch`. The patch adds
# /spec/overrides entries for the machine-config-controller and
# machine-config-operator deployments, the machine-config-operator-images
# config map and the kube-controller-manager-operator deployment.
#
# Globals read: ARTIFACT_DIR, MCO_NAMESPACE, KCMO_NAMESPACE
function overrideCVO() {
  local patch_file="${ARTIFACT_DIR}/override.yaml"

  # The nesting below is significant: the document is a one-element list whose
  # "value" holds the list of override entries.
  cat <<EOF >"${patch_file}"
- op: add
  path: /spec/overrides
  value:
  - kind: Deployment
    group: apps/v1
    name: machine-config-controller
    namespace: $MCO_NAMESPACE
    unmanaged: true
  - kind: Deployment
    group: apps/v1
    name: machine-config-operator
    namespace: $MCO_NAMESPACE
    unmanaged: true
  - kind: ConfigMap
    group: v1
    name: machine-config-operator-images
    namespace: $MCO_NAMESPACE
    unmanaged: true
  - kind: Deployment
    group: apps/v1
    name: kube-controller-manager-operator
    namespace: $KCMO_NAMESPACE
    unmanaged: true
EOF

  echo "$(date -u --rfc-3339=seconds) - Unmanage MCO and KCMO in CVO"
  oc patch clusterversion version --type json -p "$(cat "${patch_file}")"
}

# Points the machine-config-operator components at $MCO_IMAGE_OVERRIDE.
#
# Replaces the image of the first container in the machine-config-controller
# and machine-config-operator deployments, then rewrites the
# machineConfigOperator entry of the machine-config-operator-images config map
# and re-applies it.
#
# Globals read: MCO_NAMESPACE, MCO_IMAGE_OVERRIDE
function overrideMCOImage() {
  # The image is spliced in quoted so the patch always stays a single argument.
  local image_patch='[{ "op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "'"${MCO_IMAGE_OVERRIDE}"'" }]'

  echo "$(date -u --rfc-3339=seconds) - Override MCO image with $MCO_IMAGE_OVERRIDE"
  oc patch -n "$MCO_NAMESPACE" deploy machine-config-controller --type=json -p "${image_patch}"
  oc patch -n "$MCO_NAMESPACE" deploy machine-config-operator --type=json -p "${image_patch}"
  # Everything from "machineConfigOperator" up to the next comma is replaced by
  # the key followed by the override image.
  oc get -n "$MCO_NAMESPACE" cm machine-config-operator-images -o yaml \
    | sed -E "s|machineConfigOperator[^,]*|machineConfigOperator\": \"$MCO_IMAGE_OVERRIDE\"|g" \
    | oc apply -f -
}

# Points the kube-controller-manager-operator deployment at
# $KCMO_IMAGE_OVERRIDE.
#
# The patch sets both the image of the kube-controller-manager-operator
# container and its OPERATOR_IMAGE environment variable to the override.
#
# Globals read: KCMO_NAMESPACE, KCMO_IMAGE_OVERRIDE
function overrideKCMOImage() {
  # The image is spliced in quoted so the patch always stays a single argument.
  local deploy_patch='{"spec":{"template":{"spec":{"containers":[{"name":"kube-controller-manager-operator","image":"'"${KCMO_IMAGE_OVERRIDE}"'","env":[{"name":"OPERATOR_IMAGE","value":"'"${KCMO_IMAGE_OVERRIDE}"'"}]}]}}}}'

  echo "$(date -u --rfc-3339=seconds) - Override KCMO image with $KCMO_IMAGE_OVERRIDE"
  oc patch -n "${KCMO_NAMESPACE}" deploy kube-controller-manager-operator -p "${deploy_patch}"
}

cat <<EOF | oc apply -f -
function applyFeatureGate() {
echo "$(date -u --rfc-3339=seconds) - Apply external cloud-controller-manager FeatureGate configuration"

cat <<EOF | oc apply -f -
---
apiVersion: config.openshift.io/v1
kind: FeatureGate
Expand All @@ -23,37 +69,106 @@ spec:
customNoUpgrade:
enabled:
- ExternalCloudProvider
- CSIMigrationAWS
Comment thread
Danil-Grigorev marked this conversation as resolved.
Outdated
- CSIMigrationOpenStack
featureSet: CustomNoUpgrade
EOF
}

# Waits for the external cloud provider configuration to roll out.
#
# In order, each step bounded by waitFor's timeout:
#   1. machineconfig objects contain the cloud-provider=external flag
#   2. kube-controller-manager pods contain the cloud-provider=external flag
#   3. every machineconfigpool reports Updated=True
#   4. every cluster operator reports Available=True
#   5. every cluster operator reports Progressing=False
function waitForKubeletAndKCMRollout() {
  echo "$(date -u --rfc-3339=seconds) - Updated machineconfig should contain --cloud-provider=external flag..."
  waitFor 30m setExternalFlagMCO

  echo "$(date -u --rfc-3339=seconds) - Updated kube-controller-manager pods should contain --cloud-provider=external flag..."
  waitFor 30m setExternalFlagKCMO

  echo "$(date -u --rfc-3339=seconds) - All machineconfigs should be updated after rollout..."
  waitFor 30m oc wait --all --for=condition=Updated=True machineconfigpool

  echo "$(date -u --rfc-3339=seconds) - Wait for the operator to go available..."
  waitFor 10m oc wait --all --for=condition=Available=True clusteroperators.config.openshift.io

  echo "$(date -u --rfc-3339=seconds) - Waits for operators to finish rolling out..."
  waitFor 30m oc wait --all --for=condition=Progressing=False clusteroperators.config.openshift.io
}

# Blocks until at least one deployment is listed in ${CCM_NAMESPACE}.
#
# Polls every 5 seconds and never gives up on its own; callers are expected to
# bound it (the script runs it through waitFor).
#
# Globals read: CCM_NAMESPACE
function CCMDeploymentCreated() {
  # Numeric comparison: robust against whitespace padding in `wc -l` output.
  while [ "$(oc get deploy -n "${CCM_NAMESPACE}" -o name | wc -l)" -eq 0 ]; do
    echo "$(date -u --rfc-3339=seconds) - Wait for CCCMO operands creation"
    sleep 5
  done
}
# Exported so the function is visible to child `bash -c` processes.
export -f CCMDeploymentCreated

# Polls the machineconfig objects every 20 seconds and returns once their YAML
# contains at least one line mentioning cloud-provider=external.
function setExternalFlagMCO() {
  until [ "$(oc get machineconfig -o yaml | grep -c 'cloud-provider=external')" != 0 ]; do
    printf '%s - Wait for machineconfig to set external cloud providers...\n' \
      "$(date -u --rfc-3339=seconds)"
    sleep 20
  done
}
# Make the function available to child `bash -c` processes.
export -f setExternalFlagMCO

# Blocks until the kube-controller-manager pods carry the
# cloud-provider=external flag.
#
# Every 20 seconds it compares two numbers taken from the
# openshift-kube-controller-manager namespace:
#   - the number of pods labelled kube-controller-manager=true
#   - the number of lines mentioning cloud-provider=external in the YAML of
#     all pods in the namespace
# and returns once they are equal and at least one pod was listed. An empty
# pod listing is never treated as success, because "0 pods, 0 matches" would
# otherwise be reported as a finished rollout.
function setExternalFlagKCMO() {
  local kcm_namespace="openshift-kube-controller-manager"
  local pod_count flagged_lines

  while true; do
    # `|| true` is intentional: a failing `oc` or a `grep -c` without matches
    # must lead to another poll, not abort a caller running with errexit.
    pod_count="$(oc get pods -n "${kcm_namespace}" -l 'kube-controller-manager=true' -o name | wc -l)" || true
    flagged_lines="$(oc get pods -n "${kcm_namespace}" -o yaml | grep -c 'cloud-provider=external')" || true

    if [[ "${pod_count:-0}" -gt 0 && "${flagged_lines:-0}" -eq "${pod_count:-0}" ]]; then
      return 0
    fi

    echo "$(date -u --rfc-3339=seconds) - Waiting for kube-controller-manager to set external cloud providers..."
    sleep 20
  done
}
# Exported so the function is visible to child `bash -c` processes.
export -f setExternalFlagKCMO

# Waits up to 3 minutes (enforced by waitFor) for every deployment in
# ${CCM_NAMESPACE} to report the Available=True condition.
#
# Globals read: CCM_NAMESPACE
function waitForCCMDeploymentReadiness() {
  echo "$(date -u --rfc-3339=seconds) - Wait for CCCMO operands to be ready"
  waitFor 3m oc wait --all -n "${CCM_NAMESPACE}" --for=condition=Available=True deployment
}

# Runs a command through `execute` (which retries it until it succeeds) under
# a time limit.
#
# Arguments:
#   $1   - time limit in the format accepted by timeout(1), e.g. 30m
#   $2.. - command and arguments; they are joined into one string and
#          re-parsed by a child `bash -c`, so `execute` and any shell
#          functions used in the command must be exported
# Exits the script with status 1 when the time limit is hit.
function waitFor() {
  local TIMEOUT="${1}"
  local CMD="${*:2}"
  local ret=0

  # `|| ret=$?` captures the status without tripping errexit.
  timeout --foreground "${TIMEOUT}" bash -c "execute ${CMD}" || ret="$?"

  # Command timed out (timeout reports this as status 124)
  # NOTE(review): any other non-zero status is ignored here and the function
  # returns successfully - confirm this is intended.
  if [[ "${ret}" -eq 124 ]]; then
    echo "$(date -u --rfc-3339=seconds) - Timed out waiting for result of $CMD"
    exit 1
  fi
}

# Runs a command in a child bash, repeating it until it exits with status 0.
#
# Arguments:
#   $@ - command and arguments; joined into a single string and passed to
#        `bash -c`
# There is no retry limit here; the script bounds it by running it under
# timeout(1) from waitFor.
function execute() {
  local CMD="${*}"
  local ret

  # API server occasionally becomes unavailable, so we repeat command in case of error
  while true; do
    ret=0
    # `|| ret=$?` captures the status without tripping errexit.
    bash -c "${CMD}" || ret="$?"

    if [[ "${ret}" -eq 0 ]]; then
      return
    fi

    echo "$(date -u --rfc-3339=seconds) - Command returned error $ret, retrying..."
  done
}
# Exported so the function is visible to the child bash started by waitFor.
export -f execute

# Main flow.
#
# When an image override is requested, the affected resources are first marked
# as unmanaged in the ClusterVersion object and then patched to use the
# override image(s). The `:-` defaults keep the checks safe under `nounset`
# when a variable is not provided at all.
if [[ -n "${MCO_IMAGE_OVERRIDE:-}" || -n "${KCMO_IMAGE_OVERRIDE:-}" ]]; then
  overrideCVO

  if [[ -n "${MCO_IMAGE_OVERRIDE:-}" ]]; then
    overrideMCOImage
  fi

  if [[ -n "${KCMO_IMAGE_OVERRIDE:-}" ]]; then
    overrideKCMOImage
  fi
fi

# Apply the feature gate, wait for the CCM deployments to be created and
# become available, then wait for the kubelet and KCM rollouts.
applyFeatureGate
waitFor 10m CCMDeploymentCreated
waitForCCMDeploymentReadiness
waitForKubeletAndKCMRollout
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
ref:
as: ccm-conf-apply-feature-gate
commands: ccm-conf-apply-feature-gate-commands.sh
from: cluster-cloud-controller-manager-operator-src
from: cli
resources:
requests:
cpu: 10m
memory: 100Mi
env:
- name: MCO_IMAGE_OVERRIDE
- name: KCMO_IMAGE_OVERRIDE
documentation: |-
Apply custom featureGate configuration to cluster to enable CCM functionality
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,31 @@ workflow:
as: openshift-e2e-aws-ccm
steps:
pre:
- chain: ipi-aws-pre
- ref: ipi-conf
- ref: ipi-conf-aws
# TODO: add - ref: ipi-install-monitoringpvc
# We do not include the monitoring PVC introduced in https://github.com/openshift/release/pull/15040,
# as the functionality of this PVC depends on a successful migration to CSI, which we are currently
# unable to fully determine.
- chain: ipi-install
- ref: ccm-conf-apply-feature-gate
- ref: storage-conf-csi-aws-ebs
- ref: storage-conf-wait-for-csi-migration
test:
- ref: openshift-e2e-test
post:
- chain: gather-core-dump
- chain: ipi-aws-post
env:
TEST_INSTALL_CSI_DRIVERS: aws-ebs
TEST_CSI_DRIVER_MANIFEST: manifest-aws-ebs.yaml
MCO_IMAGE_OVERRIDE: "quay.io/dgrigore/machine-config-operator@sha256:afa4b9b0688f7a6e45dfc2c725983309d79ebd0b26d2a4d4b36741bfb7cd294c"
KCMO_IMAGE_OVERRIDE: "quay.io/dgrigore/cluster-kube-controller-manager-operator@sha256:9706218f03739fe5d8184ad7e32aa845aae6f331d9ce07c4f07cfd1fb13802dc"
TEST_SKIPS: >-
\[sig-arch\]\[Early\] Managed cluster should start all core operators \[Skipped:Disconnected\]\|
\[sig-instrumentation\] Prometheus when installed on the cluster shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured \[Early\] \[Skipped:Disconnected\]\|
Alerts shouldn't report any alerts in firing or pending state
Alerts shouldn't report any alerts in firing or pending state\|
Prometheus when installed on the cluster shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured
Comment thread
Danil-Grigorev marked this conversation as resolved.
Outdated
documentation: |-
The OpenShift E2E AWS workflow using CCM as the primary means to initialize nodes and create external LoadBalancers.
Executes the common end-to-end test suite on AWS to test updated cluster configuration.