From 3d99290f08274d765b825d6f58d24c050d4ab975 Mon Sep 17 00:00:00 2001 From: Gal Zaidman Date: Sun, 5 Jan 2020 17:22:50 +0200 Subject: [PATCH 1/5] Add ci operator ovirt configuration This patch adds ovirt to the ci-operator openshift installer configuration Signed-off-by: Gal Zaidman --- .../installer/openshift-installer-master.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ci-operator/config/openshift/installer/openshift-installer-master.yaml b/ci-operator/config/openshift/installer/openshift-installer-master.yaml index 008542677f063..d52751a11c896 100644 --- a/ci-operator/config/openshift/installer/openshift-installer-master.yaml +++ b/ci-operator/config/openshift/installer/openshift-installer-master.yaml @@ -57,6 +57,14 @@ images: - builder paths: null to: baremetal-installer +- dockerfile_path: images/installer/Dockerfile.ci + from: base + inputs: + root: + as: + - build + paths: null + to: ovirt-installer promotion: name: "4.4" namespace: ocp @@ -115,6 +123,12 @@ resources: requests: cpu: "3" memory: 5Gi + ovirt-installer: + limits: + memory: 9Gi + requests: + cpu: "3" + memory: 5Gi tag_specification: name: "4.4" namespace: ocp @@ -182,4 +196,4 @@ tests: - as: e2e-vsphere commands: TEST_SUITE=openshift/conformance/parallel run-tests openshift_installer_upi: - cluster_profile: vsphere + cluster_profile: vsphere \ No newline at end of file From 36c8c513110975fd4c4d6bdae7bdcafe3992c8f4 Mon Sep 17 00:00:00 2001 From: Gal Zaidman Date: Sun, 5 Jan 2020 17:49:53 +0200 Subject: [PATCH 2/5] Add ovirt e2e template This patch adds ovirt e2e template, the template runs lease,setup,test,teardown containers Signed-off-by: Gal Zaidman --- .../cluster-launch-installer-ovirt-e2e.yaml | 569 ++++++++++++++++++ 1 file changed, 569 insertions(+) create mode 100644 ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml 
b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml new file mode 100644 index 0000000000000..54481a53154d8 --- /dev/null +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml @@ -0,0 +1,569 @@ +kind: Template +apiVersion: template.openshift.io/v1 + +parameters: +- name: JOB_NAME_SAFE + required: true +- name: JOB_NAME_HASH + required: true +- name: NAMESPACE + required: true +- name: IMAGE_TESTS + required: true +- name: IMAGE_INSTALLER + required: true +- name: CLUSTER_TYPE + value: ovirt +- name: TEST_COMMAND + required: true +- name: LEASE_STATE + required: true +- name: RELEASE_IMAGE_LATEST + required: true +- name: BASE_DOMAIN + required: true + value: gcp.devcluster.openshift.com + +objects: + +# We want the cluster to be able to access these images +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-image-puller + namespace: ${NAMESPACE} + roleRef: + name: system:image-puller + subjects: + - kind: SystemGroup + name: system:unauthenticated + - kind: SystemGroup + name: system:authenticated + +# Give admin access to a known bot +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-namespace-admins + namespace: ${NAMESPACE} + roleRef: + name: admin + subjects: + - kind: ServiceAccount + namespace: ci + name: ci-chat-bot + +# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster. 
+- kind: Pod + apiVersion: v1 + metadata: + name: ${JOB_NAME_SAFE} + namespace: ${NAMESPACE} + annotations: + # we want to gather the teardown logs no matter what + ci-operator.openshift.io/wait-for-container-artifacts: teardown + ci-operator.openshift.io/save-container-logs: "true" + ci-operator.openshift.io/container-sub-tests: "lease,setup,test,teardown" + spec: + restartPolicy: Never + activeDeadlineSeconds: 14400 + terminationGracePeriodSeconds: 900 + volumes: + - name: artifacts + emptyDir: {} + - name: shared-tmp + emptyDir: {} + - name: cluster-profile + secret: + secretName: ${JOB_NAME_SAFE}-cluster-profile + + containers: + + - name: lease + image: registry.svc.ci.openshift.org/ci/boskoscli:latest + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 10Mi + limits: + memory: 200Mi + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: CLUSTER_TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ${NAMESPACE}-${JOB_NAME_HASH} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + + trap 'rc=$?; CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi; if test "${rc}" -ne 0; then touch /tmp/shared/exit; fi; exit "${rc}"' EXIT + + # hack for bazel + function boskosctl() { + /app/boskos/cmd/cli/app.binary "${@}" + } + + function extract_leases_info() { + echo "$( jq ."${1}" --raw-output "${2}" )" + } + + echo "[INFO] Acquiring a lease ..." 
+ resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-quota-slice" --state "${LEASE_STATE}" --target-state "${LEASE_STATE}-leased" --timeout 150m )" + resource_name="$(echo "${resource}"|jq .name --raw-output)" + lease_path="/etc/openshift-installer/${resource_name}.json" + + #Saving parameters for the env + cat > /tmp/shared/ovirt-lease.conf <&1 + exit 0 + fi + + sleep 15 & wait $! + done + + + # Once the cluster is up, executes shared tests + - name: test + image: ${IMAGE_TESTS} + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 1 + memory: 1Gi + limits: + memory: 7Gi + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /tmp/cluster + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: ARTIFACT_DIR + value: /tmp/artifacts + - name: HOME + value: /tmp/home + - name: KUBECONFIG + value: /tmp/artifacts/installer/auth/kubeconfig + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + + export PATH=/usr/libexec/origin:$PATH + + trap 'touch /tmp/shared/exit' EXIT + trap 'kill $(jobs -p); exit 0' TERM + + mkdir -p "${HOME}" + + # Share oc with other containers + cp "$(command -v oc)" /tmp/shared + + # wait for the API to come up + while true; do + if [[ -f /tmp/shared/exit ]]; then + echo "Another process exited" 2>&1 + exit 1 + fi + if [[ ! 
-f /tmp/shared/setup-success ]]; then + sleep 15 & wait + continue + fi + # don't let clients impact the global kubeconfig + cp "${KUBECONFIG}" /tmp/admin.kubeconfig + export KUBECONFIG=/tmp/admin.kubeconfig + break + done + + # if the cluster profile included an insights secret, install it to the cluster to + # report support data from the support-operator + if [[ -f /tmp/cluster/insights-live.yaml ]]; then + oc create -f /tmp/cluster/insights-live.yaml || true + fi + + # set up env vars + export KUBE_SSH_BASTION="$( oc --insecure-skip-tls-verify get node -l node-role.kubernetes.io/master -o 'jsonpath={.items[0].status.addresses[?(@.type=="ExternalIP")].address}' ):22" + export KUBE_SSH_KEY_PATH=/tmp/cluster/ssh-privatekey + mkdir -p ~/.ssh + cp /tmp/cluster/ssh-privatekey ~/.ssh/kube_ovirt_rsa || true + + mkdir -p /tmp/output + cd /tmp/output + + function run-upgrade-tests() { + openshift-tests run-upgrade "${TEST_SUITE}" --to-image "${RELEASE_IMAGE_LATEST}" \ + --provider "${TEST_PROVIDER:-}" -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit + exit 0 + } + + function run-tests() { + openshift-tests run "${TEST_SUITE}" \ + --provider "${TEST_PROVIDER:-}" -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit + exit 0 + } + + function run-minimal-tests() { + # Grab all of the tests marked Feature:Builds and conformance/parallel/minimal + openshift-tests run openshift/conformance/parallel --dry-run | + grep 'Smoke' | + openshift-tests run -o /tmp/artifacts/e2e.log \ + --junit-dir /tmp/artifacts/junit -f - + exit 0 + } + + function run-no-tests() { + # This can be used if we just want to check the installer exits 0 + echo "WARNING: No tests were run against the installed cluster" + exit 0 + } + + ${TEST_COMMAND} + + # Runs an install + - name: setup + # A midstep till we have the installer work merged, then we + # can use the CI artifact + image: quay.io/rgolangh/openshift-installer:latest + terminationMessagePolicy: FallbackToLogsOnError + 
volumeMounts: + - name: shared-tmp + mountPath: /tmp + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ovirt + - name: BASE_DOMAIN + value: ${BASE_DOMAIN} + - name: SSH_PUB_KEY_PATH + value: /etc/openshift-installer/ssh-publickey + - name: PULL_SECRET_PATH + value: /etc/openshift-installer/pull-secret + - name: OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE + value: registry.svc.ci.openshift.org/ovirt/ovirt-release:latest + - name: USER + value: test + - name: HOME + value: /tmp + - name: INSTALL_INITIAL_RELEASE + - name: RELEASE_IMAGE_INITIAL + command: + - /bin/sh + - -c + - | + #!/bin/sh + trap 'rc=$?; if test "${rc}" -eq 0; then touch /tmp/setup-success; else touch /tmp/exit; fi; exit "${rc}"' EXIT + trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi' TERM + # Wait until the lease is acquired + while true; do + if [[ -f /tmp/exit ]]; then + echo "Another process exited" 2>&1 + exit 1 + fi + if [[ -f /tmp/leased ]]; then + echo "Lease acquired, installing..." + break + fi + sleep 15 & wait + done + + if [[ -n "${INSTALL_INITIAL_RELEASE}" && -n "${RELEASE_IMAGE_INITIAL}" ]]; then + echo "Installing from initial release ${RELEASE_IMAGE_INITIAL}" + OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="${RELEASE_IMAGE_INITIAL}" + else + echo "Installing from release ${RELEASE_IMAGE_LATEST}" + fi + # poll until the test container has dropped oc into the shared volume (mounted here at /tmp); give up if another container already failed + while [ ! 
-x /tmp/oc ]; do if [ -f /tmp/exit ]; then exit 1; fi; sleep 1; done + cp "$(command -v openshift-install)" /tmp + mkdir /tmp/artifacts/installer + source /tmp/ovirt-lease.conf + source /etc/openshift-installer/ovirt.conf + export PATH=$PATH:/tmp + export EXPIRATION_DATE=$(date -d '4 hours' --iso=minutes --utc) + export SSH_PUB_KEY=$(cat "${SSH_PUB_KEY_PATH}") + export PULL_SECRET=$(cat "${PULL_SECRET_PATH}") + export TF_VAR_ovirt_template_mem=${WORKER_MEM} + export TF_VAR_ovirt_template_cpu=${WORKER_CPU} + export TF_VAR_ovirt_master_mem=${MASTER_MEM} + export TF_VAR_ovirt_master_cpu=${MASTER_CPU} + + ## Image handling - for now the CI uses a fixed rhcos template + ## TODO - the fixed template is saving time and space when creating the + ## cluster at the cost of having to maintain the supported version. This + ## maintenance procedure does not exist yet. + export OPENSHIFT_INSTALL_OS_IMAGE_OVERRIDE=${OVIRT_ENGINE_TEMPLATE_NAME} + + # We want the setup to download the latest CA from the engine + # Therefore leaving it empty + export OVIRT_CONFIG=/tmp/artifacts/installer/ovirt-config.yaml + cat > /tmp/artifacts/installer/ovirt-config.yaml < /tmp/artifacts/installer/install-config.yaml << EOF + apiVersion: v1 + baseDomain: ${BASE_DOMAIN} + metadata: + name: ${OCP_CLUSTER} + compute: + - hyperthreading: Enabled + name: worker + platform: {} + replicas: 2 + controlPlane: + hyperthreading: Enabled + name: master + platform: {} + replicas: 3 + platform: + ovirt: + ovirt_cluster_id: ${OVIRT_ENGINE_CLUSTER_ID} + ovirt_storage_domain_id: ${OVIRT_ENGINE_STORAGE_DOMAIN_ID} + api_vip: ${OVIRT_APIVIP} + dns_vip: ${OVIRT_DNSVIP} + ingress_vip: ${OVIRT_INGRESSVIP} + pullSecret: > + ${PULL_SECRET} + sshKey: | + ${SSH_PUB_KEY} + EOF + + # change the masters ignition to use tmpfs for etcd IOPS optimization + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create ignition-configs --log-level=debug + python -c \ + 'import json, sys; j = json.load(sys.stdin); j[u"systemd"][u"units"] = 
[{u"contents": 
"[Unit]\nDescription=Mount etcd as a ramdisk\nBefore=local-fs.target\n[Mount]\n What=none\nWhere=/var/lib/etcd\nType=tmpfs\nOptions=size=2G\n[Install]\nWantedBy=local-fs.target", u"enabled": True, u"name":u"var-lib-etcd.mount"}]; json.dump(j, sys.stdout)' \ + /tmp/artifacts/installer/master.ign.out + mv /tmp/artifacts/installer/master.ign.out /tmp/artifacts/installer/master.ign + + # What we're doing here is we generate manifests first and force that OpenShift SDN is configured. + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create manifests --log-level=debug + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create cluster --log-level=debug & + wait "$!" + install_exit_status=$? + export KUBECONFIG=/tmp/artifacts/installer/auth/kubeconfig + oc patch configs.imageregistry.operator.openshift.io cluster --type merge --patch '{"spec":{"managementState":"Managed","storage":{"emptyDir":{}}}}' + sleep 10m + oc get co/image-registry + exit $install_exit_status + + # Performs cleanup of all created resources + - name: teardown + # A midstep till we have the installer work merged, then we + # can use the CI artifact + image: quay.io/rgolangh/openshift-installer:latest + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ovirt + - name: KUBECONFIG + value: /tmp/artifacts/installer/auth/kubeconfig + command: + - /bin/bash + - -c + - | + #!/bin/bash + function queue() { + local TARGET="${1}" + shift + local LIVE="$(jobs | wc -l)" + while [[ "${LIVE}" -ge 45 ]]; do + sleep 1 + LIVE="$(jobs | wc -l)" + done + echo "${@}" + if [[ -n "${FILTER}" ]]; then + "${@}" | "${FILTER}" >"${TARGET}" & + else + "${@}" >"${TARGET}" & + fi + } + + function teardown() { + set +e + touch /tmp/shared/exit + export 
PATH=$PATH:/tmp/shared + source /etc/openshift-installer/ovirt.conf + + echo "Gathering artifacts ..." + mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap /tmp/artifacts/network + + + if [ -f /tmp/artifacts/installer/terraform.tfstate ] + then + if [ -n "${bootstrap_ip}" ] + then + for service in bootkube openshift kubelet crio + do + queue "/tmp/artifacts/bootstrap/${service}.service" curl \ + --insecure \ + --silent \ + --connect-timeout 5 \ + --retry 3 \ + --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \ + --key /tmp/artifacts/installer/tls/journal-gatewayd.key \ + --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" + done + if ! whoami &> /dev/null; then + if [ -w /etc/passwd ]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd + fi + fi + eval $(ssh-agent) + ssh-add /etc/openshift-installer/ssh-privatekey + ssh -A -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip} /bin/bash -x /usr/local/bin/installer-gather.sh + scp -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip}:log-bundle.tar.gz /tmp/artifacts/installer/bootstrap-logs.tar.gz + fi + else + echo "No terraform statefile found. Skipping collection of bootstrap logs." 
fi + # WORKAROUND https://github.com/openshift/installer/issues/1467 + # We need this to be able to collect logs + oc --insecure-skip-tls-verify --request-timeout=5s get csr -o name | xargs oc --insecure-skip-tls-verify --request-timeout=5s adm certificate approve + + oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes + oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers + oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api + + queue /tmp/artifacts/apiservices.json oc --insecure-skip-tls-verify --request-timeout=5s get apiservices -o json + queue /tmp/artifacts/clusteroperators.json oc --insecure-skip-tls-verify --request-timeout=5s get clusteroperators -o json + queue /tmp/artifacts/clusterversion.json oc --insecure-skip-tls-verify --request-timeout=5s get clusterversion -o json + queue /tmp/artifacts/configmaps.json oc --insecure-skip-tls-verify --request-timeout=5s get configmaps --all-namespaces -o json + queue /tmp/artifacts/csr.json oc --insecure-skip-tls-verify --request-timeout=5s get csr -o json + queue /tmp/artifacts/endpoints.json oc --insecure-skip-tls-verify --request-timeout=5s get endpoints --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/deployments.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get deployments --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/daemonsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get daemonsets 
--all-namespaces -o 
json + queue /tmp/artifacts/events.json oc --insecure-skip-tls-verify --request-timeout=5s get events --all-namespaces -o json + queue /tmp/artifacts/kubeapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get kubeapiserver -o json + queue /tmp/artifacts/kubecontrollermanager.json oc --insecure-skip-tls-verify --request-timeout=5s get kubecontrollermanager -o json + queue /tmp/artifacts/machineconfigpools.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigpools -o json + queue /tmp/artifacts/machineconfigs.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigs -o json + queue /tmp/artifacts/namespaces.json oc --insecure-skip-tls-verify --request-timeout=5s get namespaces -o json + queue /tmp/artifacts/nodes.json oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o json + queue /tmp/artifacts/openshiftapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get openshiftapiserver -o json + queue /tmp/artifacts/pods.json oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces -o json + queue /tmp/artifacts/persistentvolumes.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumes --all-namespaces -o json + queue /tmp/artifacts/persistentvolumeclaims.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumeclaims --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/replicasets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get replicasets --all-namespaces -o json + queue /tmp/artifacts/rolebindings.json oc --insecure-skip-tls-verify --request-timeout=5s get rolebindings --all-namespaces -o json + queue /tmp/artifacts/roles.json oc --insecure-skip-tls-verify --request-timeout=5s get roles --all-namespaces -o json + queue /tmp/artifacts/services.json oc --insecure-skip-tls-verify --request-timeout=5s get services --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/statefulsets.json.gz oc --insecure-skip-tls-verify 
--request-timeout=5s get statefulsets --all-namespaces -o json + + FILTER=gzip queue /tmp/artifacts/openapi.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get --raw /openapi/v2 + + # gather nodes first in parallel since they may contain the most relevant debugging info + while IFS= read -r i; do + mkdir -p /tmp/artifacts/nodes/$i + queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap + done < /tmp/nodes + + FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false + FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false + + # Snapshot iptables-save on each node for debugging possible kube-proxy issues + oc --insecure-skip-tls-verify get --request-timeout=20s -n openshift-sdn -l app=sdn pods --template '{{ range .items }}{{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/sdn-pods + while IFS= read -r i; do + queue /tmp/artifacts/network/iptables-save-$i oc --insecure-skip-tls-verify rsh --timeout=20 -n openshift-sdn -c sdn $i iptables-save -c + done < /tmp/sdn-pods + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" + queue /tmp/artifacts/metrics/${file}-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' + queue /tmp/artifacts/metrics/${file}-controllers-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' + done < /tmp/pods-api + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" + FILTER=gzip queue /tmp/artifacts/pods/${file}.log.gz oc --insecure-skip-tls-verify 
logs --request-timeout=20s $i + FILTER=gzip queue /tmp/artifacts/pods/${file}_previous.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s -p $i + done < /tmp/containers + + echo "Snapshotting prometheus (may take 15s) ..." + queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus . + + echo "Waiting for logs ..." + wait + + # We set OVIRT_CONFIG and insert the path to the engine CA into the config file + export OVIRT_CONFIG=/tmp/artifacts/installer/ovirt-config.yaml + curl -k -o "/tmp/artifacts/installer/ovirt-engine.ca" "${OVIRT_ENGINE_URL::-4}/services/pki-resource?resource=ca-certificate" + sed 's|ovirt_cafile: ""|ovirt_cafile: /tmp/artifacts/installer/ovirt-engine.ca|' -i /tmp/artifacts/installer/ovirt-config.yaml + + echo "Destroy cluster ..." + openshift-install --dir /tmp/artifacts/installer destroy cluster + } + + trap 'teardown; exit $DELETE_FAIL' EXIT + trap 'kill $(jobs -p); exit 0' TERM + + for i in $(seq 1 180); do + if [[ -f /tmp/shared/exit ]]; then + exit 0 + fi + sleep 60 & wait + done From 898be6a8468372f509ef67295535b2a0684ca056 Mon Sep 17 00:00:00 2001 From: Gal Zaidman Date: Sun, 5 Jan 2020 17:51:18 +0200 Subject: [PATCH 3/5] Add presubmits and periodics-4.4 jobs Signed-off-by: Gal Zaidman --- ...openshift-installer-master-presubmits.yaml | 65 +++++++++++++++ ...enshift-release-release-4.4-periodics.yaml | 83 +++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml index 09a3677b92853..dc57fb96c22f6 100644 --- a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml +++ b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml @@ -1453,6 +1453,71 @@ presubmits: name: prow-job-cluster-launch-installer-openstack-e2e name: 
job-definition trigger: (?m)^/test( | .* )e2e-openstack-parallel,?($|\s.*) + - agent: kubernetes + always_run: false + branches: + - master + context: ci/prow/e2e-ovirt + decorate: true + decoration_config: + skip_cloning: true + labels: + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-openshift-installer-master-e2e-ovirt + optional: true + rerun_command: /test e2e-ovirt + run_if_changed: ^([^d]|d(d|o(d|cd))*([^do]|o([^cd]|c[^ds])))*(d(d|o(d|cd))*(oc?)?)?$ + spec: + containers: + - args: + - --artifact-dir=$(ARTIFACTS) + - --give-pr-author-access-to-namespace=true + - --secret-dir=/usr/local/e2e-ovirt-cluster-profile + - --target=e2e-ovirt + - --template=/usr/local/e2e-ovirt + command: + - ci-operator + env: + - name: CLUSTER_TYPE + value: ovirt + - name: CONFIG_SPEC + valueFrom: + configMapKeyRef: + key: openshift-installer-master.yaml + name: ci-operator-master-configs + - name: JOB_NAME_SAFE + value: e2e-ovirt + - name: LEASE_STATE + value: minimal + - name: TEST_COMMAND + value: run-minimal-tests + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /usr/local/e2e-ovirt-cluster-profile + name: cluster-profile + - mountPath: /usr/local/e2e-ovirt + name: job-definition + subPath: cluster-launch-installer-ovirt-e2e.yaml + serviceAccountName: ci-operator + volumes: + - name: cluster-profile + projected: + sources: + - secret: + name: cluster-secrets-ovirt + - secret: + name: ovirt-infra-secrets + - configMap: + name: cluster-profile-ovirt + - configMap: + name: prow-job-cluster-launch-installer-ovirt-e2e + name: job-definition + trigger: (?m)^/test( | .* )e2e-ovirt,?($|\s.*) - agent: kubernetes always_run: false branches: diff --git a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml index f472dd27c7652..e7631cad43d97 100644 --- 
a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml +++ b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml @@ -4992,3 +4992,86 @@ periodics: - name: pull-secret secret: secretName: ci-pull-credentials +- agent: kubernetes + decorate: true + decoration_config: + skip_cloning: true + interval: 24h + labels: + ci.openshift.io/release-type: informing + job-env: ovirt + job-release: "4.4" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: release-openshift-ocp-installer-e2e-ovirt-4.4 + spec: + containers: + - args: + - --artifact-dir=$(ARTIFACTS) + - --give-pr-author-access-to-namespace=true + - --secret-dir=/usr/local/pull-secret + - --secret-dir=/usr/local/e2e-ovirt-cluster-profile + - --target=e2e-ovirt + - --template=/usr/local/e2e-ovirt + - --input-hash=$(BUILD_ID) + - --input-hash=$(JOB_NAME) + command: + - ci-operator + env: + - name: RELEASE_IMAGE_LATEST + value: registry.svc.ci.openshift.org/ocp/release:4.4 + - name: BRANCH + value: "4.4" + - name: CLUSTER_TYPE + value: ovirt + - name: LEASE_STATE + value: conformance + - name: CONFIG_SPEC + value: | + tag_specification: + name: "$(BRANCH)" + namespace: ocp + resources: + '*': + limits: + memory: 4Gi + requests: + cpu: 100m + memory: 200Mi + tests: + - as: e2e-$(CLUSTER_TYPE)-parallel + commands: TEST_SUITE=openshift/conformance/parallel run-tests + openshift_installer: + cluster_profile: "$(CLUSTER_TYPE)" + - name: JOB_NAME_SAFE + value: e2e-ovirt + - name: TEST_COMMAND + value: TEST_SUITE=openshift/conformance/parallel run-tests + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /usr/local/e2e-ovirt-cluster-profile + name: cluster-profile + - mountPath: /usr/local/e2e-ovirt + name: job-definition + subPath: cluster-launch-installer-ovirt-e2e.yaml + - mountPath: /usr/local/pull-secret + name: pull-secret + serviceAccountName: ci-operator + volumes: + - name: 
cluster-profile + projected: + sources: + - secret: + name: cluster-secrets-ovirt + - secret: + name: ovirt-infra-secrets + - configMap: + name: prow-job-cluster-launch-installer-ovirt-e2e + name: job-definition + - name: pull-secret + secret: + secretName: ci-pull-credentials From dd5a4c4a675849665950945335d2466acc2d1001 Mon Sep 17 00:00:00 2001 From: Gal Zaidman Date: Sun, 5 Jan 2020 17:52:00 +0200 Subject: [PATCH 4/5] Add ovirt to release Signed-off-by: Gal Zaidman --- .../release-controller/_releases/release-ocp-4.4.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core-services/release-controller/_releases/release-ocp-4.4.json b/core-services/release-controller/_releases/release-ocp-4.4.json index f1f61b098bc18..4eeaa304b4872 100644 --- a/core-services/release-controller/_releases/release-ocp-4.4.json +++ b/core-services/release-controller/_releases/release-ocp-4.4.json @@ -97,6 +97,10 @@ "azure-ovn":{ "optional":true, "prowJob":{"name":"release-openshift-ocp-installer-e2e-azure-ovn-4.4"} + }, + "ovirt":{ + "optional":true, + "prowJob":{"name":"release-openshift-ocp-installer-e2e-ovirt-4.4"} } } } From 5c79bfa2718f92c575c2e8daafffe92d40b967d5 Mon Sep 17 00:00:00 2001 From: Gal Zaidman Date: Wed, 8 Jan 2020 14:21:42 +0200 Subject: [PATCH 5/5] Lease handling for conformance tests suite To better utilize our resources we determined that run-minimal test will create a cluster with half the resources needed for conformance. This patch introduces the logic for acquiring 2 leases for conformance and one for minimal suite. Notice that at the moment since we have separate ovirt clusters we needed to make sure that the first and second lease will use the same ovirt cluster. 
Signed-off-by: Gal Zaidman --- ...openshift-installer-master-presubmits.yaml | 2 +- ...enshift-release-release-4.4-periodics.yaml | 2 +- .../cluster-launch-installer-ovirt-e2e.yaml | 54 ++++++++++++++----- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml index dc57fb96c22f6..19375f2ec0782 100644 --- a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml +++ b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml @@ -1487,7 +1487,7 @@ presubmits: name: ci-operator-master-configs - name: JOB_NAME_SAFE value: e2e-ovirt - - name: LEASE_STATE + - name: LEASE_TYPE value: minimal - name: TEST_COMMAND value: run-minimal-tests diff --git a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml index e7631cad43d97..f5bfb83c4c91d 100644 --- a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml +++ b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml @@ -5023,7 +5023,7 @@ periodics: value: "4.4" - name: CLUSTER_TYPE value: ovirt - - name: LEASE_STATE + - name: LEASE_TYPE value: conformance - name: CONFIG_SPEC value: | diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml index 54481a53154d8..cfcc3c8589647 100644 --- a/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml @@ -16,7 +16,7 @@ parameters: value: ovirt - name: TEST_COMMAND required: true -- name: LEASE_STATE +- name: LEASE_TYPE required: true - name: RELEASE_IMAGE_LATEST required: true @@ 
-118,23 +118,44 @@ objects: echo "$( jq ."${1}" --raw-output "${2}" )" } + function acquire_lease() { + resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-quota-slice" --state "free" --target-state "leased" --timeout 150m )" + resource_name="$(echo "${resource}"|jq .name --raw-output)" + lease_path="/etc/openshift-installer/${resource_name}.json" + ovirt_engine_template_name="$(extract_leases_info ovirt_engine_template_name ${lease_path})" + if [ "${LEASE_TYPE}" == "conformance" ]; then + bm_name="$(extract_leases_info ovirt_engine_cluster_bm ${lease_path})" + conformance_resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-${bm_name}" --state "free" --target-state "leased" --timeout 150m )" + conformance_resource_name="$(echo "${conformance_resource}"|jq .name --raw-output)" + worker_cpu=8 + worker_mem=16384 + master_cpu=8 + master_mem=16384 + fi + if [ "${LEASE_TYPE}" == "minimal" ]; then + ovirt_engine_template_name="${ovirt_engine_template_name}-8G" + worker_cpu=4 + worker_mem=8192 + master_cpu=4 + master_mem=8192 + fi + } + echo "[INFO] Acquiring a lease ..." - resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-quota-slice" --state "${LEASE_STATE}" --target-state "${LEASE_STATE}-leased" --timeout 150m )" - resource_name="$(echo "${resource}"|jq .name --raw-output)" - lease_path="/etc/openshift-installer/${resource_name}.json" - + acquire_lease + #Saving parameters for the env cat > /tmp/shared/ovirt-lease.conf <