From 05547be75b1774984c2f92587fd7353de8891794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Gull=C3=B3n?= Date: Fri, 5 Sep 2025 08:11:57 +0200 Subject: [PATCH] add new releases scenario: optional, osconfig and tuned --- Makefile.kube_git.var | 4 +- Makefile.version.aarch64.var | 2 +- Makefile.version.x86_64.var | 2 +- .../roles/add-kubelet-logging/tasks/main.yml | 58 +- ansible/roles/common/tasks/boot.yml | 28 +- ansible/roles/common/tasks/disk.yml | 4 +- .../configure-firewall/defaults/main.yml | 1 + .../roles/configure-firewall/tasks/main.yml | 62 +- .../roles/fetch-kubeconfig/defaults/main.yml | 2 +- ansible/roles/fetch-kubeconfig/tasks/main.yml | 71 +- .../roles/install-logging/defaults/main.yml | 1 + .../files/microshift_perf.json | 1743 ++++++++++++++++- ansible/roles/install-logging/tasks/main.yml | 84 +- ansible/roles/manage-repos/defaults/main.yml | 2 +- .../manage-repos/tasks/create-mirrors.yaml | 7 +- ansible/roles/manage-repos/tasks/main.yml | 22 +- ansible/roles/microshift-start/files/ready.sh | 48 +- ansible/roles/microshift-start/tasks/main.yml | 4 +- ansible/roles/run-workloads/defaults/main.yml | 23 +- .../roles/run-workloads/tasks/kube-burner.yml | 12 +- .../setup-microshift-host/defaults/main.yml | 3 +- .../setup-microshift-host/tasks/main.yml | 29 +- .../setup-microshift-host/tasks/storage.yml | 29 + .../templates/lvmd.yaml.j2 | 7 + ansible/vars/microshift_versions.yml | 20 + .../csi_controller_deployment.yaml | 1 + .../multus/kustomization.aarch64.yaml | 4 +- .../multus/kustomization.x86_64.yaml | 4 +- .../multus/release-multus-aarch64.json | 6 +- .../multus/release-multus-x86_64.json | 6 +- .../openshift-dns/dns/daemonset.yaml | 10 + .../node-resolver/daemonset.yaml | 2 +- .../node-resolver/update-node-resolver.sh | 2 +- .../openshift-router/deployment.yaml | 2 +- assets/core/kubelet-client-ca.yaml | 8 + .../cert-manager/manager/kustomization.yaml | 4 +- .../cert-manager/manager/manager.yaml | 12 +- .../release-cert-manager-aarch64.json 
| 12 +- .../release-cert-manager-x86_64.json | 12 +- .../kube-proxy/kustomization.aarch64.yaml | 2 +- .../kube-proxy/kustomization.x86_64.yaml | 2 +- .../release-kube-proxy-aarch64.json | 4 +- .../kube-proxy/release-kube-proxy-x86_64.json | 4 +- ...000_50_olm_07-olm-operator.deployment.yaml | 5 + ...50_olm_08-catalog-operator.deployment.yaml | 5 + .../kustomization.aarch64.yaml | 10 +- .../kustomization.x86_64.yaml | 10 +- .../release-olm-aarch64.json | 8 +- .../release-olm-x86_64.json | 8 +- assets/release/release-aarch64.json | 18 +- assets/release/release-x86_64.json | 18 +- ...hboard-microshift-telemetry.configmap.yaml | 1425 ++++++++++++++ .../kubernetes/CHANGELOG/CHANGELOG-1.33.md | 234 ++- .../cmd/kubeadm/app/util/etcd/etcd.go | 9 + .../cmd/k8s-tests-ext/disabled_tests.go | 20 +- .../k8s-tests-ext/environment_selectors.go | 28 + .../cmd/k8s-tests-ext/labels.go | 15 - .../generated/zz_generated.annotations.go | 12 +- .../images/hyperkube/Dockerfile.rhel | 2 +- .../openshift-hack/test-kubernetes-e2e.sh | 2 +- .../authentication/validate_authentication.go | 324 ++- .../validate_authentication_test.go | 536 ++++- .../performantsecuritypolicy/admission.go | 179 ++ .../admission_test.go | 225 +++ .../pkg/controller/job/job_controller.go | 6 + .../pkg/features/openshift_features.go | 5 + .../kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS | 1 + .../kubelet/allocation/allocation_manager.go | 18 +- .../allocation/allocation_manager_test.go | 42 +- .../pkg/kubelet/allocation/state/state.go | 1 + .../allocation/state/state_checkpoint.go | 13 +- .../pkg/kubelet/allocation/state/state_mem.go | 8 + .../kubelet/apis/podresources/server_v1.go | 13 +- .../apis/podresources/server_v1_test.go | 157 ++ .../podresources/testing/pods_provider.go | 47 + .../pkg/kubelet/apis/podresources/types.go | 1 + .../pkg/kubelet/images/image_gc_manager.go | 3 + .../kubernetes/pkg/kubelet/kubelet.go | 18 +- .../kuberuntime_container_linux.go | 2 +- .../kuberuntime/kuberuntime_sandbox_linux.go | 2 +- 
.../kubernetes/pkg/kubelet/managed/managed.go | 28 +- .../pkg/kubelet/managed/managed_test.go | 228 ++- .../pkg/registry/batch/job/strategy.go | 3 +- .../pkg/registry/batch/job/strategy_test.go | 30 + .../apiserver/schema/cel/compilation_test.go | 12 + .../schema/cel/model/schemas_test.go | 41 +- .../apiserver/pkg/cel/common/schemas.go | 28 +- .../storage/cacher/cacher_whitebox_test.go | 120 ++ .../apiserver/pkg/storage/cacher/delegator.go | 19 +- .../apiserver/pkg/util/webhook/webhook.go | 1 + .../pkg/util/webhook/webhook_test.go | 59 + .../test/e2e/common/node/lifecycle_hook.go | 136 +- .../test/e2e/framework/pod/resize.go | 43 +- .../e2e/storage/testsuites/provisioning.go | 28 +- .../testsuites/volume_group_snapshottable.go | 2 +- .../csi-hostpath-plugin.yaml | 2 +- .../run_group_snapshot_e2e.sh | 8 +- .../storage-csi/gce-pd/controller_ss.yaml | 2 +- .../hostpath/csi-hostpath-plugin.yaml | 2 +- .../mock/csi-mock-driver-snapshotter.yaml | 2 +- .../cel/validatingadmissionpolicy_test.go | 1 + .../test/integration/job/job_test.go | 120 ++ docs/user/howto_metrics_server.md | 113 ++ etcd/cmd/microshift-etcd/run.go | 8 + etcd/go.mod | 28 +- etcd/go.sum | 24 +- .../v3/etcdserver/api/rafthttp/pipeline.go | 2 +- .../etcd/server/v3/etcdserver/raft.go | 11 +- .../go.etcd.io/etcd/server/v3/wal/wal.go | 38 +- etcd/vendor/modules.txt | 40 +- go.mod | 58 +- go.sum | 12 +- packaging/blueprint/blueprint.toml.template | 16 +- .../crio.conf.d/10-microshift_amd64.conf | 2 +- .../crio.conf.d/10-microshift_arm64.conf | 2 +- ...croshift-running-check-ai-model-serving.sh | 38 - .../microshift-running-check-cert-manager.sh | 37 - .../microshift-running-check-gateway-api.sh | 37 - .../microshift-running-check-multus.sh | 37 - .../greenboot/microshift-running-check-olm.sh | 37 - packaging/rpm/microshift.spec | 27 +- packaging/systemd/firewalld-no-iptables.conf | 3 - pkg/cmd/healthcheck.go | 5 - pkg/cmd/run.go | 1 + pkg/controllers/kubelet-ca-manager.go | 232 +++ 
pkg/healthcheck/debug_info.go | 128 +- pkg/healthcheck/debug_info_test.go | 217 ++ pkg/healthcheck/healthcheck.go | 4 + pkg/healthcheck/microshift_core_workloads.go | 7 + .../microshift_optional_workloads.go | 51 + pkg/healthcheck/workloads.go | 247 ++- robocop.toml | 22 + .../advisory_publication_report.py | 284 ++- scripts/auto-rebase/assets.yaml | 2 + scripts/auto-rebase/changelog.txt | 6 + scripts/auto-rebase/commits.txt | 54 +- scripts/auto-rebase/last_rebase.sh | 2 +- ...11-ingress-deployment-access-logging.patch | 2 +- ...nd-cpu-partitioning-admission-plugin.patch | 37 +- .../tests/08-test-caikit-tgis.sh | 77 + .../tests/caikit-tgis/010-minio.yaml | 39 + .../011-minio-connection-secret.yaml | 12 + .../tests/caikit-tgis/012-minio-sa.yaml | 6 + .../tests/caikit-tgis/020-inference-svc.yaml | 16 + .../tests/caikit-tgis/021-route.yaml | 13 + scripts/microshift-sos-report.sh | 45 +- scripts/multinode/configure-sec.sh | 8 +- scripts/verify/verify-rf.sh | 9 +- test/README.md | 7 +- .../fake-serial-communication.py | 11 +- .../generic-device-plugin/fuse-test-pod.yaml | 19 + test/bin/common_versions.sh | 20 +- test/bin/scenario.sh | 6 + ...-bootc-brew-y2-with-optional.containerfile | 1 + ...-bootc-brew-ec-with-optional.containerfile | 18 + ...c-brew-nightly-with-optional.containerfile | 18 + ...-bootc-brew-rc-with-optional.containerfile | 17 + ...-bootc-brew-y1-with-optional.containerfile | 1 + ...-brew-zstream-with-optional.containerfile} | 18 + .../rhel96-bootc-source-gitops.containerfile | 11 + .../cos9-bootc-source-fips.containerfile | 13 - .../group2/rhel94-brew-y2-with-optionals.toml | 1 + .../group3/rhel96-brew-y1-with-optionals.toml | 1 + .../group4/rhel96-brew-ec-with-optionals.toml | 5 + .../rhel96-brew-nightly-with-optionals.toml | 5 + .../group4/rhel96-brew-rc-with-optionals.toml | 5 + ...> rhel96-brew-zstream-with-optionals.toml} | 5 + .../group1/rhel96-lrel-optionals-tuned.toml | 125 ++ .../microshift-gitops.repo | 10 + test/requirements.txt | 11 +- 
test/resources/common.resource | 4 +- test/resources/fault-tests.resource | 2 +- test/resources/kubeconfig.resource | 4 +- test/resources/microshift-config.resource | 12 +- .../microshift-etcd-process.resource | 2 +- test/resources/microshift-host.resource | 2 +- test/resources/microshift-network.resource | 13 +- test/resources/microshift-process.resource | 2 +- test/resources/microshift-rpm.resource | 4 +- test/resources/multus.resource | 2 +- test/resources/oc.resource | 4 +- test/resources/offline.resource | 12 +- test/resources/openssl.resource | 6 +- test/resources/ostree-health.resource | 4 +- test/resources/ostree.resource | 4 +- test/resources/selinux.resource | 4 +- test/resources/sos-on-failure-listener.py | 72 + test/resources/systemd.resource | 4 +- .../periodics/cos9-src@fips.sh | 29 - ...> el96-crel@optional-sigstore.sh.disabled} | 0 .../el96-prel@el96-crel@upgrade-ok.sh | 2 +- .../periodics/el96-src@gitops.sh | 17 + .../releases/el94-y2@el96-lrel@standard1.sh | 39 + .../releases/el94-y2@el96-lrel@standard2.sh | 38 + .../el96-lrel@ai-model-serving-online.sh | 36 + .../releases/el96-lrel@dual-stack.sh | 24 + .../releases/el96-lrel@ipv6.sh | 29 + .../releases/el96-lrel@multi-nic.sh | 24 + .../releases/el96-lrel@optional.sh | 28 + .../releases/el96-lrel@osconfig.sh | 18 + .../releases/el96-lrel@router.sh | 19 + .../releases/el96-lrel@standard1.sh | 18 + .../releases/el96-lrel@standard2.sh | 16 + .../releases/el96-lrel@storage.sh | 16 + .../releases/el96-lrel@telemetry.sh | 21 + .../releases/el96-y1@el96-lrel@standard1.sh | 39 + .../releases/el96-y1@el96-lrel@standard2.sh | 38 + ...> el96-crel@optional-sigstore.sh.disabled} | 0 .../presubmits/el96-src@low-latency.sh | 2 +- .../el94-yminus2@el96-lrel@standard1.sh | 24 + .../el94-yminus2@el96-lrel@standard2.sh | 23 + test/scenarios/releases/el96-lrel@backups.sh | 16 + .../releases/el96-lrel@dual-stack.sh | 23 + test/scenarios/releases/el96-lrel@ipv6.sh | 25 + .../releases/el96-lrel@low-latency.sh | 42 + 
.../scenarios/releases/el96-lrel@multi-nic.sh | 24 + test/scenarios/releases/el96-lrel@optional.sh | 36 + test/scenarios/releases/el96-lrel@osconfig.sh | 18 + test/scenarios/releases/el96-lrel@router.sh | 19 + .../scenarios/releases/el96-lrel@standard1.sh | 18 + .../scenarios/releases/el96-lrel@standard2.sh | 16 + test/scenarios/releases/el96-lrel@storage.sh | 16 + .../el96-yminus1@el96-lrel@standard1.sh | 24 + .../el96-yminus1@el96-lrel@standard2.sh | 23 + .../ai-model-serving-offline.robot | 8 +- .../ai-model-serving-online.robot | 14 +- test/suites/backup/auto-recovery-extra.robot | 2 +- test/suites/backup/auto-recovery.robot | 6 +- .../backup/backup-restore-on-reboot.robot | 2 +- test/suites/fault-tests/outages.robot | 4 +- test/suites/fips/validate-fips.robot | 16 +- test/suites/gitops/gitops.robot | 23 + test/suites/gitops/test-deployment.yaml | 19 + test/suites/greenboot/greenboot.robot | 2 +- test/suites/ipv6/dualstack.robot | 16 +- test/suites/logscan/log-scan.robot | 4 +- test/suites/network/multi-nic.robot | 6 +- test/suites/network/offline.robot | 10 +- test/suites/optional/cert-manager.robot | 2 +- test/suites/optional/gateway-api.robot | 30 +- .../optional/generic-device-plugin.robot | 186 +- test/suites/optional/multus.robot | 6 +- test/suites/optional/observability.robot | 10 +- test/suites/optional/olm.robot | 2 +- test/suites/optional/strings.py | 57 + test/suites/osconfig/clusterid.robot | 2 +- test/suites/router/router.robot | 7 +- .../rpm/install-and-upgrade-successful.robot | 8 +- test/suites/standard1/containers-policy.robot | 4 +- test/suites/standard1/hostname.robot | 2 +- test/suites/standard1/kustomize.robot | 36 +- .../tls-configuration.robot | 22 +- test/suites/standard1/version.robot | 14 +- test/suites/standard2/configuration.robot | 2 +- .../validate-certificate-rotation.robot | 4 +- .../validate-custom-certificates.robot | 27 +- test/suites/storage/pvc-resize.robot | 2 +- test/suites/storage/reboot.robot | 2 +- 
test/suites/storage/snapshot.robot | 2 +- .../storage/storage-version-migration.robot | 2 +- test/suites/telemetry/telemetry.robot | 9 +- test/suites/tuned/profile.robot | 2 +- test/suites/tuned/workload-partitioning.robot | 16 +- test/suites/upgrade/upgrade-multus.robot | 1 - .../apiserver/pkg/cel/common/schemas.go | 28 +- .../apiserver/pkg/storage/cacher/delegator.go | 19 +- .../apiserver/pkg/util/webhook/webhook.go | 1 + .../authentication/validate_authentication.go | 324 ++- .../pkg/controller/job/job_controller.go | 6 + .../pkg/features/openshift_features.go | 5 + .../kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS | 1 + .../kubelet/allocation/allocation_manager.go | 18 +- .../pkg/kubelet/allocation/state/state.go | 1 + .../allocation/state/state_checkpoint.go | 13 +- .../pkg/kubelet/allocation/state/state_mem.go | 8 + .../kubelet/apis/podresources/server_v1.go | 13 +- .../pkg/kubelet/apis/podresources/types.go | 1 + .../pkg/kubelet/images/image_gc_manager.go | 3 + .../k8s.io/kubernetes/pkg/kubelet/kubelet.go | 18 +- .../kuberuntime_container_linux.go | 2 +- .../kuberuntime/kuberuntime_sandbox_linux.go | 2 +- .../kubernetes/pkg/kubelet/managed/managed.go | 28 +- .../pkg/registry/batch/job/strategy.go | 3 +- vendor/modules.txt | 64 +- 284 files changed, 9854 insertions(+), 1384 deletions(-) create mode 100644 ansible/roles/setup-microshift-host/tasks/storage.yml create mode 100644 ansible/roles/setup-microshift-host/templates/lvmd.yaml.j2 create mode 100644 ansible/vars/microshift_versions.yml create mode 100644 assets/core/kubelet-client-ca.yaml create mode 100644 dashboards/grafana-dashboard-microshift-telemetry.configmap.yaml create mode 100644 deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission.go create mode 100644 deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission_test.go create mode 100644 docs/user/howto_metrics_server.md delete mode 
100644 packaging/greenboot/microshift-running-check-ai-model-serving.sh delete mode 100755 packaging/greenboot/microshift-running-check-cert-manager.sh delete mode 100644 packaging/greenboot/microshift-running-check-gateway-api.sh delete mode 100755 packaging/greenboot/microshift-running-check-multus.sh delete mode 100755 packaging/greenboot/microshift-running-check-olm.sh delete mode 100644 packaging/systemd/firewalld-no-iptables.conf create mode 100644 pkg/controllers/kubelet-ca-manager.go create mode 100644 pkg/healthcheck/debug_info_test.go create mode 100644 pkg/healthcheck/microshift_optional_workloads.go create mode 100644 robocop.toml create mode 100755 scripts/ci-ai-model-serving/tests/08-test-caikit-tgis.sh create mode 100644 scripts/ci-ai-model-serving/tests/caikit-tgis/010-minio.yaml create mode 100644 scripts/ci-ai-model-serving/tests/caikit-tgis/011-minio-connection-secret.yaml create mode 100644 scripts/ci-ai-model-serving/tests/caikit-tgis/012-minio-sa.yaml create mode 100644 scripts/ci-ai-model-serving/tests/caikit-tgis/020-inference-svc.yaml create mode 100644 scripts/ci-ai-model-serving/tests/caikit-tgis/021-route.yaml create mode 100644 test/assets/generic-device-plugin/fuse-test-pod.yaml rename test/image-blueprints-bootc/layer1-base/group2/{rhel96-bootc-brew-y0-with-optional.containerfile => rhel96-bootc-brew-zstream-with-optional.containerfile} (73%) create mode 100644 test/image-blueprints-bootc/layer3-periodic/group2/rhel96-bootc-source-gitops.containerfile delete mode 100644 test/image-blueprints-bootc/layer3-periodic/group3/cos9-bootc-source-fips.containerfile rename test/image-blueprints/layer1-base/group4/{rhel96-brew-y0-with-optionals.toml => rhel96-brew-zstream-with-optionals.toml} (96%) create mode 100644 test/image-blueprints/layer2-presubmit/group1/rhel96-lrel-optionals-tuned.toml create mode 100644 test/package-sources-bootc/microshift-gitops.repo create mode 100644 test/resources/sos-on-failure-listener.py delete mode 100644 
test/scenarios-bootc/periodics/cos9-src@fips.sh rename test/scenarios-bootc/periodics/{el96-crel@optional-sigstore.sh => el96-crel@optional-sigstore.sh.disabled} (100%) create mode 100644 test/scenarios-bootc/periodics/el96-src@gitops.sh create mode 100644 test/scenarios-bootc/releases/el94-y2@el96-lrel@standard1.sh create mode 100644 test/scenarios-bootc/releases/el94-y2@el96-lrel@standard2.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@ai-model-serving-online.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@dual-stack.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@ipv6.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@multi-nic.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@optional.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@osconfig.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@router.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@standard1.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@standard2.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@storage.sh create mode 100644 test/scenarios-bootc/releases/el96-lrel@telemetry.sh create mode 100644 test/scenarios-bootc/releases/el96-y1@el96-lrel@standard1.sh create mode 100644 test/scenarios-bootc/releases/el96-y1@el96-lrel@standard2.sh rename test/scenarios/periodics/{el96-crel@optional-sigstore.sh => el96-crel@optional-sigstore.sh.disabled} (100%) create mode 100644 test/scenarios/releases/el94-yminus2@el96-lrel@standard1.sh create mode 100644 test/scenarios/releases/el94-yminus2@el96-lrel@standard2.sh create mode 100644 test/scenarios/releases/el96-lrel@backups.sh create mode 100644 test/scenarios/releases/el96-lrel@dual-stack.sh create mode 100644 test/scenarios/releases/el96-lrel@ipv6.sh create mode 100644 test/scenarios/releases/el96-lrel@low-latency.sh create mode 100644 test/scenarios/releases/el96-lrel@multi-nic.sh create mode 100644 
test/scenarios/releases/el96-lrel@optional.sh create mode 100644 test/scenarios/releases/el96-lrel@osconfig.sh create mode 100644 test/scenarios/releases/el96-lrel@router.sh create mode 100644 test/scenarios/releases/el96-lrel@standard1.sh create mode 100644 test/scenarios/releases/el96-lrel@standard2.sh create mode 100644 test/scenarios/releases/el96-lrel@storage.sh create mode 100644 test/scenarios/releases/el96-yminus1@el96-lrel@standard1.sh create mode 100644 test/scenarios/releases/el96-yminus1@el96-lrel@standard2.sh create mode 100644 test/suites/gitops/gitops.robot create mode 100644 test/suites/gitops/test-deployment.yaml rename test/suites/{standard2 => standard1}/tls-configuration.robot (93%) diff --git a/Makefile.kube_git.var b/Makefile.kube_git.var index a8c7023c94..8df1c08da0 100644 --- a/Makefile.kube_git.var +++ b/Makefile.kube_git.var @@ -1,5 +1,5 @@ KUBE_GIT_MAJOR=1 KUBE_GIT_MINOR=33 -KUBE_GIT_VERSION=v1.33.2 -KUBE_GIT_COMMIT=80b82ba5ae030c3e2b7d7752a204ddf71139842c +KUBE_GIT_VERSION=v1.33.3 +KUBE_GIT_COMMIT=a237dcbeee8bbe1440c94349399cc6eac8870910 KUBE_GIT_TREE_STATE=clean diff --git a/Makefile.version.aarch64.var b/Makefile.version.aarch64.var index 8a61704ed9..a459395efa 100644 --- a/Makefile.version.aarch64.var +++ b/Makefile.version.aarch64.var @@ -1 +1 @@ -OCP_VERSION := 4.20.0-0.nightly-arm64-2025-08-05-094808 +OCP_VERSION := 4.20.0-0.nightly-arm64-2025-09-01-210443 diff --git a/Makefile.version.x86_64.var b/Makefile.version.x86_64.var index 2478d7f028..12487a3b14 100644 --- a/Makefile.version.x86_64.var +++ b/Makefile.version.x86_64.var @@ -1 +1 @@ -OCP_VERSION := 4.20.0-0.nightly-2025-07-31-063120 +OCP_VERSION := 4.20.0-0.nightly-2025-09-01-101753 diff --git a/ansible/roles/add-kubelet-logging/tasks/main.yml b/ansible/roles/add-kubelet-logging/tasks/main.yml index 5725981273..835b365c2b 100644 --- a/ansible/roles/add-kubelet-logging/tasks/main.yml +++ b/ansible/roles/add-kubelet-logging/tasks/main.yml @@ -1,45 +1,48 @@ --- # 
add-kubelet-logging tasks -- name: check to ensure promdir target exists +- name: Check to ensure promdir target exists ansible.builtin.stat: path: "{{ prometheus_dir }}" register: promdir -- name: check if the file exists +- name: Check if the file exists ansible.builtin.stat: path: "{{ sa_token_file }}" register: token_file -- block: - - name: load sa-token file from localhost - ansible.builtin.slurp: - src: "{{ sa_token_file }}" - register: bearer_token_slurp - delegate_to: localhost +- name: Deal with metrics service account token file + when: token_file.stat.exists + block: + - name: Load sa-token file from localhost + ansible.builtin.slurp: + src: "{{ sa_token_file }}" + register: bearer_token_slurp + delegate_to: localhost - - name: decode bearer token - set_fact: - bearer_token: "{{ bearer_token_slurp.content | b64decode }}" + - name: Decode bearer token + ansible.builtin.set_fact: + bearer_token: "{{ bearer_token_slurp.content | b64decode }}" - - name: create metrics service account token file in prometheus folder - ansible.builtin.copy: - content: "{{ bearer_token }}" - dest: "{{ kubelet_auth_token_file }}" - when: promdir.stat.exists + - name: Create metrics service account token file in prometheus folder + ansible.builtin.copy: + content: "{{ bearer_token }}" + dest: "{{ kubelet_auth_token_file }}" + mode: '0644' + when: promdir.stat.exists - - name: remove the sa-token file - ansible.builtin.file: - path: "{{ sa_token_file }}" - state: absent - when: token_file.stat.exists + - name: Remove the sa-token file + ansible.builtin.file: + path: "{{ sa_token_file }}" + state: absent -- name: append kubelet scrape config target to prometheus config +- name: Append kubelet scrape config target to prometheus config ansible.builtin.blockinfile: path: "{{ prometheus_config }}" block: | # kubelet targets - - job_name: kubelet + {% for host in groups['microshift'] %} + - job_name: kubelet-{{ host }} scheme: https authorization: credentials_file: "{{ 
kubelet_auth_token_file }}" @@ -47,9 +50,9 @@ insecure_skip_verify: true static_configs: - targets: - - microshift-dev:10250 + - {{ hostvars[host].ansible_host }}:10250 - - job_name: kubelet cadvisor + - job_name: kubelet-{{ host }}-cadvisor scheme: https authorization: credentials_file: "{{ kubelet_auth_token_file }}" @@ -58,9 +61,10 @@ metrics_path: /metrics/cadvisor static_configs: - targets: - - microshift-dev:10250 + - {{ hostvars[host].ansible_host }}:10250 + {% endfor %} -- name: restart prometheus to pick up new target +- name: Restart prometheus to pick up new target ansible.builtin.systemd: state: restarted name: prometheus diff --git a/ansible/roles/common/tasks/boot.yml b/ansible/roles/common/tasks/boot.yml index e032272af4..afef3350f6 100644 --- a/ansible/roles/common/tasks/boot.yml +++ b/ansible/roles/common/tasks/boot.yml @@ -1,6 +1,11 @@ --- # common microshift boot start time script +# Include shared microshift version configuration +- name: Include shared microshift version configuration + include_vars: + file: "{{ playbook_dir }}/vars/microshift_versions.yml" + - block: - name: find microshift cleanup script ansible.builtin.find: @@ -16,13 +21,13 @@ microshift_cleanup_bin: "{{ find_cleanup.files[0].path if find_cleanup.files }}" - name: cleanup microshift data - become: yes + become: true ansible.builtin.shell: echo 1 | {{ microshift_cleanup_bin }} --all when: cleanup_microshift | default('false') | bool - block: - name: reboot machine - become: yes + become: true ansible.builtin.reboot: reboot_timeout: 600 @@ -31,9 +36,26 @@ seconds: 60 when: reboot | default('false') | bool +- name: Extract major version for pod counts + set_fact: + microshift_major_version: "{{ microshift_version.split('.')[:2] | join('.') }}" + +- name: Validate that version exists in config + fail: + msg: "Error: MicroShift version {{ microshift_major_version }} not found in microshift_versions dictionary. Please update the defaults/main.yml file." 
+ when: microshift_major_version not in microshift_versions + +- name: Set expected pod counts + set_fact: + expected_pods: "{{ microshift_versions[microshift_major_version].expected_pods }}" + all_pods: "{{ microshift_versions[microshift_major_version].all_pods }}" + - name: run the microshift boot script ansible.builtin.script: - cmd: ready.sh + cmd: > + ready.sh + {{ expected_pods }} + {{ all_pods }} register: script_output - name: display script output diff --git a/ansible/roles/common/tasks/disk.yml b/ansible/roles/common/tasks/disk.yml index f74199c5a8..cb5a0e336d 100644 --- a/ansible/roles/common/tasks/disk.yml +++ b/ansible/roles/common/tasks/disk.yml @@ -13,10 +13,12 @@ - name: Add disk info to local file lineinfile: path: "{{ filename }}" - line: "{{ item }}" + line: "{{ disk_item }}" create: yes delegate_to: localhost with_items: - "{{ item }}:" - "{{ disk_usage.stdout_lines }}" + loop_control: + loop_var: disk_item diff --git a/ansible/roles/configure-firewall/defaults/main.yml b/ansible/roles/configure-firewall/defaults/main.yml index 69ee4bc9f1..d3fe1992d0 100644 --- a/ansible/roles/configure-firewall/defaults/main.yml +++ b/ansible/roles/configure-firewall/defaults/main.yml @@ -27,4 +27,5 @@ firewall_ports: firewall_trusted_cidr: - 10.42.0.0/16 - 169.254.169.1/32 + - fd01::/48 diff --git a/ansible/roles/configure-firewall/tasks/main.yml b/ansible/roles/configure-firewall/tasks/main.yml index 158642f612..47526d5d27 100644 --- a/ansible/roles/configure-firewall/tasks/main.yml +++ b/ansible/roles/configure-firewall/tasks/main.yml @@ -1,30 +1,44 @@ --- # configure-firewall tasks -- name: permit traffic in trusted zone from CIDR - ansible.posix.firewalld: - source: "{{ item }}" - state: enabled - immediate: yes - permanent: yes - zone: trusted - with_items: "{{ firewall_trusted_cidr }}" +- name: check firewalld service status + ansible.builtin.systemd: + name: firewalld + register: firewalld_status + ignore_errors: true -- name: permit traffic in public 
zone for services - ansible.posix.firewalld: - service: "{{ item }}" - state: enabled - immediate: yes - permanent: yes - zone: public - with_items: "{{ firewall_services }}" +- name: Configure firewall rules + when: firewalld_status.status.UnitFileState is not defined or firewalld_status.status.UnitFileState != 'masked' + block: + - name: permit traffic in trusted zone from CIDR + ansible.posix.firewalld: + source: "{{ item }}" + state: enabled + immediate: yes + permanent: yes + zone: trusted + with_items: "{{ firewall_trusted_cidr }}" -- name: permit traffic in public zone for ports - ansible.posix.firewalld: - port: "{{ item }}" - state: enabled - immediate: yes - permanent: yes - zone: public - with_items: "{{ firewall_ports }}" + - name: permit traffic in public zone for services + ansible.posix.firewalld: + service: "{{ item }}" + state: enabled + immediate: yes + permanent: yes + zone: public + with_items: "{{ firewall_services }}" + + - name: permit traffic in public zone for ports + ansible.posix.firewalld: + port: "{{ item }}" + state: enabled + immediate: yes + permanent: yes + zone: public + with_items: "{{ firewall_ports }}" + +- name: skip firewall configuration if masked + ansible.builtin.debug: + msg: "Firewalld is masked, skipping firewall configuration" + when: firewalld_status.status.UnitFileState is defined and firewalld_status.status.UnitFileState == 'masked' diff --git a/ansible/roles/fetch-kubeconfig/defaults/main.yml b/ansible/roles/fetch-kubeconfig/defaults/main.yml index 40145bb609..76579b1240 100644 --- a/ansible/roles/fetch-kubeconfig/defaults/main.yml +++ b/ansible/roles/fetch-kubeconfig/defaults/main.yml @@ -1,6 +1,6 @@ --- # install-microshift default vars -kubeconfig_directory: "{{ local_home }}/.kube" +kubeconfig_directory: "{{ lookup('env', 'HOME') }}/.kube" kubeconfig_local_file: "{{ kubeconfig_directory }}/config" kubeconfig_remote_file: "/var/lib/microshift/resources/kubeadmin/{{ ansible_fqdn }}/kubeconfig" diff --git 
a/ansible/roles/fetch-kubeconfig/tasks/main.yml b/ansible/roles/fetch-kubeconfig/tasks/main.yml index 928eb21e7a..d80ac2c9e0 100644 --- a/ansible/roles/fetch-kubeconfig/tasks/main.yml +++ b/ansible/roles/fetch-kubeconfig/tasks/main.yml @@ -1,39 +1,78 @@ --- # fetch-kubeconfig tasks -- name: set local kubeconfig path +- name: Set local kubeconfig path ansible.builtin.set_fact: - local_home: "{{ lookup('env', 'HOME') }}" + kubeconfig_dest_file: "{{ kubeconfig_directory }}/kubeconfig.{{ inventory_hostname }}" -- name: check if kubeconfig directory exists on local machine +- name: Check if kubeconfig directory exists on local machine ansible.builtin.stat: path: "{{ kubeconfig_directory }}" delegate_to: localhost register: kubeconfig_directory_check -- name: create .kube folder in home directory if missing +- name: Create .kube folder in home directory if missing ansible.builtin.file: path: "{{ kubeconfig_directory }}" state: directory delegate_to: localhost when: not kubeconfig_directory_check.stat.exists -- name: check a kubeconfig file exists on local machine +- name: Fetch remote kubeconfig to a host-specific file (always overwrites) + become: yes + ansible.builtin.fetch: + src: "{{ kubeconfig_remote_file }}" + dest: "{{ kubeconfig_dest_file }}" + flat: yes + force: yes + +- name: Check if a default kubeconfig file exists on local machine ansible.builtin.stat: path: "{{ kubeconfig_local_file }}" delegate_to: localhost register: kubeconfig_local_file_check -- name: backup existing kubeconfig_local_file - ansible.builtin.copy: - src: "{{ kubeconfig_local_file }}" - dest: "{{ kubeconfig_local_file }}.old" +- name: Create/overwrite symbolic link if default is not a regular file + ansible.builtin.file: + src: "{{ kubeconfig_dest_file }}" + dest: "{{ kubeconfig_local_file }}" + state: link + force: yes delegate_to: localhost - when: kubeconfig_local_file_check.stat.exists + when: not kubeconfig_local_file_check.stat.exists or (kubeconfig_local_file_check.stat.islnk | 
default(false)) -- name: try to fetch remote kubeconfig to local - become: yes - ansible.builtin.fetch: - src: "{{ kubeconfig_remote_file }}" - dest: "{{ kubeconfig_local_file }}" - flat: yes +- name: Notify user if default kubeconfig is a regular file and was not overwritten + ansible.builtin.debug: + msg: | + The default kubeconfig at '{{ kubeconfig_local_file }}' is a regular file and was not overwritten. + A new kubeconfig has been saved to '{{ kubeconfig_dest_file }}'. + Please update your KUBECONFIG environment variable or switch contexts manually. + delegate_to: localhost + when: kubeconfig_local_file_check.stat.exists and (kubeconfig_local_file_check.stat.isreg | default(false)) + +- name: Verify connectivity to MicroShift cluster + block: + - name: Test cluster connectivity with oc + ansible.builtin.command: oc get nodes --kubeconfig={{ kubeconfig_dest_file }} + delegate_to: localhost + register: oc_test + changed_when: false + failed_when: false + + - name: Display cluster connectivity status + ansible.builtin.debug: + msg: "Successfully connected to MicroShift cluster - Node is ready" + delegate_to: localhost + when: oc_test.rc == 0 + + - name: Fail if cluster is not accessible + ansible.builtin.fail: + msg: | + Failed to connect to MicroShift cluster! + Error: {{ oc_test.stderr | default('Unknown error') }} + + Please check: + 1. Port 6443 is accessible from this host (firewall/iptables rules) + 2. MicroShift service is running on the remote host + 3. 
The kubeconfig file is valid + when: oc_test.rc != 0 diff --git a/ansible/roles/install-logging/defaults/main.yml b/ansible/roles/install-logging/defaults/main.yml index 7cfe2a7241..fd67ef2f73 100644 --- a/ansible/roles/install-logging/defaults/main.yml +++ b/ansible/roles/install-logging/defaults/main.yml @@ -12,6 +12,7 @@ logging_services: grafana_setup: false grafana_username: admin grafana_password: admin +grafana_host_address: "192.168.1.100" grafana_port: 3000 prometheus_port: 9091 diff --git a/ansible/roles/install-logging/files/microshift_perf.json b/ansible/roles/install-logging/files/microshift_perf.json index ae75af46d0..24b9828b13 100644 --- a/ansible/roles/install-logging/files/microshift_perf.json +++ b/ansible/roles/install-logging/files/microshift_perf.json @@ -59,10 +59,23 @@ "links": [], "liveNow": false, "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 25, + "panels": [], + "title": "Node Metrics", + "type": "row" + }, { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -76,7 +89,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -112,23 +125,277 @@ "value": 80 } ] - } + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (mode)(irate(node_cpu_seconds_total{job=~\".*\"}[$interval]))", + "legendFormat": "__auto", + "range": true, + "refId": 
"A" + } + ], + "title": "Node CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 13, + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "node_memory_Active_bytes", + "legendFormat": "Active", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "node_memory_MemTotal_bytes", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "node_memory_Cached_bytes + node_memory_Buffers_bytes", + "hide": false, + "legendFormat": "Cached + Buffers", + "range": true, + 
"refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "node_memory_MemAvailable_bytes", + "hide": false, + "legendFormat": "Available", + "range": true, + "refId": "D" + } + ], + "title": "Node Memory", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, "w": 24, "x": 0, - "y": 0 + "y": 9 + }, + "id": 14, + "panels": [], + "title": "Process Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 10 }, "id": 2, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -142,7 +409,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "topk(20, sum by(groupname) 
(rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"conmon|crio|coredns|csi.*|haproxy|kube-rbac-proxy|livenessprobe|lvmd|microshift|openshift-route|ovn.*|ovs.*|service-ca-oper|topolvm.*\"}[$__interval]))) * 100", + "expr": "topk(20, sum by(groupname) (irate(namedprocess_namegroup_cpu_seconds_total{groupname!~\"auditd|bash|crond|dbus.*|dnf|sshd|systemd.*|process-.*|prometheus.*|vnstat.*\"}[$interval]))) > 0", "legendFormat": "__auto", "range": true, "refId": "A" @@ -168,7 +435,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -204,23 +471,30 @@ "value": 80 } ] - } + }, + "unit": "decmbytes" }, "overrides": [] }, "gridPos": { - "h": 13, - "w": 24, - "x": 0, - "y": 13 + "h": 12, + "w": 12, + "x": 12, + "y": 10 }, "id": 4, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -234,7 +508,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "topk(10, sum by(groupname) (namedprocess_namegroup_memory_bytes{groupname=~\"conmon|crio|coredns|csi.*|haproxy|kube-rbac-proxy|livenessprobe|lvmd|microshift|openshift-route|ovn.*|ovs.*|service-ca-oper|topolvm.*\",memtype=\"resident\"}) / 1024 / 1024)", + "expr": "topk(12, sum by(groupname) (namedprocess_namegroup_memory_bytes{groupname!~\"auditd|bash|crond|dbus.*|sshd|systemd.*|process-.*|prometheus.*|vnstat.*\",memtype=\"resident\"}) / 1024 / 1024)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -260,7 +534,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -296,23 +570,30 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, 
"gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 26 + "y": 22 }, "id": 6, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -326,7 +607,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"ov.*\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"ov.*\"}[$interval]))", "legendFormat": "cni", "range": true, "refId": "A" @@ -337,7 +618,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"csi.*|topo.*|lvmd|livenessprobe\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"csi.*|topo.*|lvmd|livenessprobe\"}[$interval]))", "hide": false, "legendFormat": "csi", "range": true, @@ -349,7 +630,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"haproxy|openshift-route\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"haproxy|openshift-route\"}[$interval]))", "hide": false, "legendFormat": "ingress", "range": true, @@ -361,7 +642,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"coredns|kube-rbac-proxy\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"coredns|kube-rbac-proxy\"}[$interval]))", "hide": false, "legendFormat": "dns", "range": true, @@ -373,7 +654,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) 
(rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"service-ca-oper\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"service-ca-oper\"}[$interval]))", "hide": false, "legendFormat": "service-ca", "range": true, @@ -385,7 +666,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"microshift\"}[$__interval])) * 100", + "expr": "sum by(job) (irate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"microshift\"}[$interval]))", "hide": false, "legendFormat": "microshift", "range": true, @@ -397,7 +678,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"crio|conmon\"}[$__interval])) * 100", + "expr": "sum by(job) (rate(namedprocess_namegroup_cpu_seconds_total{groupname=~\"crio|conmon\"}[$interval]))", "hide": false, "legendFormat": "runtime", "range": true, @@ -424,7 +705,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -460,23 +741,30 @@ "value": 80 } ] - } + }, + "unit": "decmbytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 26 + "y": 22 }, "id": 8, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Max", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -588,7 +876,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -616,14 +904,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "decmbytes" }, 
"overrides": [] }, @@ -631,15 +921,21 @@ "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 32 }, "id": 10, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -679,7 +975,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -707,14 +1003,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "decmbytes" }, "overrides": [] }, @@ -722,15 +1020,21 @@ "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 32 }, "id": 12, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { "mode": "single", @@ -752,31 +1056,1312 @@ ], "title": "CSI RSS Usage", "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 39, + "panels": [], + "title": "Container Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(10,sum(irate(container_cpu_usage_seconds_total{container!=\"\",namespace!=\"\"}[1m])) by (pod,container,namespace))", + "legendFormat": "{{ pod }}: {{container}}", + "range": true, + "refId": "A" + } + ], + "title": "Container CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(10, container_memory_rss{container!=\"\",namespace!=\"\"})", + "hide": false, + "legendFormat": "{{pod}}: {{container}}", + "range": true, + "refId": "D" + } + ], + "title": "Container Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, 
+ "y": 49 + }, + "id": 40, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "irate(container_runtime_crio_operations_latency_seconds[$interval])", + "hide": false, + "legendFormat": "{{operation}}", + "range": true, + "refId": "B" + } + ], + "title": "Container Runtime Operations Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"rate(container_runtime_crio_operations_latency_seconds_total{quantile=\"0.99\"}[$interval]) > 0", + "legendFormat": "{{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "Container Runtime Operations Latency (99th pct)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 16, + "panels": [], + "title": "Kubelet Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "yellow", + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(10,irate(process_cpu_seconds_total[$interval]))", + "legendFormat": "{{ job }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", 
+ "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(10,process_resident_memory_bytes)", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 30, + "panels": [], + "title": "API Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{subresource!=\"log\",verb!~\"WATCH|WATCHLIST|PROXY\"}[$interval])) by(verb,le))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "API Request Duration (99th latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|DELETE|PATCH\", subresource!~\"log|exec|portforward|attach|proxy\"}[$interval])) by (le, resource, verb, scope)) > 0", + "legendFormat": "{{verb}} {{resource}} {{scope}}", + "range": true, + "refId": "A" + } + ], + "title": "Mutating API Call (99th latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{component=\"apiserver\",verb=~\"LIST|GET\", subresource!~\"log|exec|portforward|attach|proxy\"}[$interval])) by (resource, le, verb)) > 0", + "legendFormat": "{{verb}} {{resource}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Only API Calls (Resource, 99th latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + 
"showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(irate(apiserver_request_total{verb!=\"WATCH\"}[$interval])) by (verb,resource,code) > 0", + "legendFormat": "{{verb}} {{resource}} {{code}}", + "range": true, + "refId": "A" + } + ], + "title": "API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{component=\"apiserver\", scope=\"namespace\", verb=~\"LIST|GET\", 
subresource!~\"log|exec|portforward|attach|proxy\"}[$interval])) by (le, resource, verb)) > 0", + "legendFormat": "{{verb}} {{resource}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Only API Calls (Namespace, 99th latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{component=\"apiserver\", scope=\"cluster\", verb=~\"LIST|GET\", subresource!~\"log|exec|portforward|attach|proxy\"}[$interval])) by (le, resource, verb)) > 0", + "legendFormat": "{{verb}} {{resource}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Only API Calls (Cluster, 99th 
latency)", + "type": "timeseries" + } + ], + "refresh": false, + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, "type": "datasource" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "30s", + "value": "30s" + }, + "hide": 0, + "name": "interval", + "options": [ + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "10s,30s,1m,3m,5m,10m,30m,1h", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" } ] }, @@ -788,6 +2373,6 @@ "timezone": "", "title": "MicroShift Perf", "uid": "_eYa3wVVk", - "version": 1, + "version": 51, "weekStart": "" } diff --git a/ansible/roles/install-logging/tasks/main.yml b/ansible/roles/install-logging/tasks/main.yml index 275625a88b..118d055e91 100644 --- a/ansible/roles/install-logging/tasks/main.yml +++ b/ansible/roles/install-logging/tasks/main.yml @@ -1,73 +1,75 @@ --- # install-logging tasks -- name: rpm tasks +- name: RPM tasks + when: (ansible_distribution == "CentOS") or (ansible_distribution == "RedHat") or (ansible_distribution == "Fedora") block: - - name: install prometheus & grafana + - name: Install prometheus & grafana ansible.builtin.dnf: 
name: "{{ logging_packages }}" state: present update_cache: true - - name: check that the prometheus cli vars file exists + - name: Check that the prometheus cli vars file exists ansible.builtin.stat: path: "{{ prometheus_vars_file }}" register: prometheus_vars - - name: set prometheus args to change listen port + - name: Set prometheus args to change listen port ansible.builtin.replace: path: "{{ prometheus_vars_file }}" regexp: "ARGS=''" replace: "ARGS='--web.listen-address=0.0.0.0:{{ prometheus_port }}'" when: prometheus_vars.stat.exists - when: (ansible_distribution == "CentOS") or (ansible_distribution == "RedHat") or (ansible_distribution == "Fedora") -- name: copy prometheus config +- name: Copy prometheus config ansible.builtin.template: src: prometheus.yml.j2 dest: /etc/prometheus/prometheus.yml + mode: '0640' backup: true -- name: start and enable prometheus & grafana service(s) +- name: Start and enable prometheus & grafana service(s) ansible.builtin.systemd: name: "{{ item }}" state: restarted - enabled: yes + enabled: true loop: "{{ logging_services }}" -- block: - - name: get content of microshift grafana dashboard - ansible.builtin.set_fact: - microshift_dashboard: "{{ lookup('ansible.builtin.file', 'microshift_perf.json') }}" +- name: Configure Grafana dashboards and datasources + when: grafana_setup + block: + - name: Get content of microshift grafana dashboard + ansible.builtin.set_fact: + microshift_dashboard: "{{ lookup('ansible.builtin.file', 'microshift_perf.json') }}" - # The following URI commands fail without accessing some external network - - name: wake up network access - ansible.builtin.command: curl github.com + # The following URI commands fail without accessing some external network + - name: Wake up network access + ansible.builtin.command: curl github.com - - name: create prometheus datasource in grafana - ansible.builtin.uri: - url: http://{{ ansible_default_ipv4.address }}:{{ grafana_port }}/api/datasources - url_username: "{{ 
grafana_username }}" - url_password: "{{ grafana_password }}" - status_code: [200, 409] - force_basic_auth: yes - method: POST - body_format: json - headers: - Accept: application/json - Content-Type: application/json - body: "{{ lookup('ansible.builtin.template', 'prometheus_datasource.json.j2') }}" + - name: Create prometheus datasource in grafana + ansible.builtin.uri: + url: http://{{ grafana_host_address | default(ansible_default_ipv4.address) }}:{{ grafana_port }}/api/datasources + url_username: "{{ grafana_username }}" + url_password: "{{ grafana_password }}" + status_code: [200, 409] + force_basic_auth: true + method: POST + body_format: json + headers: + Accept: application/json + Content-Type: application/json + body: "{{ lookup('ansible.builtin.template', 'prometheus_datasource.json.j2') }}" - - name: create microshift perf dashboard in grafana - ansible.builtin.uri: - url: http://{{ ansible_default_ipv4.address }}:{{ grafana_port }}/api/dashboards/db - url_username: "{{ grafana_username }}" - url_password: "{{ grafana_password }}" - force_basic_auth: yes - method: POST - body_format: json - headers: - Accept: application/json - Content-Type: application/json - body: "{{ lookup('ansible.builtin.template', 'grafana_dashboard.json.j2') }}" - when: grafana_setup + - name: Create microshift perf dashboard in grafana + ansible.builtin.uri: + url: http://{{ grafana_host_address | default(ansible_default_ipv4.address) }}:{{ grafana_port }}/api/dashboards/db + url_username: "{{ grafana_username }}" + url_password: "{{ grafana_password }}" + force_basic_auth: true + method: POST + body_format: json + headers: + Accept: application/json + Content-Type: application/json + body: "{{ lookup('ansible.builtin.template', 'grafana_dashboard.json.j2') }}" diff --git a/ansible/roles/manage-repos/defaults/main.yml b/ansible/roles/manage-repos/defaults/main.yml index af7ae7937f..f9c82cd7a6 100644 --- a/ansible/roles/manage-repos/defaults/main.yml +++ 
b/ansible/roles/manage-repos/defaults/main.yml @@ -1,7 +1,7 @@ --- # manage-repos default vars -microshift_version: 4.18.0-rc.3 +microshift_version: 4.19 ocp_version: "{{ microshift_version.split('.')[0] }}.{{ microshift_version.split('.')[1] }}" repo_list: - { repo_name: 'microshift-{{ ocp_version }}-for-rhel-{{ ansible_distribution_major_version }}-mirrorbeta-{{ ansible_architecture }}-rpms', repo_url: "https://mirror.openshift.com/pub/openshift-v4/{{ ansible_architecture }}/microshift/ocp/{{ microshift_version }}/el9/os/"} diff --git a/ansible/roles/manage-repos/tasks/create-mirrors.yaml b/ansible/roles/manage-repos/tasks/create-mirrors.yaml index 5d555b9061..4d5acce831 100644 --- a/ansible/roles/manage-repos/tasks/create-mirrors.yaml +++ b/ansible/roles/manage-repos/tasks/create-mirrors.yaml @@ -1,13 +1,14 @@ -- name: create mirrors block +- name: Create mirrors block block: - - name: check if microshift_version is valid for pre-release + - name: Check if microshift_version is valid for pre-release ansible.builtin.uri: url: "{{ item.repo_url }}" method: GET status_code: 200 timeout: 5 - - name: install microshift mirror repo for pre-releases + - name: Install microshift mirror repo for pre-releases ansible.builtin.template: src: ocpbeta.repo.j2 dest: "/etc/yum.repos.d/{{ item.repo_name }}.repo" + mode: '0644' diff --git a/ansible/roles/manage-repos/tasks/main.yml b/ansible/roles/manage-repos/tasks/main.yml index 4bee79c8a4..5e60f2f8e1 100644 --- a/ansible/roles/manage-repos/tasks/main.yml +++ b/ansible/roles/manage-repos/tasks/main.yml @@ -5,62 +5,62 @@ block: - name: subscription-manager tasks block: - - name: check if we have subscription-manager installed + - name: Check if we have subscription-manager installed ansible.builtin.command: which subscription-manager register: sm_present ignore_errors: true - - name: install subscription-manager + - name: Install subscription-manager ansible.builtin.dnf: name: - subscription-manager when: sm_present.rc != 0 - - 
name: register host with subscription manager + - name: Register host with subscription manager community.general.redhat_subscription: state: present username: "{{ rhel_username }}" password: "{{ rhel_password }}" - - name: enable repo management from subscription-manager + - name: Enable repo management from subscription-manager ansible.builtin.command: subscription-manager config --rhsm.manage_repos=1 when: manage_subscription - - name: slurp redhat-release + - name: Slurp redhat-release ansible.builtin.slurp: src: /etc/redhat-release register: redhat_release_slurp - - name: set beta release of RHEL + - name: Set beta release of RHEL ansible.builtin.set_fact: rhel_beta: beta- when: "'Beta' in (redhat_release_slurp['content'] | b64decode)" - - name: set microshift_prerelease if microshift_version is a prerelease + - name: Set microshift_prerelease if microshift_version is a prerelease ansible.builtin.set_fact: microshift_prerelease: "{{ 'rc' in microshift_version or 'ec' in microshift_version }}" - - name: create microshift prerelease mirrors + - name: Create microshift prerelease mirrors include_tasks: roles/manage-repos/tasks/create-mirrors.yaml loop: "{{ repo_list }}" when: - microshift_prerelease - not build_microshift - - name: enable required repos for microshift + - name: Enable required repos for microshift community.general.rhsm_repository: name: "{{ rhel_repos }}" when: - not microshift_prerelease - - name: install EPEL repo + - name: Install EPEL repo ansible.builtin.dnf: name: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" disable_gpg_check: true state: present when: ansible_distribution == "RedHat" -- name: gather the package facts +- name: Gather the package facts ansible.builtin.package_facts: manager: auto diff --git a/ansible/roles/microshift-start/files/ready.sh b/ansible/roles/microshift-start/files/ready.sh index 08c6e1ba34..c713f07906 100755 --- 
a/ansible/roles/microshift-start/files/ready.sh +++ b/ansible/roles/microshift-start/files/ready.sh @@ -1,14 +1,50 @@ #!/bin/bash +# Parse command line arguments +# Default values +DEFAULT_EXPECTED_PODS=6 +DEFAULT_ALL_PODS=10 + +# Validate arguments +if [[ $# -eq 1 ]]; then + echo "Error: When providing arguments, you must specify both EXPECTED_PODS and ALL_PODS" + echo "Usage: $0 [EXPECTED_PODS ALL_PODS]" + exit 1 +fi + +if [[ $# -eq 2 ]]; then + # Validate that both arguments are numbers + if ! [[ $1 =~ ^[0-9]+$ ]]; then + echo "Error: EXPECTED_PODS must be a number" + echo "Usage: $0 [EXPECTED_PODS ALL_PODS]" + exit 1 + fi + + if ! [[ $2 =~ ^[0-9]+$ ]]; then + echo "Error: ALL_PODS must be a number" + echo "Usage: $0 [EXPECTED_PODS ALL_PODS]" + exit 1 + fi + + # Validate that EXPECTED_PODS is less than or equal to ALL_PODS + if [[ $1 -gt $2 ]]; then + echo "Error: EXPECTED_PODS ($1) cannot be greater than ALL_PODS ($2)" + exit 1 + fi +fi + +# Use provided values or defaults +EXPECTED_PODS=${1:-${DEFAULT_EXPECTED_PODS}} +ALL_PODS=${2:-${DEFAULT_ALL_PODS}} + # Define our check command COMMAND="oc get pods -A -o 'jsonpath={..status.conditions[?(@.type==\"Ready\")].status}'" -# Define the specific output we are waiting for -EXPECTED_PODS=6 -ALL_PODS=10 # Define the location of the microshift kubeconfig KUBECONFIG="/var/lib/microshift/resources/kubeadmin/kubeconfig" USER_KUBECONFIG="${HOME}/.kube" +echo "Waiting for MicroShift to start with ${EXPECTED_PODS} expected non-storage pods and ${ALL_PODS} total pods" + # Start the timer START_TIME=$(date +%s) @@ -40,7 +76,7 @@ podcheck_nostorage() { expected=$1 while true; do OUTPUT=$(eval "oc get po -A --no-headers") - PODS_READY=$(echo "${OUTPUT}" | grep -vE "csi|lvm" | grep -c Running) + PODS_READY=$(echo "${OUTPUT}" | grep -vE "csi|storage" | grep -c Running) # Wait until all pods report ready if [[ ${PODS_READY} -ge ${expected} ]]; then @@ -81,5 +117,5 @@ podcheck() { echo "Boot: ${DURATION} seconds (${expected} pods)" 
} -podcheck_nostorage ${EXPECTED_PODS} -podcheck ${ALL_PODS} +podcheck_nostorage "${EXPECTED_PODS}" +podcheck "${ALL_PODS}" diff --git a/ansible/roles/microshift-start/tasks/main.yml b/ansible/roles/microshift-start/tasks/main.yml index 0380ed74a0..c5456130bb 100644 --- a/ansible/roles/microshift-start/tasks/main.yml +++ b/ansible/roles/microshift-start/tasks/main.yml @@ -149,8 +149,8 @@ register: vnstat - name: record network usage to file - local_action: - module: copy + ansible.builtin.copy: content: "{{ vnstat.stdout }}" dest: network.txt + delegate_to: localhost when: vnstat_check.rc == 0 diff --git a/ansible/roles/run-workloads/defaults/main.yml b/ansible/roles/run-workloads/defaults/main.yml index 942b03733c..4c9a3bf8f7 100644 --- a/ansible/roles/run-workloads/defaults/main.yml +++ b/ansible/roles/run-workloads/defaults/main.yml @@ -7,10 +7,8 @@ tarball_name: "kube-burner-0.14.2-Linux-x86_64.tar.gz" e2e_path: "{{ temp_dest }}/e2e-benchmarking" e2e_repo: "https://github.com/cloud-bulldozer/e2e-benchmarking.git" local_tarball_path: "{{ temp_dest }}/{{ tarball_name }}" -kube_burner_burst: 10 kube_burner_indexing: false kube_burner_pod_ready: "10000ms" -kube_burner_qps: 10 kube_burner_url: "https://github.com/cloud-bulldozer/kube-burner/releases/download/v0.14.2/{{ tarball_name }}" pre_delete_pause_duration: 60 post_run_pause_duration: 180 @@ -18,6 +16,23 @@ delete_grace_period: 600 delete_wait_timeout: 600 delete_label_selectors: - kube-burner-job + workloads_to_run: - - node-density - - node-density-cni-networkpolicy + - { name: node-density, qps: 2, burst: 2 } + - { name: node-density, qps: 4, burst: 4 } + - { name: node-density, qps: 6, burst: 6 } + - { name: node-density, qps: 8, burst: 8 } + - { name: node-density, qps: 10, burst: 10 } + - { name: node-density, qps: 12, burst: 12 } + - { name: node-density-cni, qps: 2, burst: 2 } + - { name: node-density-cni, qps: 4, burst: 4 } + - { name: node-density-cni, qps: 6, burst: 6 } + - { name: node-density-cni, 
qps: 8, burst: 8 } + - { name: node-density-cni, qps: 10, burst: 10 } + - { name: node-density-cni, qps: 12, burst: 12 } + - { name: node-density-cni-networkpolicy, qps: 2, burst: 2 } + - { name: node-density-cni-networkpolicy, qps: 4, burst: 4 } + - { name: node-density-cni-networkpolicy, qps: 6, burst: 6 } + - { name: node-density-cni-networkpolicy, qps: 8, burst: 8 } + - { name: node-density-cni-networkpolicy, qps: 10, burst: 10 } + - { name: node-density-cni-networkpolicy, qps: 12, burst: 12 } diff --git a/ansible/roles/run-workloads/tasks/kube-burner.yml b/ansible/roles/run-workloads/tasks/kube-burner.yml index ad49b2ee49..a008c276ca 100644 --- a/ansible/roles/run-workloads/tasks/kube-burner.yml +++ b/ansible/roles/run-workloads/tasks/kube-burner.yml @@ -1,16 +1,20 @@ --- # kube-burner tasks -- name: "run kube-kube burner workload {{ kube_burner_workload }}" +- name: "run kube-burner workload {{ kube_burner_workload.name }} with QPS={{ kube_burner_workload.qps }}" ansible.builtin.command: cmd: ./run.sh chdir: "{{ e2e_path }}/workloads/kube-burner" environment: - BURST: "{{ kube_burner_burst }}" + BURST: "{{ kube_burner_workload.burst }}" INDEXING: "{{ kube_burner_indexing }}" POD_READY_THRESHOLD: "{{ kube_burner_pod_ready }}" - QPS: "{{ kube_burner_qps }}" - WORKLOAD: "{{ kube_burner_workload }}" + QPS: "{{ kube_burner_workload.qps }}" + WORKLOAD: "{{ kube_burner_workload.name }}" + +- name: "log workload execution details" + ansible.builtin.debug: + msg: "Completed {{ kube_burner_workload.name }} with QPS={{ kube_burner_workload.qps }}, BURST={{ kube_burner_workload.burst }}" - ansible.builtin.pause: seconds: "{{ pre_delete_pause_duration }}" diff --git a/ansible/roles/setup-microshift-host/defaults/main.yml b/ansible/roles/setup-microshift-host/defaults/main.yml index 05a2fb91d2..c37291f5b3 100644 --- a/ansible/roles/setup-microshift-host/defaults/main.yml +++ b/ansible/roles/setup-microshift-host/defaults/main.yml @@ -5,10 +5,11 @@ go_arch: arm64 go_files: 
- go - gofmt -go_version: 1.20.3 +go_version: 1.24.4 go_install_dir: /usr/local/go{{ go_version }} install_packages: + - avahi-tools - bash-completion - firewalld - git diff --git a/ansible/roles/setup-microshift-host/tasks/main.yml b/ansible/roles/setup-microshift-host/tasks/main.yml index d5d7429e18..e00157ccfe 100644 --- a/ansible/roles/setup-microshift-host/tasks/main.yml +++ b/ansible/roles/setup-microshift-host/tasks/main.yml @@ -32,22 +32,49 @@ state: link with_items: "{{ go_files }}" +- name: check firewalld service status + ansible.builtin.systemd: + name: firewalld + register: firewalld_status + ignore_errors: true + - name: start and enable firewalld ansible.builtin.systemd: name: firewalld state: started enabled: yes + when: firewalld_status.status.UnitFileState is not defined or firewalld_status.status.UnitFileState != 'masked' - name: check if rhel vg exists ansible.builtin.command: vgdisplay -s {{ vg_name }} register: rhel_vg_present ignore_errors: true +- name: check if lvm disk exists + ansible.builtin.stat: + path: "{{ lvm_disk }}" + register: lvm_disk_stat + when: rhel_vg_present.rc != 0 + - name: create a volume group on top of secondary disk for topolvm community.general.lvg: vg: "{{ vg_name }}" pvs: "{{ lvm_disk }}" - when: rhel_vg_present.rc != 0 + when: + - rhel_vg_present.rc != 0 + - lvm_disk_stat.stat.exists + +- name: notify about missing lvm disk + ansible.builtin.debug: + msg: "LVM disk {{ lvm_disk }} not found, skipping TopoLVM volume group creation" + when: + - rhel_vg_present.rc != 0 + - lvm_disk_stat is defined + - not lvm_disk_stat.stat.exists + +- name: Configure TopoLVM storage + include_tasks: storage.yml + when: rhel_vg_present.rc == 0 or (lvm_disk_stat is defined and lvm_disk_stat.stat.exists) - name: upgrade all packages ansible.builtin.dnf: diff --git a/ansible/roles/setup-microshift-host/tasks/storage.yml b/ansible/roles/setup-microshift-host/tasks/storage.yml new file mode 100644 index 0000000000..df718715d2 --- /dev/null 
+++ b/ansible/roles/setup-microshift-host/tasks/storage.yml @@ -0,0 +1,29 @@ +--- +# Configure TopoLVM storage for MicroShift + +- name: Detect largest volume group for TopoLVM + ansible.builtin.shell: | + vgs --noheadings --units g -o vg_name,vg_size --sort -vg_size | head -1 | awk '{print $1}' + register: largest_vg + changed_when: false + failed_when: false + +- name: Configure TopoLVM with detected volume group + when: + - largest_vg.rc == 0 + - largest_vg.stdout | length > 0 + block: + - name: Display detected volume group + ansible.builtin.debug: + msg: "Configuring TopoLVM to use volume group: {{ largest_vg.stdout }}" + + - name: Deploy lvmd.yaml configuration + ansible.builtin.template: + src: lvmd.yaml.j2 + dest: /etc/microshift/lvmd.yaml + owner: root + group: root + mode: '0644' + backup: true + vars: + topolvm_vg: "{{ largest_vg.stdout }}" \ No newline at end of file diff --git a/ansible/roles/setup-microshift-host/templates/lvmd.yaml.j2 b/ansible/roles/setup-microshift-host/templates/lvmd.yaml.j2 new file mode 100644 index 0000000000..628b325a51 --- /dev/null +++ b/ansible/roles/setup-microshift-host/templates/lvmd.yaml.j2 @@ -0,0 +1,7 @@ +socket-name: /run/lvmd/lvmd.socket + +device-classes: + - name: default + volume-group: {{ topolvm_vg }} + spare-gb: 0 + default: true \ No newline at end of file diff --git a/ansible/vars/microshift_versions.yml b/ansible/vars/microshift_versions.yml new file mode 100644 index 0000000000..a478405152 --- /dev/null +++ b/ansible/vars/microshift_versions.yml @@ -0,0 +1,20 @@ +--- +# Shared microshift version-specific configuration +# This file is included by multiple roles + +# Version-specific pod counts +# expected_pods is the number of non-storage pods that should be running first +# all_pods is the total number of pods that should be running after the boot script has completed +microshift_versions: + "4.16": + expected_pods: 6 + all_pods: 10 + "4.17": + expected_pods: 6 + all_pods: 10 + "4.18": + expected_pods: 6 + 
all_pods: 9 + "4.19": + expected_pods: 6 + all_pods: 9 diff --git a/assets/components/csi-snapshot-controller/csi_controller_deployment.yaml b/assets/components/csi-snapshot-controller/csi_controller_deployment.yaml index 5b48acfc99..8d7fe03c61 100644 --- a/assets/components/csi-snapshot-controller/csi_controller_deployment.yaml +++ b/assets/components/csi-snapshot-controller/csi_controller_deployment.yaml @@ -35,6 +35,7 @@ spec: - name: snapshot-controller securityContext: allowPrivilegeEscalation: false + readOnlyRootFilesystem: true capabilities: drop: - ALL diff --git a/assets/components/multus/kustomization.aarch64.yaml b/assets/components/multus/kustomization.aarch64.yaml index 4f8a4e2632..2119ce828b 100644 --- a/assets/components/multus/kustomization.aarch64.yaml +++ b/assets/components/multus/kustomization.aarch64.yaml @@ -2,7 +2,7 @@ images: - name: multus-cni-microshift newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:583c49ec9121482395edfbaf5b77d33db70c6ea71ddbd3f01c2e2b0b09f14f70 + digest: sha256:8737e7670cf4be57ac830cfac55a885a6ff36cc1091123d35d695e06b0b6b75f - name: containernetworking-plugins-microshift newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:23f235f453c837b02aa0a3042844a2dab42eb0712ceeadb18ae4fe0d1cdcf3ec + digest: sha256:b9ec783e0d56bee2c642ebb6320a5ad1ddae225910136713333f549dd4d25768 diff --git a/assets/components/multus/kustomization.x86_64.yaml b/assets/components/multus/kustomization.x86_64.yaml index 4c2c69b0c2..6649c4aed4 100644 --- a/assets/components/multus/kustomization.x86_64.yaml +++ b/assets/components/multus/kustomization.x86_64.yaml @@ -2,7 +2,7 @@ images: - name: multus-cni-microshift newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:5a64d2b1a142fc6af09a42314b596edd4134742b92fea7499f1cc0db31143423 + digest: sha256:416f7edbc6d4406eff5e049f817d90033864a29d10b6b93f51de64fe139bd037 - name: containernetworking-plugins-microshift newName: 
quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:b2970007f884f13b5b22a93050f09440a057fd6d290af4eb9e67dff0c70d95c4 + digest: sha256:91d80cd3b78a80b10b0354d277101a6d64fed89433b0811cfaa94160440742bd diff --git a/assets/components/multus/release-multus-aarch64.json b/assets/components/multus/release-multus-aarch64.json index 4d92d98d7b..d616ec3b52 100644 --- a/assets/components/multus/release-multus-aarch64.json +++ b/assets/components/multus/release-multus-aarch64.json @@ -1,9 +1,9 @@ { "release": { - "base": "4.20.0-0.nightly-arm64-2025-08-05-094808" + "base": "4.20.0-0.nightly-arm64-2025-09-01-210443" }, "images": { - "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:583c49ec9121482395edfbaf5b77d33db70c6ea71ddbd3f01c2e2b0b09f14f70", - "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:23f235f453c837b02aa0a3042844a2dab42eb0712ceeadb18ae4fe0d1cdcf3ec" + "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8737e7670cf4be57ac830cfac55a885a6ff36cc1091123d35d695e06b0b6b75f", + "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b9ec783e0d56bee2c642ebb6320a5ad1ddae225910136713333f549dd4d25768" } } diff --git a/assets/components/multus/release-multus-x86_64.json b/assets/components/multus/release-multus-x86_64.json index a535f557e6..f37d0c4a1e 100644 --- a/assets/components/multus/release-multus-x86_64.json +++ b/assets/components/multus/release-multus-x86_64.json @@ -1,9 +1,9 @@ { "release": { - "base": "4.20.0-0.nightly-2025-07-31-063120" + "base": "4.20.0-0.nightly-2025-09-01-101753" }, "images": { - "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:5a64d2b1a142fc6af09a42314b596edd4134742b92fea7499f1cc0db31143423", - "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b2970007f884f13b5b22a93050f09440a057fd6d290af4eb9e67dff0c70d95c4" + 
"multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:416f7edbc6d4406eff5e049f817d90033864a29d10b6b93f51de64fe139bd037", + "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:91d80cd3b78a80b10b0354d277101a6d64fed89433b0811cfaa94160440742bd" } } diff --git a/assets/components/openshift-dns/dns/daemonset.yaml b/assets/components/openshift-dns/dns/daemonset.yaml index d6b720767b..630fa3f931 100644 --- a/assets/components/openshift-dns/dns/daemonset.yaml +++ b/assets/components/openshift-dns/dns/daemonset.yaml @@ -22,6 +22,8 @@ spec: - name: config-volume mountPath: /etc/coredns readOnly: true + - mountPath: /tmp + name: tmp-dir ports: - containerPort: 5353 name: dns @@ -52,6 +54,8 @@ spec: requests: cpu: 50m memory: 70Mi + securityContext: + readOnlyRootFilesystem: true image: '{{ .ReleaseImage.coredns }}' - name: kube-rbac-proxy args: @@ -73,6 +77,10 @@ spec: - mountPath: /etc/tls/private name: metrics-tls readOnly: true + - mountPath: /tmp + name: tmp-dir + securityContext: + readOnlyRootFilesystem: true image: '{{ .ReleaseImage.kube_rbac_proxy }}' imagePullPolicy: IfNotPresent dnsPolicy: Default @@ -87,6 +95,8 @@ spec: secret: defaultMode: 420 secretName: dns-default-metrics-tls + - name: tmp-dir + emptyDir: {} nodeSelector: kubernetes.io/os: linux tolerations: diff --git a/assets/components/openshift-dns/node-resolver/daemonset.yaml b/assets/components/openshift-dns/node-resolver/daemonset.yaml index 6d8a5fbf32..04e7548730 100644 --- a/assets/components/openshift-dns/node-resolver/daemonset.yaml +++ b/assets/components/openshift-dns/node-resolver/daemonset.yaml @@ -28,7 +28,7 @@ spec: NAMESERVER=${DNS_DEFAULT_SERVICE_HOST} OPENSHIFT_MARKER="openshift-generated-node-resolver" HOSTS_FILE="/etc/hosts" - TEMP_FILE="/etc/hosts.tmp" + TEMP_FILE="/tmp/hosts.tmp" IFS=', ' read -r -a services <<< "${SERVICES}" diff --git a/assets/components/openshift-dns/node-resolver/update-node-resolver.sh 
b/assets/components/openshift-dns/node-resolver/update-node-resolver.sh index 327718fef7..1a7563411b 100644 --- a/assets/components/openshift-dns/node-resolver/update-node-resolver.sh +++ b/assets/components/openshift-dns/node-resolver/update-node-resolver.sh @@ -5,7 +5,7 @@ trap 'jobs -p | xargs kill || true; wait; exit 0' TERM OPENSHIFT_MARKER="openshift-generated-node-resolver" HOSTS_FILE="/etc/hosts" -TEMP_FILE="/etc/hosts.tmp" +TEMP_FILE="/tmp/hosts.tmp" IFS=', ' read -r -a services <<< "${SERVICES}" diff --git a/assets/components/openshift-router/deployment.yaml b/assets/components/openshift-router/deployment.yaml index 4a7520c32e..af1f48f67f 100644 --- a/assets/components/openshift-router/deployment.yaml +++ b/assets/components/openshift-router/deployment.yaml @@ -219,7 +219,7 @@ spec: args: - -v=4 {{- if and .AccessLoggingEnabled (not .AccessLoggingSyslogAddress) }} - - name: access-logs + - name: logs imagePullPolicy: IfNotPresent terminationMessagePolicy: FallbackToLogsOnError image: '{{ .ReleaseImage.haproxy_router }}' diff --git a/assets/core/kubelet-client-ca.yaml b/assets/core/kubelet-client-ca.yaml new file mode 100644 index 0000000000..7ffacd8f3f --- /dev/null +++ b/assets/core/kubelet-client-ca.yaml @@ -0,0 +1,8 @@ +# Kubelet client CA bundle - populated at runtime by the KubeletCAManager +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubelet-client-ca + namespace: kube-system +data: + ca.crt: "" diff --git a/assets/optional/cert-manager/manager/kustomization.yaml b/assets/optional/cert-manager/manager/kustomization.yaml index 9655ea23ae..4b0005d8c3 100644 --- a/assets/optional/cert-manager/manager/kustomization.yaml +++ b/assets/optional/cert-manager/manager/kustomization.yaml @@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: quay.io/microshift/cert-manager - newTag: latest \ No newline at end of file + newName: 
registry.redhat.io/cert-manager/cert-manager-operator-rhel9@sha256 + newTag: 4d5e238300ce6f427a1045d51d6b37a4e5c5633985208ebb44f91e7dd53897d9 \ No newline at end of file diff --git a/assets/optional/cert-manager/manager/manager.yaml b/assets/optional/cert-manager/manager/manager.yaml index 62d1a16aa1..d6b4394d51 100644 --- a/assets/optional/cert-manager/manager/manager.yaml +++ b/assets/optional/cert-manager/manager/manager.yaml @@ -75,21 +75,21 @@ spec: - name: OPERATOR_NAME value: cert-manager-operator - name: RELATED_IMAGE_CERT_MANAGER_WEBHOOK - value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4 + value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659 - name: RELATED_IMAGE_CERT_MANAGER_CA_INJECTOR - value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4 + value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659 - name: RELATED_IMAGE_CERT_MANAGER_CONTROLLER - value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4 + value: registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659 - name: RELATED_IMAGE_CERT_MANAGER_ACMESOLVER - value: registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:438d487c6b644319094f92250d43e0becf1bd0cc4b7d2864f4de72bacd1b9daf + value: registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:4f7c045819c39e176a6090efdaba6ec736edf772d88fc87dd1c6fb33d3b5b26b - name: RELATED_IMAGE_CERT_MANAGER_ISTIOCSR value: 
registry.redhat.io/cert-manager/cert-manager-istio-csr-rhel9@sha256:9ea2c29a384b964cef14f853278821df3cd30320f25afab8823897192f67fc7e - name: OPERAND_IMAGE_VERSION - value: 1.16.4 + value: 1.17.0 - name: ISTIOCSR_OPERAND_IMAGE_VERSION value: 0.14.0 - name: OPERATOR_IMAGE_VERSION - value: 1.16.0 + value: 1.17.0 - name: OPERATOR_LOG_LEVEL value: '2' - name: TRUSTED_CA_CONFIGMAP_NAME diff --git a/assets/optional/cert-manager/release-cert-manager-aarch64.json b/assets/optional/cert-manager/release-cert-manager-aarch64.json index d8fb19b37e..643c6e34ef 100644 --- a/assets/optional/cert-manager/release-cert-manager-aarch64.json +++ b/assets/optional/cert-manager/release-cert-manager-aarch64.json @@ -1,13 +1,13 @@ { "release": { - "base": "4.20" + "base": "1.17.0" }, "images": { - "cert-manager-operator": "registry.redhat.io/cert-manager/cert-manager-operator-rhel9@sha256:41146965b3344b008ff0f6d119c1cb071efa7f02c742ce9af303b896ae43bff7", - "cert-manager-webhook": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-ca-injector": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-controller": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-acmesolver": "registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:438d487c6b644319094f92250d43e0becf1bd0cc4b7d2864f4de72bacd1b9daf", + "cert-manager-operator": "registry.redhat.io/cert-manager/cert-manager-operator-rhel9@sha256:4d5e238300ce6f427a1045d51d6b37a4e5c5633985208ebb44f91e7dd53897d9", + "cert-manager-webhook": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-ca-injector": 
"registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-controller": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-acmesolver": "registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:4f7c045819c39e176a6090efdaba6ec736edf772d88fc87dd1c6fb33d3b5b26b", "cert-manager-istiocsr": "registry.redhat.io/cert-manager/cert-manager-istio-csr-rhel9@sha256:9ea2c29a384b964cef14f853278821df3cd30320f25afab8823897192f67fc7e" } } diff --git a/assets/optional/cert-manager/release-cert-manager-x86_64.json b/assets/optional/cert-manager/release-cert-manager-x86_64.json index d8fb19b37e..643c6e34ef 100644 --- a/assets/optional/cert-manager/release-cert-manager-x86_64.json +++ b/assets/optional/cert-manager/release-cert-manager-x86_64.json @@ -1,13 +1,13 @@ { "release": { - "base": "4.20" + "base": "1.17.0" }, "images": { - "cert-manager-operator": "registry.redhat.io/cert-manager/cert-manager-operator-rhel9@sha256:41146965b3344b008ff0f6d119c1cb071efa7f02c742ce9af303b896ae43bff7", - "cert-manager-webhook": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-ca-injector": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-controller": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:408a5c91e6066d33801456db5b0c214095ab7e47a0af1dcb91b5c88bfbcca4d4", - "cert-manager-acmesolver": "registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:438d487c6b644319094f92250d43e0becf1bd0cc4b7d2864f4de72bacd1b9daf", + "cert-manager-operator": 
"registry.redhat.io/cert-manager/cert-manager-operator-rhel9@sha256:4d5e238300ce6f427a1045d51d6b37a4e5c5633985208ebb44f91e7dd53897d9", + "cert-manager-webhook": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-ca-injector": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-controller": "registry.redhat.io/cert-manager/jetstack-cert-manager-rhel9@sha256:96d51e3a64bf30cbd92836c7cbd82f06edca16eef78ab1432757d34c16628659", + "cert-manager-acmesolver": "registry.redhat.io/cert-manager/jetstack-cert-manager-acmesolver-rhel9@sha256:4f7c045819c39e176a6090efdaba6ec736edf772d88fc87dd1c6fb33d3b5b26b", "cert-manager-istiocsr": "registry.redhat.io/cert-manager/cert-manager-istio-csr-rhel9@sha256:9ea2c29a384b964cef14f853278821df3cd30320f25afab8823897192f67fc7e" } } diff --git a/assets/optional/kube-proxy/kustomization.aarch64.yaml b/assets/optional/kube-proxy/kustomization.aarch64.yaml index e79a2ad924..d11f9dc4f1 100644 --- a/assets/optional/kube-proxy/kustomization.aarch64.yaml +++ b/assets/optional/kube-proxy/kustomization.aarch64.yaml @@ -2,4 +2,4 @@ images: - name: kube-proxy newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:6640e154d4b7894596479b99edcde2d6ca0596b046934b11ce8d47bd3fce8eba + digest: sha256:aa9c6655aa89198548a407071f2089c800e440cb88756cee525d2bcf29a38be4 diff --git a/assets/optional/kube-proxy/kustomization.x86_64.yaml b/assets/optional/kube-proxy/kustomization.x86_64.yaml index 4977602304..eb1c703e3b 100644 --- a/assets/optional/kube-proxy/kustomization.x86_64.yaml +++ b/assets/optional/kube-proxy/kustomization.x86_64.yaml @@ -2,4 +2,4 @@ images: - name: kube-proxy newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:c13527c27d15b62e1b1d94fbc122c267fc76f8db1189d71b751a0e67e41e90f5 + digest: 
sha256:29d197e8994d8fe3e1ada82916ffc83c1d85e592c648e205b39e2bc38fe0a9cf diff --git a/assets/optional/kube-proxy/release-kube-proxy-aarch64.json b/assets/optional/kube-proxy/release-kube-proxy-aarch64.json index 8cee641f3d..3c371eab6c 100644 --- a/assets/optional/kube-proxy/release-kube-proxy-aarch64.json +++ b/assets/optional/kube-proxy/release-kube-proxy-aarch64.json @@ -1,8 +1,8 @@ { "release": { - "base": "4.20.0-0.nightly-arm64-2025-08-05-094808" + "base": "4.20.0-0.nightly-arm64-2025-09-01-210443" }, "images": { - "kube-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:6640e154d4b7894596479b99edcde2d6ca0596b046934b11ce8d47bd3fce8eba" + "kube-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:aa9c6655aa89198548a407071f2089c800e440cb88756cee525d2bcf29a38be4" } } diff --git a/assets/optional/kube-proxy/release-kube-proxy-x86_64.json b/assets/optional/kube-proxy/release-kube-proxy-x86_64.json index def83d5765..cc883a1544 100644 --- a/assets/optional/kube-proxy/release-kube-proxy-x86_64.json +++ b/assets/optional/kube-proxy/release-kube-proxy-x86_64.json @@ -1,8 +1,8 @@ { "release": { - "base": "4.20.0-0.nightly-2025-07-31-063120" + "base": "4.20.0-0.nightly-2025-09-01-101753" }, "images": { - "kube-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:c13527c27d15b62e1b1d94fbc122c267fc76f8db1189d71b751a0e67e41e90f5" + "kube-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:29d197e8994d8fe3e1ada82916ffc83c1d85e592c648e205b39e2bc38fe0a9cf" } } diff --git a/assets/optional/operator-lifecycle-manager/0000_50_olm_07-olm-operator.deployment.yaml b/assets/optional/operator-lifecycle-manager/0000_50_olm_07-olm-operator.deployment.yaml index ebec29236f..e9ad66d461 100644 --- a/assets/optional/operator-lifecycle-manager/0000_50_olm_07-olm-operator.deployment.yaml +++ b/assets/optional/operator-lifecycle-manager/0000_50_olm_07-olm-operator.deployment.yaml @@ -35,10 +35,13 @@ spec: - name: profile-collector-cert secret: 
secretName: pprof-cert + - name: tmpfs + emptyDir: {} containers: - name: olm-operator securityContext: allowPrivilegeEscalation: false + readOnlyRootFilesystem: true capabilities: drop: ["ALL"] volumeMounts: @@ -48,6 +51,8 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true + - name: tmpfs + mountPath: /tmp command: - /bin/olm args: diff --git a/assets/optional/operator-lifecycle-manager/0000_50_olm_08-catalog-operator.deployment.yaml b/assets/optional/operator-lifecycle-manager/0000_50_olm_08-catalog-operator.deployment.yaml index 9b5370c81e..ef3c9c95e7 100644 --- a/assets/optional/operator-lifecycle-manager/0000_50_olm_08-catalog-operator.deployment.yaml +++ b/assets/optional/operator-lifecycle-manager/0000_50_olm_08-catalog-operator.deployment.yaml @@ -35,10 +35,13 @@ spec: - name: profile-collector-cert secret: secretName: pprof-cert + - name: tmpfs + emptyDir: {} containers: - name: catalog-operator securityContext: allowPrivilegeEscalation: false + readOnlyRootFilesystem: true capabilities: drop: ["ALL"] volumeMounts: @@ -48,6 +51,8 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true + - name: tmpfs + mountPath: /tmp command: - /bin/catalog args: diff --git a/assets/optional/operator-lifecycle-manager/kustomization.aarch64.yaml b/assets/optional/operator-lifecycle-manager/kustomization.aarch64.yaml index 0eda766989..79d3c2a0fe 100644 --- a/assets/optional/operator-lifecycle-manager/kustomization.aarch64.yaml +++ b/assets/optional/operator-lifecycle-manager/kustomization.aarch64.yaml @@ -2,13 +2,13 @@ images: - name: quay.io/operator-framework/olm newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:ea2cb58510c4a074db617576667879f0dc06738953a58f1f9fa70a552ba392bd + digest: sha256:3e8c3e8351e7e273a7a8fa6ef073c4c51f39c1db8e97dcbd1e107c61e33a171a - name: quay.io/operator-framework/configmap-operator-registry newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev 
- digest: sha256:b8c6f87c891c68cfabbc93c15e37b94e4bae4c285ae0a6b5c5e9a625b1b3884b + digest: sha256:45c01c21e00141d773788579c405528fdb136475e2a725e39e7fdd7196fa2a81 - name: quay.io/openshift/origin-kube-rbac-proxy newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:dfaaf4cca763022209eea9e4d57f6eee8316529df5287ad6f745c411fc8719ff + digest: sha256:6dcc5506bd757ca1bded6ef9ca42b2a56eb720f0d1d1f47a48c491673a2a5aa4 patches: - patch: |- @@ -16,12 +16,12 @@ patches: path: /spec/template/spec/containers/0/env/- value: name: OPERATOR_REGISTRY_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b8c6f87c891c68cfabbc93c15e37b94e4bae4c285ae0a6b5c5e9a625b1b3884b + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:45c01c21e00141d773788579c405528fdb136475e2a725e39e7fdd7196fa2a81 - op: add path: /spec/template/spec/containers/0/env/- value: name: OLM_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ea2cb58510c4a074db617576667879f0dc06738953a58f1f9fa70a552ba392bd + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3e8c3e8351e7e273a7a8fa6ef073c4c51f39c1db8e97dcbd1e107c61e33a171a target: kind: Deployment labelSelector: app=catalog-operator diff --git a/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml b/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml index fc6b7798f9..be4a2fc614 100644 --- a/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml +++ b/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml @@ -2,13 +2,13 @@ images: - name: quay.io/operator-framework/olm newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:1b2fd325ec662c371f1aac15c5c60462afb4461430de3523e273eb0e34a3ba98 + digest: sha256:d717dce83f4a92a53a2607d623a1640efaba1e838cf9446e33a145e315b796d1 - name: quay.io/operator-framework/configmap-operator-registry newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: 
sha256:60b511049f757f2a4433ddb39897733bc68ec8ff57ee2c3ed10a571bbca917cd + digest: sha256:2421a51b40b6a1bd600507e04c24e8c8e870b3092b0dc9c3c9be6d29375e4c4c - name: quay.io/openshift/origin-kube-rbac-proxy newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:2c17c061c672839fbcca9a61b7d1c1d07419042b0603ed05a69afd60c23ca3c0 + digest: sha256:d10dffbb2295f46289fd0de097293133f0d9cb6cd81c0016b7bb1de5dc6e6e59 patches: - patch: |- @@ -16,12 +16,12 @@ patches: path: /spec/template/spec/containers/0/env/- value: name: OPERATOR_REGISTRY_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:60b511049f757f2a4433ddb39897733bc68ec8ff57ee2c3ed10a571bbca917cd + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2421a51b40b6a1bd600507e04c24e8c8e870b3092b0dc9c3c9be6d29375e4c4c - op: add path: /spec/template/spec/containers/0/env/- value: name: OLM_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:1b2fd325ec662c371f1aac15c5c60462afb4461430de3523e273eb0e34a3ba98 + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d717dce83f4a92a53a2607d623a1640efaba1e838cf9446e33a145e315b796d1 target: kind: Deployment labelSelector: app=catalog-operator diff --git a/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json b/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json index 4958ce0495..d918824e15 100644 --- a/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json +++ b/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json @@ -1,10 +1,10 @@ { "release": { - "base": "4.20.0-0.nightly-arm64-2025-08-05-094808" + "base": "4.20.0-0.nightly-arm64-2025-09-01-210443" }, "images": { - "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ea2cb58510c4a074db617576667879f0dc06738953a58f1f9fa70a552ba392bd", - "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b8c6f87c891c68cfabbc93c15e37b94e4bae4c285ae0a6b5c5e9a625b1b3884b", - 
"kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:dfaaf4cca763022209eea9e4d57f6eee8316529df5287ad6f745c411fc8719ff" + "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3e8c3e8351e7e273a7a8fa6ef073c4c51f39c1db8e97dcbd1e107c61e33a171a", + "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:45c01c21e00141d773788579c405528fdb136475e2a725e39e7fdd7196fa2a81", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:6dcc5506bd757ca1bded6ef9ca42b2a56eb720f0d1d1f47a48c491673a2a5aa4" } } diff --git a/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json b/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json index f60a5ca0c4..a1ea4e3e31 100644 --- a/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json +++ b/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json @@ -1,10 +1,10 @@ { "release": { - "base": "4.20.0-0.nightly-2025-07-31-063120" + "base": "4.20.0-0.nightly-2025-09-01-101753" }, "images": { - "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:1b2fd325ec662c371f1aac15c5c60462afb4461430de3523e273eb0e34a3ba98", - "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:60b511049f757f2a4433ddb39897733bc68ec8ff57ee2c3ed10a571bbca917cd", - "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2c17c061c672839fbcca9a61b7d1c1d07419042b0603ed05a69afd60c23ca3c0" + "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d717dce83f4a92a53a2607d623a1640efaba1e838cf9446e33a145e315b796d1", + "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2421a51b40b6a1bd600507e04c24e8c8e870b3092b0dc9c3c9be6d29375e4c4c", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d10dffbb2295f46289fd0de097293133f0d9cb6cd81c0016b7bb1de5dc6e6e59" } } diff --git a/assets/release/release-aarch64.json 
b/assets/release/release-aarch64.json index 37aef90c75..3d5c20476f 100644 --- a/assets/release/release-aarch64.json +++ b/assets/release/release-aarch64.json @@ -1,16 +1,16 @@ { "release": { - "base": "4.20.0-0.nightly-arm64-2025-08-05-094808" + "base": "4.20.0-0.nightly-arm64-2025-09-01-210443" }, "images": { - "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:535573797f93560a86f3107f2d4eab8fe336dbf5db39d7e6bc2b31223b3c1e31", - "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:49c314a9a566b95902573ac8892c9ab49a56139571d60d2a2ce566606cc461f6", - "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:1e7e0cd5b8939bf13d0a371f3c005edb98b38bb7cb5aa3ba2e1ff898875eab77", - "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:dfaaf4cca763022209eea9e4d57f6eee8316529df5287ad6f745c411fc8719ff", - "ovn-kubernetes-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:393869c80562352e49857cbd0576aeef3a8c8df441851d73d37745ca1e3ad4ea", - "pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca9bd83ae2bb56f52208d769ec111a84b92b0590425baeafe842cef440b44037", - "service-ca-operator": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e1cd276e908e2a7822fde2a705f1a6b7378de5867533760ae84d65f5d9d456aa", + "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:65df57d4a5c54683fa88710f6d3a172a37f0deaa5ded38dd46accdbbc740188d", + "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:1343607f9b2a0181bca4042869fe9cac040f343d29b0add4d16c66a4ce25d2d4", + "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:c635634de8fa2e193d58fe99acc63416d9916462d710a86db6742e87cabf02c9", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:6dcc5506bd757ca1bded6ef9ca42b2a56eb720f0d1d1f47a48c491673a2a5aa4", + "ovn-kubernetes-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8123533d36736a1d3f6b11e060c6a557f8d6995416fc4c86918b8c2ac817cb72", + 
"pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e7c93974ccc75b84c699b7f1ead135226bfe0689689d3f0d1d7d621ed5b5a7ab", + "service-ca-operator": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:178838fc3cfd7299d32082e423681f9a0bc2ba28a011de52c6e718ead2bc7634", "lvms_operator": "registry.redhat.io/lvms4/lvms-rhel9-operator@sha256:0689e27defe411a07da33e339ec65df67e09a3a7080b8e5494fae5cb1a35db36", - "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:f2ccbf41f647b3f7987d6ed8dd5d7b38c83a502f7883115e2c22cfa82c9dd9df" + "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:291ed630d7551a39b3a720049400bcb8d0d8b3cf0f1a9bfe27358d3f6fe826c5" } } diff --git a/assets/release/release-x86_64.json b/assets/release/release-x86_64.json index 2b958f3168..13607fe756 100644 --- a/assets/release/release-x86_64.json +++ b/assets/release/release-x86_64.json @@ -1,16 +1,16 @@ { "release": { - "base": "4.20.0-0.nightly-2025-07-31-063120" + "base": "4.20.0-0.nightly-2025-09-01-101753" }, "images": { - "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8dafb6121bf7124f8f58bea42d37d9bba10a2a0cc2b61e5c91798a8829d36ba3", - "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3ecc69d400d2c9282c635d7e60113158d1093cda8cd4f35d760e08ea2b80bded", - "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:6baf21da47a043e562bd2507e9f54625aba81212b953fb92aeb1f94b11237930", - "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2c17c061c672839fbcca9a61b7d1c1d07419042b0603ed05a69afd60c23ca3c0", - "ovn-kubernetes-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2fc5e1fcd84651b617010a48802f77c5e432b5f351be3e0705a5472c2965a232", - "pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:57287ba005a5a5ab513a645134676759f134be56e2108f39a5a3c51ecac71d98", - "service-ca-operator": 
"quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:7d2ea3c3777514a3a38bb68a5d67e648879acb9485af443d1deee92e7a95416a", + "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:fd68002bffef6ca072019f186f359d6e4474a64e347002441a388cdeac9ce882", + "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b38dd8938bdf9324a0ace965e77ba8e765cd2f913d674c3b9dc9e53f4934874e", + "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:feb7ba209de3503d795fec89dadf119779073b289e8c71a9cf6bd01120199d63", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d10dffbb2295f46289fd0de097293133f0d9cb6cd81c0016b7bb1de5dc6e6e59", + "ovn-kubernetes-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d9b7e6bbdccc7edaa6682491bdb3e172cc2f7e484920ae690cf60f4b851285de", + "pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d6a7df177c6efee91051d9b09f600751b7685855f1791be9a08f777921daaa9d", + "service-ca-operator": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:b6e248968976bedbfb4f83d98241d8c97b14ee01261e1018c9aba3c7c68a4219", "lvms_operator": "registry.redhat.io/lvms4/lvms-rhel9-operator@sha256:b372811ed8263f80c0903a63e9fd9d05d4dbf2b4d7adcb32304108124841e5bc", - "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:98920d012fd0eef9cc528e2ec38de6ec8fd70daaaffa05e61cbff2750b3794c6" + "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2106c18d27a2638dd74d491f0fcfbdd65cf75a1fda337f34e30347f792a2bdb0" } } diff --git a/dashboards/grafana-dashboard-microshift-telemetry.configmap.yaml b/dashboards/grafana-dashboard-microshift-telemetry.configmap.yaml new file mode 100644 index 0000000000..69849b7c9e --- /dev/null +++ b/dashboards/grafana-dashboard-microshift-telemetry.configmap.yaml @@ -0,0 +1,1425 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-microshift-telemetry + labels: + grafana_dashboard: "true" + annotations: + 
grafana-folder: /grafana-dashboard-definitions/MicroShift +data: + microshift-telemetry.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 886246, + "links": [], + "panels": [ + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 21, + "x": 0, + "y": 0 + }, + "id": 29, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "* The majority of MicroShift use cases is disconnected environments. Data below represents a small fraction of clusters.\n* Telemetry feature was introduced in 4.19. There is no data prior to that version.\n* A cluster is considered to be offline when it stops reporting metrics.\n * `Offline period threshold` is a configurable option to establish how long a cluster can stop reporting metrics until it is considered to be offline.\n* Short lived clusters (<1h uptime) are explicitly excluded from production info.\n * CI and QE run some tests on production servers for quality assurance. 
These clusters are short lived.\n* Non-production clusters belong to CI and QE.", + "mode": "markdown" + }, + "pluginVersion": "11.6.3", + "title": "About this dashboard", + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 22, + "panels": [], + "title": "Production clusters", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 7 + }, + "id": 30, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count (\n max by (_id, version) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active clusters", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "displayName", + "value": "Clusters" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 5, + "y": 7 + }, + "id": 31, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count (\n max by (_id, version) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active clusters", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "displayName", + "value": "1-4 CPUs" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "displayName", + "value": "4-8 CPUs" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "displayName", + "value": "8-16 CPUs" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "displayName", + "value": "+16 CPUs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 11, + "y": 7 + }, + "id": 32, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n ) <= 4\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + 
}, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n ) > 4\n and\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n ) <= 8\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n ) > 8\n and\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) 
(min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n ) <= 16\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n )\n ) > 16\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + } + ], + "title": "Active clusters", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "displayName", + "value": "0-1 day" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "displayName", + "value": "1-7 days" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "displayName", + "value": "7-30 days" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "displayName", + "value": "30-180 days" + } + ] + }, + { + "matcher": { + "id": 
"byFrameRefID", + "options": "E" + }, + "properties": [ + { + "id": "displayName", + "value": "180+ days" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 4, + "x": 17, + "y": 7 + }, + "id": 33, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) > 3600\n and\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) <= 86400 # <1 day uptime.\n )\n)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) > 86400\n and\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) <= 604800 # <1 week uptime.\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": 
"code", + "expr": "count(\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) > 604800\n and\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) <= 2592000 # < 1 month uptime.\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) > 2592000\n and\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) <= 15552000 # < 6 month uptime.\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n - \n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) > 15552000\n )\n)\n", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "E" + } + ], + "title": "Cluster uptime", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { 
+ "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 14 + }, + "id": 34, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active CPUs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "displayName", + "value": "CPUs" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 5, + "y": 14 + }, + "id": 35, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(\n sum by (_id) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active CPUs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 11, + "y": 14 + }, + "id": 36, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (label_kubernetes_io_arch) (\n sum by (_id, label_kubernetes_io_arch) (cluster:capacity_cpu_cores:sum{label_beta_kubernetes_io_instance_type=\"rhde\"})\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPUs by architecture", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 7, + "x": 0, + "y": 21 + }, + "id": 37, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { 
+ "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count by (deployment_type, version) (\n (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Clusters by version and type", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 14, + "x": 7, + "y": 21 + }, + "id": 39, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count by (deployment_type) (\n (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cluster deployment types", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 14, + "x": 7, + "y": 28 + }, + "id": 40, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": 
false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count by (version) (\n (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) < 3600\n )\n )\n)\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cluster versions", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 24, + "panels": [], + "title": "Non-production clusters", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 36 + }, + "id": 38, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n and 
on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) >= 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Created clusters", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "Active clusters", + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 15, + "x": 6, + "y": 36 + }, + "id": 42, + "options": { + "dataLinks": [], + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.6.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n (\n count_over_time(microshift_version[$offline_threshold]) > 0\n 
)\n and on(_id)\n (\n max by (_id) (\n count_over_time(microshift_version[$offline_threshold]) > 0\n )\n unless on (_id)\n (\n (\n max by(_id) (max_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n -\n min by(_id) (min_over_time(timestamp(microshift_version)[$offline_threshold:5m]))\n ) >= 3600\n )\n )\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active clusters", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "telemeter-recent-production", + "value": "PD776AFABBE26000A" + }, + "includeAll": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "telemeter-.*", + "type": "datasource" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "7d", + "value": "7d" + }, + "description": "Period that a cluster can go without reporting any metrics before considering it is offline.", + "label": "Offline period threshold", + "name": "offline_threshold", + "options": [ + { + "selected": false, + "text": "2h", + "value": "2h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": true, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "2h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-30d", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "MicroShift Telemetry", + "uid": "sK7hN2pXf", + "version": 1 + } diff --git a/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.33.md b/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.33.md index d8f58ab307..de03744792 
100644 --- a/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.33.md +++ b/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.33.md @@ -1,84 +1,85 @@ -- [v1.33.1](#v1331) - - [Downloads for v1.33.1](#downloads-for-v1331) +- [v1.33.2](#v1332) + - [Downloads for v1.33.2](#downloads-for-v1332) - [Source Code](#source-code) - [Client Binaries](#client-binaries) - [Server Binaries](#server-binaries) - [Node Binaries](#node-binaries) - [Container Images](#container-images) - - [Changelog since v1.33.0](#changelog-since-v1330) + - [Changelog since v1.33.1](#changelog-since-v1331) + - [Important Security Information](#important-security-information) + - [CVE-2025-4563: Nodes can bypass dynamic resource allocation authorization checks](#cve-2025-4563-nodes-can-bypass-dynamic-resource-allocation-authorization-checks) - [Changes by Kind](#changes-by-kind) + - [Feature](#feature) - [Bug or Regression](#bug-or-regression) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake) - [Dependencies](#dependencies) - [Added](#added) - [Changed](#changed) - [Removed](#removed) -- [v1.33.0](#v1330) - - [Downloads for v1.33.0](#downloads-for-v1330) +- [v1.33.1](#v1331) + - [Downloads for v1.33.1](#downloads-for-v1331) - [Source Code](#source-code-1) - [Client Binaries](#client-binaries-1) - [Server Binaries](#server-binaries-1) - [Node Binaries](#node-binaries-1) - [Container Images](#container-images-1) - - [Changelog since v1.32.0](#changelog-since-v1320) - - [Urgent Upgrade Notes](#urgent-upgrade-notes) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade) + - [Changelog since v1.33.0](#changelog-since-v1330) - [Changes by Kind](#changes-by-kind-1) - - [Deprecation](#deprecation) - - [API Change](#api-change) - - [Feature](#feature) - - [Documentation](#documentation) - [Bug or Regression](#bug-or-regression-1) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake) - [Dependencies](#dependencies-1) - 
[Added](#added-1) - [Changed](#changed-1) - [Removed](#removed-1) -- [v1.33.0-rc.1](#v1330-rc1) - - [Downloads for v1.33.0-rc.1](#downloads-for-v1330-rc1) +- [v1.33.0](#v1330) + - [Downloads for v1.33.0](#downloads-for-v1330) - [Source Code](#source-code-2) - [Client Binaries](#client-binaries-2) - [Server Binaries](#server-binaries-2) - [Node Binaries](#node-binaries-2) - [Container Images](#container-images-2) - - [Changelog since v1.33.0-rc.0](#changelog-since-v1330-rc0) + - [Changelog since v1.32.0](#changelog-since-v1320) + - [Urgent Upgrade Notes](#urgent-upgrade-notes) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade) - [Changes by Kind](#changes-by-kind-2) + - [Deprecation](#deprecation) + - [API Change](#api-change) + - [Feature](#feature-1) + - [Documentation](#documentation) - [Bug or Regression](#bug-or-regression-2) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-1) - [Dependencies](#dependencies-2) - [Added](#added-2) - [Changed](#changed-2) - [Removed](#removed-2) -- [v1.33.0-rc.0](#v1330-rc0) - - [Downloads for v1.33.0-rc.0](#downloads-for-v1330-rc0) +- [v1.33.0-rc.1](#v1330-rc1) + - [Downloads for v1.33.0-rc.1](#downloads-for-v1330-rc1) - [Source Code](#source-code-3) - [Client Binaries](#client-binaries-3) - [Server Binaries](#server-binaries-3) - [Node Binaries](#node-binaries-3) - [Container Images](#container-images-3) - - [Changelog since v1.33.0-beta.0](#changelog-since-v1330-beta0) - - [Urgent Upgrade Notes](#urgent-upgrade-notes-1) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-1) + - [Changelog since v1.33.0-rc.0](#changelog-since-v1330-rc0) - [Changes by Kind](#changes-by-kind-3) - - [Deprecation](#deprecation-1) - - [API Change](#api-change-1) - - [Feature](#feature-1) - [Bug or Regression](#bug-or-regression-3) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-1) - [Dependencies](#dependencies-3) - 
[Added](#added-3) - [Changed](#changed-3) - [Removed](#removed-3) -- [v1.33.0-beta.0](#v1330-beta0) - - [Downloads for v1.33.0-beta.0](#downloads-for-v1330-beta0) +- [v1.33.0-rc.0](#v1330-rc0) + - [Downloads for v1.33.0-rc.0](#downloads-for-v1330-rc0) - [Source Code](#source-code-4) - [Client Binaries](#client-binaries-4) - [Server Binaries](#server-binaries-4) - [Node Binaries](#node-binaries-4) - [Container Images](#container-images-4) - - [Changelog since v1.33.0-alpha.3](#changelog-since-v1330-alpha3) + - [Changelog since v1.33.0-beta.0](#changelog-since-v1330-beta0) + - [Urgent Upgrade Notes](#urgent-upgrade-notes-1) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-1) - [Changes by Kind](#changes-by-kind-4) - - [API Change](#api-change-2) + - [Deprecation](#deprecation-1) + - [API Change](#api-change-1) - [Feature](#feature-2) - [Bug or Regression](#bug-or-regression-4) - [Other (Cleanup or Flake)](#other-cleanup-or-flake-2) @@ -86,19 +87,16 @@ - [Added](#added-4) - [Changed](#changed-4) - [Removed](#removed-4) -- [v1.33.0-alpha.3](#v1330-alpha3) - - [Downloads for v1.33.0-alpha.3](#downloads-for-v1330-alpha3) +- [v1.33.0-beta.0](#v1330-beta0) + - [Downloads for v1.33.0-beta.0](#downloads-for-v1330-beta0) - [Source Code](#source-code-5) - [Client Binaries](#client-binaries-5) - [Server Binaries](#server-binaries-5) - [Node Binaries](#node-binaries-5) - [Container Images](#container-images-5) - - [Changelog since v1.33.0-alpha.2](#changelog-since-v1330-alpha2) - - [Urgent Upgrade Notes](#urgent-upgrade-notes-2) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-2) + - [Changelog since v1.33.0-alpha.3](#changelog-since-v1330-alpha3) - [Changes by Kind](#changes-by-kind-5) - - [Deprecation](#deprecation-2) - - [API Change](#api-change-3) + - [API Change](#api-change-2) - [Feature](#feature-3) - [Bug or Regression](#bug-or-regression-5) - [Other 
(Cleanup or Flake)](#other-cleanup-or-flake-3) @@ -106,17 +104,19 @@ - [Added](#added-5) - [Changed](#changed-5) - [Removed](#removed-5) -- [v1.33.0-alpha.2](#v1330-alpha2) - - [Downloads for v1.33.0-alpha.2](#downloads-for-v1330-alpha2) +- [v1.33.0-alpha.3](#v1330-alpha3) + - [Downloads for v1.33.0-alpha.3](#downloads-for-v1330-alpha3) - [Source Code](#source-code-6) - [Client Binaries](#client-binaries-6) - [Server Binaries](#server-binaries-6) - [Node Binaries](#node-binaries-6) - [Container Images](#container-images-6) - - [Changelog since v1.33.0-alpha.1](#changelog-since-v1330-alpha1) + - [Changelog since v1.33.0-alpha.2](#changelog-since-v1330-alpha2) + - [Urgent Upgrade Notes](#urgent-upgrade-notes-2) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-2) - [Changes by Kind](#changes-by-kind-6) - - [Deprecation](#deprecation-3) - - [API Change](#api-change-4) + - [Deprecation](#deprecation-2) + - [API Change](#api-change-3) - [Feature](#feature-4) - [Bug or Regression](#bug-or-regression-6) - [Other (Cleanup or Flake)](#other-cleanup-or-flake-4) @@ -124,29 +124,167 @@ - [Added](#added-6) - [Changed](#changed-6) - [Removed](#removed-6) -- [v1.33.0-alpha.1](#v1330-alpha1) - - [Downloads for v1.33.0-alpha.1](#downloads-for-v1330-alpha1) +- [v1.33.0-alpha.2](#v1330-alpha2) + - [Downloads for v1.33.0-alpha.2](#downloads-for-v1330-alpha2) - [Source Code](#source-code-7) - [Client Binaries](#client-binaries-7) - [Server Binaries](#server-binaries-7) - [Node Binaries](#node-binaries-7) - [Container Images](#container-images-7) - - [Changelog since v1.32.0](#changelog-since-v1320-1) - - [Urgent Upgrade Notes](#urgent-upgrade-notes-3) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-3) + - [Changelog since v1.33.0-alpha.1](#changelog-since-v1330-alpha1) - [Changes by Kind](#changes-by-kind-7) - - [API Change](#api-change-5) + - 
[Deprecation](#deprecation-3) + - [API Change](#api-change-4) - [Feature](#feature-5) - - [Documentation](#documentation-1) - [Bug or Regression](#bug-or-regression-7) - [Other (Cleanup or Flake)](#other-cleanup-or-flake-5) - [Dependencies](#dependencies-7) - [Added](#added-7) - [Changed](#changed-7) - [Removed](#removed-7) +- [v1.33.0-alpha.1](#v1330-alpha1) + - [Downloads for v1.33.0-alpha.1](#downloads-for-v1330-alpha1) + - [Source Code](#source-code-8) + - [Client Binaries](#client-binaries-8) + - [Server Binaries](#server-binaries-8) + - [Node Binaries](#node-binaries-8) + - [Container Images](#container-images-8) + - [Changelog since v1.32.0](#changelog-since-v1320-1) + - [Urgent Upgrade Notes](#urgent-upgrade-notes-3) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-3) + - [Changes by Kind](#changes-by-kind-8) + - [API Change](#api-change-5) + - [Feature](#feature-6) + - [Documentation](#documentation-1) + - [Bug or Regression](#bug-or-regression-8) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-6) + - [Dependencies](#dependencies-8) + - [Added](#added-8) + - [Changed](#changed-8) + - [Removed](#removed-8) +# v1.33.2 + + +## Downloads for v1.33.2 + + + +### Source Code + +filename | sha512 hash +-------- | ----------- +[kubernetes.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes.tar.gz) | 6983c9b0c8005ab8b332eba337ed1ca8d14a1419d6cb26473ffdcf1a3ec564e107ff3baadc7306d01d1cd722470034de8ab936a1040e0d367efdaccbea911432 +[kubernetes-src.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-src.tar.gz) | ab55d41194cdcef73331add791ae438705436f1d280ba615293aa27727cf0cbf82c8d93b50e71ca2a2ab72d77a13232894a6e56a190c5ea7ffac3633606761a9 + +### Client Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-client-darwin-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-darwin-amd64.tar.gz) | 
2ee37c2e6592a6f1c5da07c53098747985c644174a0dcba1aab55850382c19fb6ee96ac5f718d8b9a3df42a200d0ef6517deb3396f241a107805ef3e8c5a5729 +[kubernetes-client-darwin-arm64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-darwin-arm64.tar.gz) | 7ef489ef82f1e6d3a4ca0424cf5a09b289a4d8778e52c567ee5dc80779c0d652015343f224f2556ff80b59d9745dd2ec8294955a33f1c6af2073256d8fc54b92 +[kubernetes-client-linux-386.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-386.tar.gz) | 0d1ee8cd9db1a131845bdaab59ff07fcc960468d4d231506ba500e7c361992dcec1530c0f6ba13742f6846052357dbff7b412ee7b95ef4e613afb6b311805f6b +[kubernetes-client-linux-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-amd64.tar.gz) | 1d20d5f3705b2c585afc2814e7cc56f8cf0de223345f8dffb62c625697ae97698c5e9d62a13d9def2db4152c3d636e7eefba9cd6d750167c8bf5150c2034c272 +[kubernetes-client-linux-arm.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-arm.tar.gz) | 41a3043805f20f98157464c3ddd0310336ca417a4775460344fe421dfdd04e3f69b7d99b2495fc1959e566230ae3280d998b5a689de473928d2f8895ea68e3bb +[kubernetes-client-linux-arm64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-arm64.tar.gz) | c82a54169ca775ac85aaa9ed17370eee2addb471442a85d52fa8cf4fbba59b31cef57d328e4cd56f5f6c1489c51203d658aa24ead855bd3518afae5ad993b823 +[kubernetes-client-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-ppc64le.tar.gz) | 0e29bc915785911d6f23c1a6de3ec603db8edcb4504d5d87fca373943d6427fd47f1dfa874afded1157c870953a36caa4da24ca2008857cf664b417d66812f22 +[kubernetes-client-linux-s390x.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-linux-s390x.tar.gz) | 00a38841c1a6419f63db255b76932db7cfd448177b8ae17f9147f4850e4030dce075eeebde5052ac818e5104f21c47b766af10043f0b739aa479509c19b5eb5d +[kubernetes-client-windows-386.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-windows-386.tar.gz) | 
963980e4e11ee925a6c4d7b4c82e5e9bb357353be7aaa12368451f507074484a6085367f153c615d25905f3d0d3de67c2793a9e5ee7ed4e67779f646f7ab285c +[kubernetes-client-windows-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-windows-amd64.tar.gz) | e15af258c113f5e0b5d83812b53a4f62fa3550b0c0301a116d91a62fbec0448dc9ac9b825bce11dd5c2c649aa084ae1fc418381de1c51eeb06c38ab99096ec47 +[kubernetes-client-windows-arm64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-client-windows-arm64.tar.gz) | 25e3690418010cb8d5bb9882a60af91e39768650f80f9b2fca910e09917f6d8dec000c17c22011b501e6d72e4ecb4faaed1bc165cb7af4ba82361dce6e664e8c + +### Server Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-server-linux-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-server-linux-amd64.tar.gz) | 1831758107a36c6915d6b4257b44c63cd68e1788fdf412f40401015f483407de116d7cfd4d1e61b5e8ff959d2182a41d6f9b70e2248eb97cba718f3f8715eaa2 +[kubernetes-server-linux-arm64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-server-linux-arm64.tar.gz) | c355f704091efd969c0af60d87d4320b8f9ce6617dcb0429d7702ac85466a40c4ed71d1996c0e480e7bc562ecd49ec36213ee43fb0c98f6502eee1293b0ad01c +[kubernetes-server-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-server-linux-ppc64le.tar.gz) | e1711fcdb303b1685712dd6e3a7cbf2ca209c2a49fa010e36fec1bde6b4df4675b873f843804602dab5705c7d0d7db61d98cc344c5aace009bd008b115d084cc +[kubernetes-server-linux-s390x.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-server-linux-s390x.tar.gz) | 570ec1707d9b08803ab9c307eef3c8a54cba6ffde032246ab3fe2186d6d9c199f353f65f1d798df522c40af53e195bff99ef64e56bfa2c9f3ee6b776ead3ce6f + +### Node Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-node-linux-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-node-linux-amd64.tar.gz) | ac478b9504b153cee9d5fea8595621d65380c1040013d2f55070c1fab5a06a035d1e8ca6c62da3f70d8e2a980d7d30765607fde57c6a27c3b42c2de1270cf18c 
+[kubernetes-node-linux-arm64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-node-linux-arm64.tar.gz) | b7a0c5d2e51c81a879bc8785eabc10226d7c00e9cb337e572f41f00c8e5d122050401da6cc3a981db2eb8b5295d47fa69a4dc72de8ae4dad9964aa192f2f28ff +[kubernetes-node-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-node-linux-ppc64le.tar.gz) | a64c192e0961089662351f1d74b9de66433064e86e1b986ef704c8e8ecfd9acc5dbe94cd906302666adc7a7463d1e04a36098f4f892ce2350dd66beb8c36d388 +[kubernetes-node-linux-s390x.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-node-linux-s390x.tar.gz) | 4fcdde7c52f82c463effb13bce8b59014a585edff716203dfb33f25223710346501fc49acb245a90e2bc1e99642f23d951ac16aece1d4b167dd71e7c2c622c13 +[kubernetes-node-windows-amd64.tar.gz](https://dl.k8s.io/v1.33.2/kubernetes-node-windows-amd64.tar.gz) | 89d12b1359b15f030afab110195d90227a38420a6ad93c84237317b958c4c13826e35fc9dba687e345c04f38f03e92045714acaee88f4f9c21f3a12a575de609 + +### Container Images + +All container images are available as manifest lists and support the described +architectures. It is also possible to pull a specific architecture directly by +adding the "-$ARCH" suffix to the container image name. 
+ +name | architectures +---- | ------------- +[registry.k8s.io/conformance:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-s390x) +[registry.k8s.io/kube-apiserver:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-s390x) +[registry.k8s.io/kube-controller-manager:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-ppc64le), 
[s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-s390x) +[registry.k8s.io/kube-proxy:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-s390x) +[registry.k8s.io/kube-scheduler:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-s390x) +[registry.k8s.io/kubectl:v1.33.2](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-ppc64le), 
[s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-s390x) + +## Changelog since v1.33.1 + +## Important Security Information + +This release contains changes that address the following vulnerabilities: + +### CVE-2025-4563: Nodes can bypass dynamic resource allocation authorization checks + +A vulnerability exists in the NodeRestriction admission controller where nodes can bypass dynamic resource allocation authorization checks. When the DynamicResourceAllocation feature gate is enabled, the controller properly validates resource claim statuses during pod status updates but fails to perform equivalent validation during pod creation. This allows a compromised node to create mirror pods that access unauthorized dynamic resources, potentially leading to privilege escalation. + + +**Affected Versions**: + - kube-apiserver v1.32.0 - v1.32.5 + - kube-apiserver v1.33.0 - v1.33.1 + +**Fixed Versions**: + - kube-apiserver v1.32.6 + - kube-apiserver v1.33.2 + +This vulnerability was reported by amitschendel. 
+ + +**CVSS Rating:** Low (2.7) [CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:N/I:N/A:L](https://www.first.org/cvss/calculator/3.1#CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:N/I:N/A:L) + +## Changes by Kind + +### Feature + +- Kubernetes is now built using Go 1.24.3 ([#131935](https://github.com/kubernetes/kubernetes/pull/131935), [@cpanato](https://github.com/cpanato)) [SIG Release and Testing] +- Kubernetes is now built using Go 1.24.4 ([#132226](https://github.com/kubernetes/kubernetes/pull/132226), [@cpanato](https://github.com/cpanato)) [SIG Release and Testing] + +### Bug or Regression + +- Do not expand volume on the node, if controller expansion is finished ([#131987](https://github.com/kubernetes/kubernetes/pull/131987), [@gnufied](https://github.com/gnufied)) [SIG Storage] +- Do not log error event when waiting for expansion on the kubelet ([#132098](https://github.com/kubernetes/kubernetes/pull/132098), [@gnufied](https://github.com/gnufied)) [SIG Storage] +- Fixes an issue where Windows kube-proxy's ModifyLoadBalancer API updates did not match HNS state in version 15.4. ModifyLoadBalancer policy is supported from Kubernetes 1.31+. ([#131649](https://github.com/kubernetes/kubernetes/pull/131649), [@princepereira](https://github.com/princepereira)) [SIG Windows] +- Kubelet: close a loophole where static pods could reference arbitrary ResourceClaims. The pods created by the kubelet then don't run due to a sanity check, but such references shouldn't be allowed regardless. ([#131876](https://github.com/kubernetes/kubernetes/pull/131876), [@pohly](https://github.com/pohly)) [SIG Apps, Auth and Node] +- The shorthand for --output flag in kubectl explain was accidentally deleted, but has been added back. ([#131993](https://github.com/kubernetes/kubernetes/pull/131993), [@superbrothers](https://github.com/superbrothers)) [SIG CLI] + +### Other (Cleanup or Flake) + +- Improve error message when a pod with user namespaces is created and the runtime doesn't support user namespaces. 
([#131781](https://github.com/kubernetes/kubernetes/pull/131781), [@rata](https://github.com/rata)) [SIG Node] + +## Dependencies + +### Added +_Nothing has changed._ + +### Changed +- github.com/Microsoft/hnslib: [v0.0.8 → v0.1.1](https://github.com/Microsoft/hnslib/compare/v0.0.8...v0.1.1) + +### Removed +_Nothing has changed._ + + + # v1.33.1 diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go index 480d779df2..8dba40b85d 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go @@ -576,6 +576,15 @@ func (c *Client) MemberPromote(learnerID uint64) error { ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout) defer cancel() + isLearner, err := c.isLearner(learnerID) + if err != nil { + return false, err + } + if !isLearner { + klog.V(1).Infof("[etcd] Member %s was already promoted.", strconv.FormatUint(learnerID, 16)) + return true, nil + } + _, err = cli.MemberPromote(ctx, learnerID) if err == nil { klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %s", strconv.FormatUint(learnerID, 16)) diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/disabled_tests.go b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/disabled_tests.go index a0d940e3f9..4fa657cf5a 100644 --- a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/disabled_tests.go +++ b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/disabled_tests.go @@ -11,16 +11,12 @@ func filterOutDisabledSpecs(specs et.ExtensionTestSpecs) et.ExtensionTestSpecs { "Alpha": { // alpha features that are not gated "[Feature:StorageVersionAPI]", "[Feature:ClusterTrustBundle]", - "[Feature:SELinuxMount]", - "[FeatureGate:SELinuxMount]", "[Feature:DynamicResourceAllocation]", - 
"[Feature:VolumeAttributesClass]", // disabled Beta "[sig-cli] Kubectl client Kubectl prune with applyset should apply and prune objects", // Alpha feature since k8s 1.27 // 4.19 "[Feature:PodLevelResources]", "[Feature:PodLogsQuerySplitStreams]", // 4.20 - "[Feature:OffByDefault]", "[Feature:CBOR]", }, // tests for features that are not implemented in openshift @@ -36,6 +32,7 @@ func filterOutDisabledSpecs(specs et.ExtensionTestSpecs) et.ExtensionTestSpecs { "[Feature:KubeProxyDaemonSetMigration]", // upgrades are run separately "[Feature:BoundServiceAccountTokenVolume]", // upgrades are run separately "[Feature:StatefulUpgrade]", // upgrades are run separately + "Service CIDRs", // requires extra support from some components }, // tests that rely on special configuration that we do not yet support "SpecialConfig": { @@ -187,6 +184,21 @@ func filterOutDisabledSpecs(specs et.ExtensionTestSpecs) et.ExtensionTestSpecs { "[sig-storage] In-tree Volumes [Driver: vsphere] [Testpattern: Dynamic PV (immediate binding)] topology should fail to schedule a pod which has topologies that conflict with AllowedTopologies", "[sig-storage] In-tree Volumes [Driver: vsphere] [Testpattern: Dynamic PV (immediate binding)] topology should provision a volume and schedule a pod with AllowedTopologies", }, + // tests too slow to be part of conformance + "Slow": { + "[sig-scalability]", // disable from the default set for now + "should create and stop a working application", // Inordinately slow tests + + "[Feature:PerformanceDNS]", // very slow + + "validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP", // 5m, really? 
+ }, + // tests that are known flaky + "Flaky": { + "Job should run a job to completion when tasks sometimes fail and are not locally restarted", // seems flaky, also may require too many resources + // TODO(node): test works when run alone, but not in the suite in CI + "[Feature:HPA] Horizontal pod autoscaling (scale resource: CPU) [sig-autoscaling] ReplicationController light Should scale from 1 pod to 2 pods", + }, } var disabledSpecs et.ExtensionTestSpecs diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/environment_selectors.go b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/environment_selectors.go index 6b451e03de..89c5418351 100644 --- a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/environment_selectors.go +++ b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/environment_selectors.go @@ -2,8 +2,10 @@ package main import ( "fmt" + "strings" et "github.com/openshift-eng/openshift-tests-extension/pkg/extension/extensiontests" + "k8s.io/kubernetes/pkg/features" ) // addEnvironmentSelectors adds the environmentSelector field to appropriate specs to facilitate including or excluding @@ -14,6 +16,7 @@ func addEnvironmentSelectors(specs et.ExtensionTestSpecs) { filterByTopology(specs) filterByNoOptionalCapabilities(specs) filterByNetwork(specs) + filterByFeatureGates(specs) // LoadBalancer tests in 1.31 require explicit platform-specific skips // https://issues.redhat.com/browse/OCPBUGS-38840 @@ -25,6 +28,13 @@ func addEnvironmentSelectors(specs et.ExtensionTestSpecs) { specs.SelectAny([]et.SelectFunction{ // Since these must use "NameContainsAll" they cannot be included in filterByNetwork et.NameContainsAll("NetworkPolicy", "named port"), }).Exclude(et.NetworkEquals("OVNKubernetes")).AddLabel("[Skipped:Network/OVNKubernetes]") + + // SELinux tests marked with [Feature:SELinuxMountReadWriteOncePodOnly] require SELinuxMount + // feature gate **disabled**. 
+ // REBASE NOTE: this will intentionally fail to compile when the feature gate is removed upstream. + // Just remove this check + notify the OCP storage team. + specs.Select(et.NameContains("[Feature:SELinuxMountReadWriteOncePodOnly]")). + Exclude(et.FeatureGateEnabled(string(features.SELinuxMount))) } // filterByPlatform is a helper function to do, simple, "NameContains" filtering on tests by platform @@ -123,6 +133,12 @@ func filterByPlatform(specs et.ExtensionTestSpecs) { // https://issues.redhat.com/browse/OCPBUGS-53249 "[sig-network] LoadBalancers [Feature:LoadBalancer] should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on", }, + // MicroShift identifies itself as "none" + "none": { + // LoadBalancer tests in 1.31 require explicit platform-specific skips + // https://issues.redhat.com/browse/OCPBUGS-53249 + "[sig-network] LoadBalancers [Feature:LoadBalancer] should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on", + }, } for platform, exclusions := range platformExclusions { @@ -259,3 +275,15 @@ func filterByNetwork(specs et.ExtensionTestSpecs) { AddLabel(fmt.Sprintf("[Skipped:%s]", network)) } } + +// filter all tests from feature gates that are not explicitly enabled +func filterByFeatureGates(specs et.ExtensionTestSpecs) { + for _, spec := range specs { + for label := range spec.Labels { + if strings.Contains(label, "FeatureGate:") { + featureGate := strings.TrimPrefix(label, "FeatureGate:") + spec.Exclude(et.FeatureGateDisabled(featureGate)) + } + } + } +} diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/labels.go b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/labels.go index ff60f0156d..747f2db5f3 100644 --- a/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/labels.go +++ b/deps/github.com/openshift/kubernetes/openshift-hack/cmd/k8s-tests-ext/labels.go @@ -6,21 +6,6 @@ import ( func 
addLabelsToSpecs(specs et.ExtensionTestSpecs) { var namesByLabel = map[string][]string{ - // tests too slow to be part of conformance - "[Slow]": { - "[sig-scalability]", // disable from the default set for now - "should create and stop a working application", // Inordinately slow tests - - "[Feature:PerformanceDNS]", // very slow - - "validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP", // 5m, really? - }, - // tests that are known flaky - "[Flaky]": { - "Job should run a job to completion when tasks sometimes fail and are not locally restarted", // seems flaky, also may require too many resources - // TODO(node): test works when run alone, but not in the suite in CI - "[Feature:HPA] Horizontal pod autoscaling (scale resource: CPU) [sig-autoscaling] ReplicationController light Should scale from 1 pod to 2 pods", - }, // tests that must be run without competition "[Serial]": { "[Disruptive]", diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/e2e/annotate/generated/zz_generated.annotations.go b/deps/github.com/openshift/kubernetes/openshift-hack/e2e/annotate/generated/zz_generated.annotations.go index 48eecf34ad..0a564ac269 100644 --- a/deps/github.com/openshift/kubernetes/openshift-hack/e2e/annotate/generated/zz_generated.annotations.go +++ b/deps/github.com/openshift/kubernetes/openshift-hack/e2e/annotate/generated/zz_generated.annotations.go @@ -1735,6 +1735,12 @@ var Annotations = map[string]string{ "[sig-node] Lease lease API should be available [Conformance]": " [Suite:openshift/conformance/parallel/minimal] [Suite:k8s]", + "[sig-node] Lifecycle Sleep Hook when create a pod with lifecycle hook using sleep action ignore terminated container": " [Suite:openshift/conformance/parallel] [Suite:k8s]", + + "[sig-node] Lifecycle Sleep Hook when create a pod with lifecycle hook using sleep action reduce GracePeriodSeconds during runtime": " [Suite:openshift/conformance/parallel] [Suite:k8s]", + 
+ "[sig-node] Lifecycle Sleep Hook when create a pod with lifecycle hook using sleep action valid prestop hook using sleep action": " [Suite:openshift/conformance/parallel] [Suite:k8s]", + "[sig-node] Mount propagation should propagate mounts within defined scopes": " [Suite:openshift/conformance/parallel] [Suite:k8s]", "[sig-node] NoExecuteTaintManager Multiple Pods [Serial] evicts pods with minTolerationSeconds [Disruptive] [Conformance]": " [Suite:k8s]", @@ -2145,12 +2151,6 @@ var Annotations = map[string]string{ "[sig-node] [Feature:PodLifecycleSleepActionAllowZero] when create a pod with lifecycle hook using sleep action with a duration of zero seconds prestop hook using sleep action with zero duration": " [Suite:openshift/conformance/parallel] [Suite:k8s]", - "[sig-node] [Feature:PodLifecycleSleepAction] when create a pod with lifecycle hook using sleep action ignore terminated container": " [Suite:openshift/conformance/parallel] [Suite:k8s]", - - "[sig-node] [Feature:PodLifecycleSleepAction] when create a pod with lifecycle hook using sleep action reduce GracePeriodSeconds during runtime": " [Suite:openshift/conformance/parallel] [Suite:k8s]", - - "[sig-node] [Feature:PodLifecycleSleepAction] when create a pod with lifecycle hook using sleep action valid prestop hook using sleep action": " [Suite:openshift/conformance/parallel] [Suite:k8s]", - "[sig-node] [Feature:SidecarContainers] Probing restartable init container should *not* be restarted by liveness probe because startup probe delays it": " [Suite:openshift/conformance/parallel] [Suite:k8s]", "[sig-node] [Feature:SidecarContainers] Probing restartable init container should *not* be restarted with a /healthz http liveness probe": " [Suite:openshift/conformance/parallel] [Suite:k8s]", diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel b/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel index 68c9e3b673..331cc88d99 100644 --- 
a/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel +++ b/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel @@ -14,4 +14,4 @@ COPY --from=builder /tmp/build/* /usr/bin/ LABEL io.k8s.display-name="OpenShift Kubernetes Server Commands" \ io.k8s.description="OpenShift is a platform for developing, building, and deploying containerized applications." \ io.openshift.tags="openshift,hyperkube" \ - io.openshift.build.versions="kubernetes=1.33.2" \ No newline at end of file + io.openshift.build.versions="kubernetes=1.33.3" \ No newline at end of file diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/test-kubernetes-e2e.sh b/deps/github.com/openshift/kubernetes/openshift-hack/test-kubernetes-e2e.sh index b74bd86c12..9e7ed7796e 100755 --- a/deps/github.com/openshift/kubernetes/openshift-hack/test-kubernetes-e2e.sh +++ b/deps/github.com/openshift/kubernetes/openshift-hack/test-kubernetes-e2e.sh @@ -31,7 +31,7 @@ NETWORK_SKIPS="\[Skipped:Network/OVNKubernetes\]|\[Feature:Networking-IPv6\]|\[F TEST_SUITE="${TEST_SUITE:-parallel}" COMMON_SKIPS="\[Slow\]|\[Disruptive\]|\[Flaky\]|\[Disabled:.+\]|\[Skipped:${PLATFORM}\]|${NETWORK_SKIPS}" # Skip tests for features that require a TechPreview cluster. TODO: Remove when the feature is enabled by default. 
-COMMON_SKIPS="\[OCPFeatureGate:VolumeGroupSnapshot\]|${COMMON_SKIPS}" +COMMON_SKIPS="\[OCPFeatureGate:VolumeGroupSnapshot\]|\[Feature:OffByDefault\]|${COMMON_SKIPS}" case "${TEST_SUITE}" in serial) diff --git a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go index 26506e4701..67781bcb4e 100644 --- a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go +++ b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go @@ -1,22 +1,51 @@ package authentication import ( + "cmp" "context" "fmt" "io" + "math" + "slices" + "time" + "golang.org/x/sync/singleflight" "k8s.io/apimachinery/pkg/api/validation" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/cel/library" + "k8s.io/apiserver/pkg/warning" + "k8s.io/klog/v2" + "k8s.io/utils/lru" + + "github.com/google/cel-go/checker" configv1 "github.com/openshift/api/config/v1" + authenticationcel "k8s.io/apiserver/pkg/authentication/cel" crvalidation "k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation" ) const PluginName = "config.openshift.io/ValidateAuthentication" +const ( + wholeResourceExcessiveCostThreshold = 100000000 + excessiveCompileDuration = time.Second + costlyExpressionWarningCount = 3 + + // This is the default KAS request header size limit in bytes. + // Because JWTs are only limited in size by the maximum request header size, + // we can use this fixed value to make pessimistic size estimates by assuming + // that the inputs were decoded from base64-encoded JSON. 
+ // + // This isn't very precise, but can still be used to provide + // end-users a signal that they are potentially doing very expensive + // operations with CEL expressions whose cost is dependent + // on the size of the input. + fixedSize = 1 << 20 +) + // Register registers a plugin func Register(plugins *admission.Plugins) { plugins.Register(PluginName, func(config io.Reader) (admission.Interface, error) { @@ -25,7 +54,9 @@ func Register(plugins *admission.Plugins) { configv1.GroupVersion.WithResource("authentications").GroupResource(): true, }, map[schema.GroupVersionKind]crvalidation.ObjectValidator{ - configv1.GroupVersion.WithKind("Authentication"): authenticationV1{}, + configv1.GroupVersion.WithKind("Authentication"): authenticationV1{ + cel: defaultCelStore(), + }, }) }) } @@ -46,21 +77,66 @@ func toAuthenticationV1(uncastObj runtime.Object) (*configv1.Authentication, fie return obj, nil } -type authenticationV1 struct{} +type celStore struct { + compilingGroup singleFlightDoer + compiledStore compiledExpressionStore + compiler authenticationcel.Compiler + sizeEstimator checker.CostEstimator + timerFactory timerFactory +} + +func defaultCelStore() *celStore { + return &celStore{ + compiledStore: lru.New(100), + compilingGroup: new(singleflight.Group), + compiler: authenticationcel.NewDefaultCompiler(), + sizeEstimator: &fixedSizeEstimator{ + size: fixedSize, + }, + timerFactory: &excessiveCompileTimerFactory{}, + } +} + +type singleFlightDoer interface { + Do(key string, fn func() (any, error)) (any, error, bool) +} + +type compiledExpressionStore interface { + Add(key lru.Key, value interface{}) + Get(key lru.Key) (value interface{}, ok bool) +} + +type timerFactory interface { + Timer(time.Duration, func()) timer +} + +type timer interface { + Stop() bool +} + +type excessiveCompileTimerFactory struct{} + +func (ectf *excessiveCompileTimerFactory) Timer(duration time.Duration, do func()) timer { + return time.AfterFunc(duration, do) +} + +type 
authenticationV1 struct { + cel *celStore +} -func (authenticationV1) ValidateCreate(_ context.Context, uncastObj runtime.Object) field.ErrorList { +func (a authenticationV1) ValidateCreate(ctx context.Context, uncastObj runtime.Object) field.ErrorList { obj, errs := toAuthenticationV1(uncastObj) if len(errs) > 0 { return errs } errs = append(errs, validation.ValidateObjectMeta(&obj.ObjectMeta, false, crvalidation.RequireNameCluster, field.NewPath("metadata"))...) - errs = append(errs, validateAuthenticationSpecCreate(obj.Spec)...) + errs = append(errs, validateAuthenticationSpecCreate(ctx, obj.Spec, a.cel)...) return errs } -func (authenticationV1) ValidateUpdate(_ context.Context, uncastObj runtime.Object, uncastOldObj runtime.Object) field.ErrorList { +func (a authenticationV1) ValidateUpdate(ctx context.Context, uncastObj runtime.Object, uncastOldObj runtime.Object) field.ErrorList { obj, errs := toAuthenticationV1(uncastObj) if len(errs) > 0 { return errs @@ -71,7 +147,7 @@ func (authenticationV1) ValidateUpdate(_ context.Context, uncastObj runtime.Obje } errs = append(errs, validation.ValidateObjectMetaUpdate(&obj.ObjectMeta, &oldObj.ObjectMeta, field.NewPath("metadata"))...) - errs = append(errs, validateAuthenticationSpecUpdate(obj.Spec, oldObj.Spec)...) + errs = append(errs, validateAuthenticationSpecUpdate(ctx, obj.Spec, oldObj.Spec, a.cel)...) 
return errs } @@ -92,15 +168,15 @@ func (authenticationV1) ValidateStatusUpdate(_ context.Context, uncastObj runtim return errs } -func validateAuthenticationSpecCreate(spec configv1.AuthenticationSpec) field.ErrorList { - return validateAuthenticationSpec(spec) +func validateAuthenticationSpecCreate(ctx context.Context, spec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { + return validateAuthenticationSpec(ctx, spec, cel) } -func validateAuthenticationSpecUpdate(newspec, oldspec configv1.AuthenticationSpec) field.ErrorList { - return validateAuthenticationSpec(newspec) +func validateAuthenticationSpecUpdate(ctx context.Context, newspec, oldspec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { + return validateAuthenticationSpec(ctx, newspec, cel) } -func validateAuthenticationSpec(spec configv1.AuthenticationSpec) field.ErrorList { +func validateAuthenticationSpec(ctx context.Context, spec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { errs := field.ErrorList{} specField := field.NewPath("spec") @@ -121,14 +197,238 @@ func validateAuthenticationSpec(spec configv1.AuthenticationSpec) field.ErrorLis spec.WebhookTokenAuthenticator, fmt.Sprintf("this field cannot be set with the %q .spec.type", spec.Type), )) } - } errs = append(errs, crvalidation.ValidateConfigMapReference(specField.Child("oauthMetadata"), spec.OAuthMetadata, false)...) + // Perform External OIDC Provider related validations + // ---------------- + + // There is currently no guarantee that these fields are not set when the spec.Type is != OIDC. + // To ensure we are enforcing appropriate admission validations at all times, just always iterate through the list + // of OIDC Providers and perform the validations. + // If/when the openshift/api admission validations are updated to enforce that this field is not configured + // when Type != OIDC, this loop should be a no-op due to an empty list.
+ for i, provider := range spec.OIDCProviders { + errs = append(errs, validateOIDCProvider(ctx, specField.Child("oidcProviders").Index(i), cel, provider)...) + } + // ---------------- + return errs } func validateAuthenticationStatus(status configv1.AuthenticationStatus) field.ErrorList { return crvalidation.ValidateConfigMapReference(field.NewPath("status", "integratedOAuthMetadata"), status.IntegratedOAuthMetadata, false) } + +type costRecorder struct { + Recordings []costRecording +} + +func (cr *costRecorder) AddRecording(field *field.Path, cost uint64) { + cr.Recordings = append(cr.Recordings, costRecording{ + Field: field, + Cost: cost, + }) +} + +type costRecording struct { + Field *field.Path + Cost uint64 +} + +func validateOIDCProvider(ctx context.Context, path *field.Path, cel *celStore, provider configv1.OIDCProvider) field.ErrorList { + costRecorder := &costRecorder{} + + errs := validateClaimMappings(ctx, path, cel, costRecorder, provider.ClaimMappings) + + var totalCELExpressionCost uint64 = 0 + + for _, recording := range costRecorder.Recordings { + totalCELExpressionCost = addCost(totalCELExpressionCost, recording.Cost) + } + + if totalCELExpressionCost > wholeResourceExcessiveCostThreshold { + costlyExpressions := getNMostCostlyExpressions(costlyExpressionWarningCount, costRecorder.Recordings...) + warn := fmt.Sprintf("runtime cost of all CEL expressions exceeds %d points. top %d most costly expressions: %v", wholeResourceExcessiveCostThreshold, len(costlyExpressions), costlyExpressions) + warning.AddWarning(ctx, "", warn) + klog.Warning(warn) + } + + return errs +} + +// addCost adds a cost value to a total value, +// returning the resulting value. +// addCost handles integer overflow errors +// by just always returning the maximum uint64 +// value if an overflow would occur. 
+func addCost(total, cost uint64) uint64 { + if total > math.MaxUint64-cost { + return math.MaxUint64 + } + + return total + cost +} + +func getNMostCostlyExpressions(n int, records ...costRecording) []costRecording { + // sort in descending order of cost + slices.SortFunc(records, func(a, b costRecording) int { + return cmp.Compare(b.Cost, a.Cost) + }) + + // safely get the N most expensive cost records + if len(records) > n { + return records[:n] + } + + return records +} + +func validateClaimMappings(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, claimMappings configv1.TokenClaimMappings) field.ErrorList { + path = path.Child("claimMappings") + + out := field.ErrorList{} + + out = append(out, validateUIDClaimMapping(ctx, path, cel, costRecorder, claimMappings.UID)...) + out = append(out, validateExtraClaimMapping(ctx, path, cel, costRecorder, claimMappings.Extra...)...) + + return out +} + +func validateUIDClaimMapping(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, uid *configv1.TokenClaimOrExpressionMapping) field.ErrorList { + if uid == nil { + return nil + } + + out := field.ErrorList{} + + if uid.Expression != "" { + childPath := path.Child("uid", "expression") + + out = append(out, validateCELExpression(ctx, cel, costRecorder, childPath, &authenticationcel.ClaimMappingExpression{ + Expression: uid.Expression, + })...) + } + + return out +} + +func validateExtraClaimMapping(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, extras ...configv1.ExtraMapping) field.ErrorList { + out := field.ErrorList{} + for i, extra := range extras { + out = append(out, validateExtra(ctx, path.Child("extra").Index(i), cel, costRecorder, extra)...) 
+ } + + return out +} + +func validateExtra(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, extra configv1.ExtraMapping) field.ErrorList { + childPath := path.Child("valueExpression") + + return validateCELExpression(ctx, cel, costRecorder, childPath, &authenticationcel.ExtraMappingExpression{ + Key: extra.Key, + Expression: extra.ValueExpression, + }) +} + +type celCompileResult struct { + err error + cost uint64 +} + +func validateCELExpression(ctx context.Context, cel *celStore, costRecorder *costRecorder, path *field.Path, accessor authenticationcel.ExpressionAccessor) field.ErrorList { + // if context has been canceled, don't try to compile any expressions + if err := ctx.Err(); err != nil { + return field.ErrorList{field.InternalError(path, err)} + } + + result, err, _ := cel.compilingGroup.Do(accessor.GetExpression(), func() (interface{}, error) { + // if the expression is not currently being compiled, it might have already been compiled + if val, ok := cel.compiledStore.Get(accessor.GetExpression()); ok { + res, ok := val.(celCompileResult) + if !ok { + return nil, fmt.Errorf("expected return value from cache of compiled expressions to be of type celCompileResult but was %T", val) + } + + return res, nil + } + + // expression is not currently being compiled, and has not been compiled before (or has been long enough since it was last compiled that we dropped it). + // Let's compile it. + + // Asynchronously handle excessive compilation time so we + // can still log a warning in the event the process has died + // before compilation of the expression has finished. 
+ warningChan := make(chan string, 1) + timer := cel.timerFactory.Timer(excessiveCompileDuration, func() { + defer close(warningChan) + warn := fmt.Sprintf("cel expression %q took excessively long to compile (%s)", accessor.GetExpression(), excessiveCompileDuration) + klog.Warning(warn) + warningChan <- warn + }) + + compRes, compErr := cel.compiler.CompileClaimsExpression(accessor) + + timer.Stop() + + res := celCompileResult{ + err: compErr, + } + + if compRes.AST != nil && compErr == nil { + cost, err := checker.Cost(compRes.AST.NativeRep(), &library.CostEstimator{ + SizeEstimator: cel.sizeEstimator, + }) + // Because we are only warning on excessive cost, we shouldn't prevent the create/update of the resource if we can successfully + // compile the expression but are unable to estimate the cost. The Structured Authentication Configuration feature does not + // gate on cost of expressions, so we are doing a best-effort warning here. + // Instead, default to our best estimate of the worst case cost. + if err != nil { + klog.Errorf("unable to estimate cost for expression %q: %v. Defaulting cost to %d", accessor.GetExpression(), err, fixedSize) + cost = checker.CostEstimate{Max: fixedSize} + } + + res.cost = cost.Max + } + + // check if we received a warning related to excessive compile time. 
If not, continue + select { + case warn := <-warningChan: + warning.AddWarning(ctx, "", warn) + default: + } + + cel.compiledStore.Add(accessor.GetExpression(), res) + + return res, nil + }) + if err != nil { + return field.ErrorList{field.InternalError(path, fmt.Errorf("running compilation of expression %q: %v", accessor.GetExpression(), err))} + } + + compileRes, ok := result.(celCompileResult) + if !ok { + return field.ErrorList{field.InternalError(path, fmt.Errorf("expected result to be of type celCompileResult, but got %T", result))} + } + + if compileRes.err != nil { + return field.ErrorList{field.Invalid(path, accessor.GetExpression(), compileRes.err.Error())} + } + + costRecorder.AddRecording(path, compileRes.cost) + + return nil +} + +type fixedSizeEstimator struct { + size uint64 +} + +func (fcse *fixedSizeEstimator) EstimateSize(element checker.AstNode) *checker.SizeEstimate { + return &checker.SizeEstimate{Min: fcse.size, Max: fcse.size} +} + +func (fcse *fixedSizeEstimator) EstimateCallCost(function, overloadID string, target *checker.AstNode, args []checker.AstNode) *checker.CallEstimate { + return nil +} diff --git a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication_test.go b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication_test.go index d93f3f67f6..daf0642db5 100644 --- a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication_test.go +++ b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication_test.go @@ -1,10 +1,18 @@ package authentication import ( + "context" + "errors" + "strings" "testing" + "time" configv1 "github.com/openshift/api/config/v1" + "golang.org/x/sync/singleflight" 
"k8s.io/apimachinery/pkg/util/validation/field" + authenticationcel "k8s.io/apiserver/pkg/authentication/cel" + "k8s.io/apiserver/pkg/warning" + "k8s.io/utils/lru" ) func TestFailValidateAuthenticationSpec(t *testing.T) { @@ -49,10 +57,50 @@ func TestFailValidateAuthenticationSpec(t *testing.T) { errorType: field.ErrorTypeInvalid, errorField: "spec.webhookTokenAuthenticator", }, + "invalid UID CEL expression": { + spec: configv1.AuthenticationSpec{ + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + UID: &configv1.TokenClaimOrExpressionMapping{ + Expression: "!@^#&(!^@(*#&(", + }, + }, + }, + }, + }, + errorType: field.ErrorTypeInvalid, + errorField: "spec.oidcProviders[0].claimMappings.uid.expression", + }, + "invalid Extra CEL expression": { + spec: configv1.AuthenticationSpec{ + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + Extra: []configv1.ExtraMapping{ + { + Key: "foo/bar", + ValueExpression: "!@*(&#^(!@*)&^&", + }, + }, + }, + }, + }, + }, + errorType: field.ErrorTypeInvalid, + errorField: "spec.oidcProviders[0].claimMappings.extra[0].valueExpression", + }, } for tcName, tc := range errorCases { - errs := validateAuthenticationSpec(tc.spec) + errs := validateAuthenticationSpec(context.TODO(), tc.spec, &celStore{ + compiler: authenticationcel.NewDefaultCompiler(), + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(100), + timerFactory: &excessiveCompileTimerFactory{}, + }) if (len(errs) > 0) != (len(tc.errorType) != 0) { t.Errorf("'%s': expected failure: %t, got: %t", tcName, len(tc.errorType) != 0, len(errs) > 0) } @@ -109,10 +157,42 @@ func TestSucceedValidateAuthenticationSpec(t *testing.T) { {KubeConfig: configv1.SecretNameReference{Name: "thisisawebhook33"}}, }, }, + "valid uid CEL expression": { + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + UID: 
&configv1.TokenClaimOrExpressionMapping{ + Expression: "claims.uid", + }, + }, + }, + }, + }, + "valid Extra CEL expression": { + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + Extra: []configv1.ExtraMapping{ + { + Key: "foo/bar", + ValueExpression: "claims.roles", + }, + }, + }, + }, + }, + }, } for tcName, s := range successCases { - errs := validateAuthenticationSpec(s) + errs := validateAuthenticationSpec(context.TODO(), s, &celStore{ + compiler: authenticationcel.NewDefaultCompiler(), + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(100), + timerFactory: &excessiveCompileTimerFactory{}, + }) if len(errs) != 0 { t.Errorf("'%s': expected success, but failed: %v", tcName, errs.ToAggregate().Error()) } @@ -175,5 +255,457 @@ func TestSucceedValidateAuthenticationStatus(t *testing.T) { t.Errorf("'%s': expected success, but failed: %v", tcName, errs.ToAggregate().Error()) } } +} + +func TestValidateCELExpression(t *testing.T) { + type testcase struct { + name string + cel func() *celStore + ctx func() context.Context + shouldErr bool + shouldWarn bool + } + + expression := &authenticationcel.ClaimMappingExpression{ + Expression: `["foo", "bar"].exists(x, x == "foo")`, + } + + testcases := []testcase{ + { + name: "does not return a warning when excessive compilation timer is not triggered", + cel: func() *celStore { + return &celStore{ + compiler: &mockCompiler{ + err: nil, + }, + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &mockTimerFactory{ + trigger: false, + }, + } + }, + ctx: func() context.Context { return context.TODO() }, + }, + { + name: "returns a warning when excessive compilation timer is triggered", + cel: func() *celStore { + return &celStore{ + compiler: &mockCompiler{ + err: nil, + }, + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &mockTimerFactory{ + trigger: true, + }, + } + }, + ctx: 
func() context.Context { return context.TODO() }, + shouldWarn: true, + }, + { + name: "still returns error if excessive compilation timer is triggered and errors out", + cel: func() *celStore { + return &celStore{ + compiler: &mockCompiler{ + err: errors.New("boom"), + }, + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &mockTimerFactory{ + trigger: true, + }, + } + }, + ctx: func() context.Context { return context.TODO() }, + shouldWarn: true, + shouldErr: true, + }, + { + name: "returns an error if the context has been canceled", + cel: func() *celStore { + return &celStore{ + compiler: &mockCompiler{ + err: nil, + }, + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &mockTimerFactory{ + trigger: false, + }, + } + }, + ctx: func() context.Context { + ctx, cancel := context.WithCancel(context.TODO()) + cancel() + return ctx + }, + shouldErr: true, + }, + { + name: "returns already compiled expression results if the expression has been compiled before", + cel: func() *celStore { + compiledLRU := lru.New(1) + res := celCompileResult{ + err: errors.New("boom"), + } + compiledLRU.Add(expression.Expression, res) + + return &celStore{ + compiler: nil, // should never end up calling this + compilingGroup: new(singleflight.Group), + compiledStore: compiledLRU, + timerFactory: &mockTimerFactory{ + trigger: false, + }, + } + }, + ctx: func() context.Context { return context.TODO() }, + shouldErr: true, + shouldWarn: false, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + warningRecorder := &mockWarningRecorder{} + ctx := warning.WithWarningRecorder(tc.ctx(), warningRecorder) + err := validateCELExpression(ctx, tc.cel(), &costRecorder{}, field.NewPath("^"), expression) + if tc.shouldErr != (err != nil) { + t.Fatalf("error expectation does not match actual. expected? %v . 
received: %v", tc.shouldErr, err) + } + + if tc.shouldWarn != (len(warningRecorder.warnings) > 0) { + t.Fatalf("warning expectation does not match actual. expected? %v . received: %v", tc.shouldWarn, warningRecorder.warnings) + } + }) + } +} + +type mockCompiler struct { + receiver chan error + err error + useDelegate bool + delegate authenticationcel.Compiler + called int +} + +func (mc *mockCompiler) CompileClaimsExpression(expressionAccessor authenticationcel.ExpressionAccessor) (authenticationcel.CompilationResult, error) { + mc.called += 1 + if mc.receiver != nil { + err := <-mc.receiver + return authenticationcel.CompilationResult{}, err + } + return authenticationcel.CompilationResult{}, mc.err +} + +func (mc *mockCompiler) CompileUserExpression(expressionAccessor authenticationcel.ExpressionAccessor) (authenticationcel.CompilationResult, error) { + mc.called += 1 + if mc.receiver != nil { + err := <-mc.receiver + return authenticationcel.CompilationResult{}, err + } + return authenticationcel.CompilationResult{}, mc.err +} + +type mockTimerFactory struct { + trigger bool +} + +func (mct *mockTimerFactory) Timer(_ time.Duration, do func()) timer { + if mct.trigger { + do() + return &mockTimer{done: true} + } + + return &mockTimer{done: false} +} + +type mockTimer struct { + done bool +} + +func (mt *mockTimer) Stop() bool { + return mt.done +} + +type mockCompiledExpressionStore struct { + adds int + gets int + delegate *lru.Cache +} + +func (mces *mockCompiledExpressionStore) Add(key lru.Key, value interface{}) { + mces.adds += 1 + + if mces.delegate != nil { + mces.delegate.Add(key, value) + } +} + +func (mces *mockCompiledExpressionStore) Get(key lru.Key) (interface{}, bool) { + mces.gets += 1 + + if mces.delegate != nil { + return mces.delegate.Get(key) + } + + return nil, false +} + +// signallingSingleFlightGroup is an implementation +// of the singleFlightDoer interface that is used to +// exercise the behavior of a singleflight.Group deduplicating 
+// work when multiple goroutines attempt to compile the same +// CEL expression +type signallingSingleFlightGroup struct { + singleflight.Group + + // ready is a channel in which the signallingSingleFlightGroup + // can send a signal that it has started work for a key-func pair + ready chan struct{} +} + +func (ssfg *signallingSingleFlightGroup) Do(key string, fn func() (any, error)) (any, error, bool) { + c := ssfg.DoChan(key, fn) + ssfg.ready <- struct{}{} + res := <-c + return res.Val, res.Err, res.Shared +} + +// TestValidateCELExpressionDeduplicatesWork ensures +// that we only do work to compile a CEL expression across +// goroutines once. +// We do this by: +// 1. Mocking the compiler such that it blocks +// until it receives a signal on a channel. +// 2. Mocking the singleFlightDoer with a singleFlightDoer that sends +// a signal on a channel when work has been started on the singleflight.Group +// 3. Spinning N goroutines to compile the same expression, where N is an arbitrary number of duplicates +// 4. Waiting until we have received a signal from each spun goroutine that it has started compilation +// of the CEL expression +// 5. Sending an error on the channel the mock compiler is blocking on +// +// This ensures that all spun goroutines are actively "compiling" the CEL +// expression before we tell the compiler to complete compilation. This means +// the first goroutine to actually call the compiler.CompileClaimsExpression method +// will hog the singleflight.Group and the rest of the goroutines will wait for +// its results. 
+func TestValidateCELExpressionDeduplicatesWork(t *testing.T) { + // [1] Mock the compiler and have it block until + // we send an error on a channel + receiver := make(chan error) + mCompiler := &mockCompiler{ + receiver: receiver, + } + + mCompiledExpressionStore := &mockCompiledExpressionStore{ + delegate: lru.New(1), + } + + // [2] Mock the singleFlightDoer and send a signal on + // a channel when work has been started on the singleflight.Group + ready := make(chan struct{}) + ssfg := &signallingSingleFlightGroup{ + ready: ready, + } + + cel := &celStore{ + compiler: mCompiler, + compilingGroup: ssfg, + compiledStore: mCompiledExpressionStore, + timerFactory: &mockTimerFactory{ + trigger: false, + }, + } + + expression := &authenticationcel.ClaimMappingExpression{ + Expression: `["foo", "bar"].exists(x, x == "foo")`, + } + + results := make(chan field.ErrorList) + fieldPath := field.NewPath("^") + + // [3] Spin N goroutines to compile the same expression + duplicates := 2 + for range duplicates { + go func() { + results <- validateCELExpression(context.TODO(), cel, &costRecorder{}, fieldPath, expression) + }() + } + + // [4] Wait for N goroutines to be reported as having + // started work via the singleflight.Group + for range duplicates { + <-ready + } + + // [5] Send an error on the channel the mock compiler is blocking on + expectedErr := errors.New("boom") + receiver <- expectedErr + + expectedFieldError := field.ErrorList{field.Invalid(fieldPath, expression.Expression, expectedErr.Error())} + + // singleflight.Group will return the results of the first call to all + // goroutines waiting for the work to be finished. + // Check to ensure all goroutines reported the same results. 
+ for range duplicates { + result := <-results + if result.ToAggregate().Error() != expectedFieldError.ToAggregate().Error() { + t.Fatalf("expected all results to have error %v but got a result with a different error of %v", expectedFieldError.ToAggregate(), result.ToAggregate()) + } + } + + // The mock compiler should have only been called a single time because only the + // first call to the singleflight.Group for the CEL expression should + // have resulted in an actual call to the compiler. + if mCompiler.called == 0 { + t.Fatal("expected compiler to be called, but it was not") + } + + if mCompiler.called > 1 { + t.Fatalf("expected compiler to be called once, but it was called %d times", mCompiler.called) + } + + // The mock cache should have only been called a single time because only the + // first call to the singleflight.Group for the CEL expression should + // have resulted in an actual check to see if the expression has previously been compiled. + if mCompiledExpressionStore.gets == 0 { + t.Fatal("expected cache to have been hit one time, but was never hit") + } + + if mCompiledExpressionStore.gets > 1 { + t.Fatalf("expected cache to have been hit one time, but was hit %d times", mCompiledExpressionStore.gets) + } +} + +func TestValidAuthenticationSpecWithExcessivelyLongCELExpressionCompileTime(t *testing.T) { + authn := configv1.AuthenticationSpec{ + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + UID: &configv1.TokenClaimOrExpressionMapping{ + Expression: "claims.foo", + }, + }, + }, + }, + } + + warningRecorder := &mockWarningRecorder{} + ctx := warning.WithWarningRecorder(context.TODO(), warningRecorder) + + errs := validateAuthenticationSpec(ctx, authn, &celStore{ + compiler: &mockCompiler{}, + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &mockTimerFactory{ + trigger: true, + }, + }) + + if len(errs) > 0 { + t.Fatalf("should not have received any 
errors, but got: %v", errs.ToAggregate()) + } + + if len(warningRecorder.warnings) != 1 { + t.Fatalf("expected to receive one warning about excessively long cel compilation time, got: %v", warningRecorder.warnings) + } + + if !strings.Contains(warningRecorder.warnings[0], "took excessively long to compile") { + t.Fatalf("expected warning to mention excessively long compile time but instead got: %s", warningRecorder.warnings[0]) + } +} + +func TestValidAuthenticationSpecWithExcessiveCELExpressionRuntimeCost(t *testing.T) { + authn := configv1.AuthenticationSpec{ + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: configv1.TokenClaimMappings{ + UID: &configv1.TokenClaimOrExpressionMapping{ + Expression: "claims.map(x, x+x)", + }, + }, + }, + }, + } + + warningRecorder := &mockWarningRecorder{} + ctx := warning.WithWarningRecorder(context.TODO(), warningRecorder) + + errs := validateAuthenticationSpec(ctx, authn, &celStore{ + compiler: authenticationcel.NewDefaultCompiler(), + compilingGroup: new(singleflight.Group), + compiledStore: lru.New(1), + timerFactory: &excessiveCompileTimerFactory{}, + sizeEstimator: &fixedSizeEstimator{ + size: 100000, // enough to blow the whole resource cost warning threshold + }, + }) + + if len(errs) > 0 { + t.Fatalf("should not have received any errors, but got: %v", errs.ToAggregate()) + } + + if len(warningRecorder.warnings) != 1 { + t.Fatalf("expected to receive one warning about excessive runtime cost, got: %v", warningRecorder.warnings) + } + + if !strings.Contains(warningRecorder.warnings[0], "runtime cost of all CEL expressions exceeds") { + t.Fatalf("expected warning to mention excessive runtime cost but instead got: %s", warningRecorder.warnings[0]) + } +} + +func TestValidAuthenticationSpecNoExcessiveCELExpressionRuntimeCostWithSimpleExpressions(t *testing.T) { + authn := configv1.AuthenticationSpec{ + Type: "OIDC", + OIDCProviders: []configv1.OIDCProvider{ + { + ClaimMappings: 
configv1.TokenClaimMappings{ + UID: &configv1.TokenClaimOrExpressionMapping{ + Expression: "claims.sub", + }, + Extra: []configv1.ExtraMapping{ + { + Key: "test.io/role", + ValueExpression: "claims.role", + }, + { + Key: "test.io/country", + ValueExpression: "claims.country", + }, + // A bit more complex expression + { + Key: "test.io/org", + ValueExpression: "claims.email.endsWith('@test.io') ? 'testOrg' : 'acquiredOrg'", + }, + }, + }, + }, + }, + } + + warningRecorder := &mockWarningRecorder{} + ctx := warning.WithWarningRecorder(context.TODO(), warningRecorder) + + errs := validateAuthenticationSpec(ctx, authn, defaultCelStore()) + + if len(errs) > 0 { + t.Fatalf("should not have received any errors, but got: %v", errs.ToAggregate()) + } + + if len(warningRecorder.warnings) > 0 { + t.Fatalf("should not have received any warnings, but got: %v", warningRecorder.warnings) + } +} + +type mockWarningRecorder struct { + warnings []string +} +func (mwr *mockWarningRecorder) AddWarning(agent, text string) { + mwr.warnings = append(mwr.warnings, text) } diff --git a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission.go b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission.go new file mode 100644 index 0000000000..e3bda3dbe0 --- /dev/null +++ b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission.go @@ -0,0 +1,179 @@ +package performantsecuritypolicy + +import ( + "context" + "fmt" + "io" + + openshiftfeatures "github.com/openshift/api/features" + corev1 "k8s.io/api/core/v1" + "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/admission/initializer" + "k8s.io/apiserver/pkg/audit" + "k8s.io/apiserver/pkg/warning" + "k8s.io/client-go/informers" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/component-base/featuregate" + "k8s.io/klog/v2" + kapi 
"k8s.io/kubernetes/pkg/apis/core" +) + +const ( + PluginName = "storage.openshift.io/PerformantSecurityPolicy" + fsGroupChangePolicyLabel = "storage.openshift.io/fsgroup-change-policy" + selinuxChangePolicyLabel = "storage.openshift.io/selinux-change-policy" + + warningFormat = "found %s label with invalid %s: %s on %s namespace" +) + +var ( + _ = initializer.WantsExternalKubeInformerFactory(&performantSecurityPolicy{}) + _ = admission.MutationInterface(&performantSecurityPolicy{}) + _ = initializer.WantsFeatures(&performantSecurityPolicy{}) + + fsGroupPolicyPodAuditLabel = fmt.Sprintf("%s-pod", fsGroupChangePolicyLabel) + selinuxPolicyPodAuditLabel = fmt.Sprintf("%s-pod", selinuxChangePolicyLabel) +) + +func Register(plugins *admission.Plugins) { + plugins.Register(PluginName, + func(config io.Reader) (admission.Interface, error) { + return &performantSecurityPolicy{ + Handler: admission.NewHandler(admission.Create), + }, nil + }) +} + +// performantSecurityPolicy checks and applies if a default FSGroupChangePolicy and SELinuxChangePolicy +// should be applied to the pod. 
+type performantSecurityPolicy struct { + *admission.Handler + storagePerformantSecurityPolicyFeatureEnabled bool + nsLister corev1listers.NamespaceLister +} + +// SetExternalKubeInformerFactory registers an informer +func (c *performantSecurityPolicy) SetExternalKubeInformerFactory(kubeInformers informers.SharedInformerFactory) { + c.nsLister = kubeInformers.Core().V1().Namespaces().Lister() + c.SetReadyFunc(func() bool { + return kubeInformers.Core().V1().Namespaces().Informer().HasSynced() + }) +} + +func (c *performantSecurityPolicy) InspectFeatureGates(featureGates featuregate.FeatureGate) { + c.storagePerformantSecurityPolicyFeatureEnabled = featureGates.Enabled(featuregate.Feature(openshiftfeatures.FeatureGateStoragePerformantSecurityPolicy)) +} + +func (c *performantSecurityPolicy) ValidateInitialization() error { + if c.nsLister == nil { + return fmt.Errorf("%s plugin needs a namespace lister", PluginName) + } + return nil +} + +func (c *performantSecurityPolicy) Admit(ctx context.Context, attributes admission.Attributes, _ admission.ObjectInterfaces) error { + if !c.storagePerformantSecurityPolicyFeatureEnabled { + return nil + } + + if !c.WaitForReady() { + return admission.NewForbidden(attributes, fmt.Errorf("not yet ready to handle request")) + } + + if attributes.GetResource().GroupResource() != kapi.Resource("pods") || + len(attributes.GetSubresource()) > 0 { + return nil + } + + pod, ok := attributes.GetObject().(*kapi.Pod) + if !ok { + return admission.NewForbidden(attributes, fmt.Errorf("unexpected object: %#v", attributes.GetObject())) + } + + ns, err := c.nsLister.Get(pod.Namespace) + if err != nil { + return fmt.Errorf("error listing pod namespace: %v", err) + } + podNameKey := fmt.Sprintf("%s/%s", attributes.GetName(), attributes.GetNamespace()) + + currentFSGroupChangePolicy := extractCurrentFSGroupChangePolicy(pod) + if currentFSGroupChangePolicy == nil { + currentFSGroupChangePolicy = getDefaultFSGroupChangePolicy(ctx, ns) + if 
currentFSGroupChangePolicy != nil { + klog.V(4).Infof("Setting default FSGroupChangePolicy %s for pod %s", *currentFSGroupChangePolicy, podNameKey) + audit.AddAuditAnnotations(ctx, fsGroupChangePolicyLabel, string(*currentFSGroupChangePolicy), fsGroupPolicyPodAuditLabel, podNameKey) + if pod.Spec.SecurityContext != nil { + pod.Spec.SecurityContext.FSGroupChangePolicy = currentFSGroupChangePolicy + } else { + pod.Spec.SecurityContext = &kapi.PodSecurityContext{ + FSGroupChangePolicy: currentFSGroupChangePolicy, + } + } + } + } + + currentSELinuxChangePolicy := extractCurrentSELinuxChangePolicy(pod) + if currentSELinuxChangePolicy == nil { + currentSELinuxChangePolicy = getDefaultSELinuxChangePolicy(ctx, ns) + if currentSELinuxChangePolicy != nil { + klog.V(4).Infof("Setting default SELinuxChangePolicy %s for pod %s", *currentSELinuxChangePolicy, podNameKey) + audit.AddAuditAnnotations(ctx, selinuxChangePolicyLabel, string(*currentSELinuxChangePolicy), selinuxPolicyPodAuditLabel, podNameKey) + if pod.Spec.SecurityContext != nil { + pod.Spec.SecurityContext.SELinuxChangePolicy = currentSELinuxChangePolicy + } else { + pod.Spec.SecurityContext = &kapi.PodSecurityContext{ + SELinuxChangePolicy: currentSELinuxChangePolicy, + } + } + } + } + return nil +} + +func extractCurrentSELinuxChangePolicy(pod *kapi.Pod) *kapi.PodSELinuxChangePolicy { + if pod.Spec.SecurityContext != nil { + return pod.Spec.SecurityContext.SELinuxChangePolicy + } + + return nil +} + +func extractCurrentFSGroupChangePolicy(pod *kapi.Pod) *kapi.PodFSGroupChangePolicy { + if pod.Spec.SecurityContext != nil { + return pod.Spec.SecurityContext.FSGroupChangePolicy + } + return nil +} + +func getDefaultFSGroupChangePolicy(ctx context.Context, ns *corev1.Namespace) *kapi.PodFSGroupChangePolicy { + fsGroupPolicy, ok := ns.Labels[fsGroupChangePolicyLabel] + if !ok { + return nil + } + policy := kapi.PodFSGroupChangePolicy(fsGroupPolicy) + + if policy == kapi.FSGroupChangeOnRootMismatch || policy == 
kapi.FSGroupChangeAlways { + return &policy + } + klog.Warningf("found %s label with invalid fsGroupPolicy: %s", fsGroupChangePolicyLabel, fsGroupPolicy) + warning.AddWarning(ctx, "", fmt.Sprintf(warningFormat, fsGroupChangePolicyLabel, "fsGroupPolicy", fsGroupPolicy, ns.Name)) + return nil +} + +func getDefaultSELinuxChangePolicy(ctx context.Context, ns *corev1.Namespace) *kapi.PodSELinuxChangePolicy { + selinuxChangePolicy, ok := ns.Labels[selinuxChangePolicyLabel] + if !ok { + return nil + } + + policy := kapi.PodSELinuxChangePolicy(selinuxChangePolicy) + + if policy == kapi.SELinuxChangePolicyMountOption || policy == kapi.SELinuxChangePolicyRecursive { + return &policy + } + + klog.Warningf("found %s label with invalid selinuxPolicy: %s", selinuxChangePolicyLabel, selinuxChangePolicy) + warning.AddWarning(ctx, "", fmt.Sprintf(warningFormat, selinuxChangePolicyLabel, "selinuxPolicy", selinuxChangePolicy, ns.Name)) + return nil + +} diff --git a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission_test.go b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission_test.go new file mode 100644 index 0000000000..92dff82c20 --- /dev/null +++ b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy/admission_test.go @@ -0,0 +1,225 @@ +package performantsecuritypolicy + +import ( + "context" + "testing" + + openshiftfeatures "github.com/openshift/api/features" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apiserver/pkg/admission" + utilfeature "k8s.io/apiserver/pkg/util/feature" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/component-base/featuregate" + featuregatetesting "k8s.io/component-base/featuregate/testing" + kapi "k8s.io/kubernetes/pkg/apis/core" + _ 
"k8s.io/kubernetes/pkg/features" +) + +func TestAdmit(t *testing.T) { + type testCase struct { + name string + pod *kapi.Pod + ns *corev1.Namespace + expectedPod *kapi.Pod + expectError bool + featureGateEnabled bool + } + + onRootMismatchPolicy := kapi.FSGroupChangeOnRootMismatch + alwaysFSGroupChangePolicy := kapi.FSGroupChangeAlways + selinuxRecursive := kapi.SELinuxChangePolicyRecursive + selinuxMountOption := kapi.SELinuxChangePolicyMountOption + + testCases := []testCase{ + { + name: "when feature gate is disabled, no changes are made", + featureGateEnabled: false, + pod: getPod(nil, nil), + expectedPod: getPod(nil, nil), + + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + }), + }, + { + name: "when feature gate is enabled, FSGroupChangePolicy is set to OnRootMismatch", + featureGateEnabled: true, + pod: getPod(nil, nil), + expectedPod: getPod(&onRootMismatchPolicy, nil), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + }), + }, + { + name: "when feature is enabled, but namespace label for fsgroupchangepolicy has invalid value, no changes are made", + pod: getPod(nil, nil), + featureGateEnabled: true, + expectedPod: getPod(nil, nil), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "InvalidValue", + }), + }, + { + name: "when feature is enabled, but pod already specifies different fsgroupchangepolicy", + pod: getPod(&alwaysFSGroupChangePolicy, nil), + featureGateEnabled: true, + expectedPod: getPod(&alwaysFSGroupChangePolicy, nil), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + }), + }, + { + name: "when feature is enabled and selinuxchangepolicy is set to Recursive", + featureGateEnabled: true, + pod: getPod(nil, nil), + expectedPod: getPod(nil, &selinuxRecursive), + expectError: false, + ns: getNamespace(map[string]string{ + selinuxChangePolicyLabel: "Recursive", + 
}), + }, + { + name: "when feature is enabled and selinuxchangepolicy is set to MountOption", + featureGateEnabled: true, + pod: getPod(nil, nil), + expectedPod: getPod(nil, &selinuxMountOption), + expectError: false, + ns: getNamespace(map[string]string{ + selinuxChangePolicyLabel: "MountOption", + }), + }, + { + name: "when feature is enabled, but pod already specifies different selinuxchangepolicy", + pod: getPod(nil, &selinuxRecursive), + featureGateEnabled: true, + expectedPod: getPod(nil, &selinuxRecursive), + expectError: false, + ns: getNamespace(map[string]string{ + selinuxChangePolicyLabel: "MountOption", + }), + }, + { + name: "when feature is enabled and both fsgroupchangepolicy and selinuxchangepolicy are set", + featureGateEnabled: true, + pod: getPod(nil, nil), + expectedPod: getPod(&onRootMismatchPolicy, &selinuxMountOption), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + selinuxChangePolicyLabel: "MountOption", + }), + }, + { + name: "when feature is enabled and both fsgroupchangepolicy and selinuxchangepolicy are set, but pod already specifies different policies", + pod: getPod(&alwaysFSGroupChangePolicy, &selinuxRecursive), + featureGateEnabled: true, + expectedPod: getPod(&alwaysFSGroupChangePolicy, &selinuxRecursive), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + selinuxChangePolicyLabel: "MountOption", + }), + }, + { + name: "when feature is enabled and both fsgroupchangepolicy and selinuxchangepolicy are set, but selinux lable has invalid value", + pod: getPod(nil, nil), + featureGateEnabled: true, + expectedPod: getPod(&onRootMismatchPolicy, nil), + expectError: false, + ns: getNamespace(map[string]string{ + fsGroupChangePolicyLabel: "OnRootMismatch", + selinuxChangePolicyLabel: "InvalidValue", + }), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + 
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, featuregate.Feature(openshiftfeatures.FeatureGateStoragePerformantSecurityPolicy), tc.featureGateEnabled) + + psp := &performantSecurityPolicy{} + psp.nsLister = fakeNamespaceLister(tc.ns) + psp.Handler = admission.NewHandler(admission.Create) + + psp.InspectFeatureGates(utilfeature.DefaultFeatureGate) + + if err := psp.ValidateInitialization(); err != nil { + t.Fatalf("failed to validate initialization: %v", err) + } + + namespaceName := tc.ns.Name + podName := tc.pod.Name + gvr := kapi.Resource("pods").WithVersion("version") + attrs := admission.NewAttributesRecord(tc.pod, nil, schema.GroupVersionKind{}, namespaceName, podName, gvr, "", admission.Create, nil, false, nil) + + err := psp.Admit(context.Background(), attrs, nil) + if (err != nil) != tc.expectError { + t.Errorf("expected error: %v, got: %v", tc.expectError, err) + } + if !tc.expectError { + currentFSGroupChangePolicy := getPodFSGroupChangePolicy(tc.pod) + expectedFSGroupChangePolicy := getPodFSGroupChangePolicy(tc.expectedPod) + + if currentFSGroupChangePolicy != expectedFSGroupChangePolicy { + t.Errorf("expected FSGroupChangePolicy %s, got %s", expectedFSGroupChangePolicy, currentFSGroupChangePolicy) + } + + currentSELinuxChangePolicy := getPodSELinuxChangePolicy(tc.pod) + expectedSELinuxChangePolicy := getPodSELinuxChangePolicy(tc.expectedPod) + if currentSELinuxChangePolicy != expectedSELinuxChangePolicy { + t.Errorf("expected SELinuxChangePolicy %s, got %s", expectedSELinuxChangePolicy, currentSELinuxChangePolicy) + } + } + }) + } +} + +func getPod(fsGroupChangePolicy *kapi.PodFSGroupChangePolicy, selinuxChangePolicy *kapi.PodSELinuxChangePolicy) *kapi.Pod { + return &kapi.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + Spec: kapi.PodSpec{ + SecurityContext: &kapi.PodSecurityContext{ + FSGroupChangePolicy: fsGroupChangePolicy, + SELinuxChangePolicy: selinuxChangePolicy, + 
}, + }, + } +} + +func fakeNamespaceLister(ns *corev1.Namespace) corev1listers.NamespaceLister { + indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + _ = indexer.Add(ns) + return corev1listers.NewNamespaceLister(indexer) +} + +func getNamespace(labels map[string]string) *corev1.Namespace { + return &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-namespace", + Labels: labels, + }, + } +} + +func getPodFSGroupChangePolicy(pod *kapi.Pod) kapi.PodFSGroupChangePolicy { + if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.FSGroupChangePolicy != nil { + return *pod.Spec.SecurityContext.FSGroupChangePolicy + } + return "" +} + +func getPodSELinuxChangePolicy(pod *kapi.Pod) kapi.PodSELinuxChangePolicy { + if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxChangePolicy != nil { + return *pod.Spec.SecurityContext.SELinuxChangePolicy + } + return "" +} diff --git a/deps/github.com/openshift/kubernetes/pkg/controller/job/job_controller.go b/deps/github.com/openshift/kubernetes/pkg/controller/job/job_controller.go index 303d5c3706..b28293e605 100644 --- a/deps/github.com/openshift/kubernetes/pkg/controller/job/job_controller.go +++ b/deps/github.com/openshift/kubernetes/pkg/controller/job/job_controller.go @@ -537,6 +537,12 @@ func (jm *Controller) deleteJob(logger klog.Logger, obj interface{}) { } } jm.enqueueLabelSelector(jobObj) + + key := cache.MetaObjectToName(jobObj).String() + err := jm.podBackoffStore.removeBackoffRecord(key) + if err != nil { + utilruntime.HandleError(fmt.Errorf("error removing backoff record %w", err)) + } } func (jm *Controller) enqueueLabelSelector(jobObj *batch.Job) { diff --git a/deps/github.com/openshift/kubernetes/pkg/features/openshift_features.go b/deps/github.com/openshift/kubernetes/pkg/features/openshift_features.go index 2ed4e14b85..51bbe0b37b 100644 --- a/deps/github.com/openshift/kubernetes/pkg/features/openshift_features.go +++ 
b/deps/github.com/openshift/kubernetes/pkg/features/openshift_features.go @@ -7,6 +7,7 @@ import ( var RouteExternalCertificate featuregate.Feature = "RouteExternalCertificate" var MinimumKubeletVersion featuregate.Feature = "MinimumKubeletVersion" +var StoragePerformantSecurityPolicy featuregate.Feature = "StoragePerformantSecurityPolicy" // registerOpenshiftFeatures injects openshift-specific feature gates func registerOpenshiftFeatures() { @@ -18,4 +19,8 @@ func registerOpenshiftFeatures() { defaultVersionedKubernetesFeatureGates[MinimumKubeletVersion] = featuregate.VersionedSpecs{ {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha}, } + // Introduced in 4.20 + defaultVersionedKubernetesFeatureGates[StoragePerformantSecurityPolicy] = featuregate.VersionedSpecs{ + {Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Alpha}, + } } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS b/deps/github.com/openshift/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS index d484fa4fc2..a654af9e87 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS @@ -13,5 +13,6 @@ reviewers: approvers: - sjenning - mrunalp + - rphillips component: node diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager.go index 5287ba169b..2eb701c0b9 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager.go @@ -109,14 +109,20 @@ func (m *manager) GetContainerResourceAllocation(podUID types.UID, containerName // UpdatePodFromAllocation overwrites the pod spec with the allocation. // This function does a deep copy only if updates are needed. 
func (m *manager) UpdatePodFromAllocation(pod *v1.Pod) (*v1.Pod, bool) { - // TODO(tallclair): This clones the whole cache, but we only need 1 pod. - allocs := m.allocated.GetPodResourceInfoMap() - return updatePodFromAllocation(pod, allocs) + if pod == nil { + return pod, false + } + + allocated, ok := m.allocated.GetPodResourceInfo(pod.UID) + if !ok { + return pod, false + } + + return updatePodFromAllocation(pod, allocated) } -func updatePodFromAllocation(pod *v1.Pod, allocs state.PodResourceInfoMap) (*v1.Pod, bool) { - allocated, found := allocs[pod.UID] - if !found { +func updatePodFromAllocation(pod *v1.Pod, allocated state.PodResourceInfo) (*v1.Pod, bool) { + if pod == nil { return pod, false } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager_test.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager_test.go index c9cbc177ec..b18258c081 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/allocation_manager_test.go @@ -103,50 +103,44 @@ func TestUpdatePodFromAllocation(t *testing.T) { tests := []struct { name string pod *v1.Pod - allocs state.PodResourceInfoMap + allocated state.PodResourceInfo expectPod *v1.Pod expectUpdate bool }{{ name: "steady state", pod: pod, - allocs: state.PodResourceInfoMap{ - pod.UID: state.PodResourceInfo{ - ContainerResources: map[string]v1.ResourceRequirements{ - "c1": *pod.Spec.Containers[0].Resources.DeepCopy(), - "c2": *pod.Spec.Containers[1].Resources.DeepCopy(), - "c1-restartable-init": *pod.Spec.InitContainers[0].Resources.DeepCopy(), - "c1-init": *pod.Spec.InitContainers[1].Resources.DeepCopy(), - }, + allocated: state.PodResourceInfo{ + ContainerResources: map[string]v1.ResourceRequirements{ + "c1": *pod.Spec.Containers[0].Resources.DeepCopy(), + "c2": *pod.Spec.Containers[1].Resources.DeepCopy(), + "c1-restartable-init": 
*pod.Spec.InitContainers[0].Resources.DeepCopy(), + "c1-init": *pod.Spec.InitContainers[1].Resources.DeepCopy(), }, }, expectUpdate: false, }, { name: "no allocations", pod: pod, - allocs: state.PodResourceInfoMap{}, + allocated: state.PodResourceInfo{}, expectUpdate: false, }, { name: "missing container allocation", pod: pod, - allocs: state.PodResourceInfoMap{ - pod.UID: state.PodResourceInfo{ - ContainerResources: map[string]v1.ResourceRequirements{ - "c2": *pod.Spec.Containers[1].Resources.DeepCopy(), - }, + allocated: state.PodResourceInfo{ + ContainerResources: map[string]v1.ResourceRequirements{ + "c2": *pod.Spec.Containers[1].Resources.DeepCopy(), }, }, expectUpdate: false, }, { name: "resized container", pod: pod, - allocs: state.PodResourceInfoMap{ - pod.UID: state.PodResourceInfo{ - ContainerResources: map[string]v1.ResourceRequirements{ - "c1": *resizedPod.Spec.Containers[0].Resources.DeepCopy(), - "c2": *resizedPod.Spec.Containers[1].Resources.DeepCopy(), - "c1-restartable-init": *resizedPod.Spec.InitContainers[0].Resources.DeepCopy(), - "c1-init": *resizedPod.Spec.InitContainers[1].Resources.DeepCopy(), - }, + allocated: state.PodResourceInfo{ + ContainerResources: map[string]v1.ResourceRequirements{ + "c1": *resizedPod.Spec.Containers[0].Resources.DeepCopy(), + "c2": *resizedPod.Spec.Containers[1].Resources.DeepCopy(), + "c1-restartable-init": *resizedPod.Spec.InitContainers[0].Resources.DeepCopy(), + "c1-init": *resizedPod.Spec.InitContainers[1].Resources.DeepCopy(), }, }, expectUpdate: true, @@ -156,7 +150,7 @@ func TestUpdatePodFromAllocation(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pod := test.pod.DeepCopy() - allocatedPod, updated := updatePodFromAllocation(pod, test.allocs) + allocatedPod, updated := updatePodFromAllocation(pod, test.allocated) if test.expectUpdate { assert.True(t, updated, "updated") diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state.go 
b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state.go index 96a2421f08..8022e10413 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state.go @@ -50,6 +50,7 @@ func (pr PodResourceInfoMap) Clone() PodResourceInfoMap { type Reader interface { GetContainerResources(podUID types.UID, containerName string) (v1.ResourceRequirements, bool) GetPodResourceInfoMap() PodResourceInfoMap + GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) } type writer interface { diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go index f6c5ce78c4..f41415c015 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go @@ -112,13 +112,20 @@ func (sc *stateCheckpoint) GetContainerResources(podUID types.UID, containerName return sc.cache.GetContainerResources(podUID, containerName) } -// GetPodResourceInfoMap returns current pod resource information +// GetPodResourceInfoMap returns current pod resource information map func (sc *stateCheckpoint) GetPodResourceInfoMap() PodResourceInfoMap { sc.mux.RLock() defer sc.mux.RUnlock() return sc.cache.GetPodResourceInfoMap() } +// GetPodResourceInfo returns current pod resource information +func (sc *stateCheckpoint) GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) { + sc.mux.RLock() + defer sc.mux.RUnlock() + return sc.cache.GetPodResourceInfo(podUID) +} + // SetContainerResoruces sets resources information for a pod's container func (sc *stateCheckpoint) SetContainerResources(podUID types.UID, containerName string, resources v1.ResourceRequirements) error { sc.mux.Lock() @@ -172,6 +179,10 @@ func (sc *noopStateCheckpoint) 
GetPodResourceInfoMap() PodResourceInfoMap { return nil } +func (sc *noopStateCheckpoint) GetPodResourceInfo(_ types.UID) (PodResourceInfo, bool) { + return PodResourceInfo{}, false +} + func (sc *noopStateCheckpoint) SetContainerResources(_ types.UID, _ string, _ v1.ResourceRequirements) error { return nil } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_mem.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_mem.go index e7e44503c6..e4b5210524 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_mem.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/allocation/state/state_mem.go @@ -65,6 +65,14 @@ func (s *stateMemory) GetPodResourceInfoMap() PodResourceInfoMap { return s.podResources.Clone() } +func (s *stateMemory) GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) { + s.RLock() + defer s.RUnlock() + + resourceInfo, ok := s.podResources[podUID] + return resourceInfo, ok +} + func (s *stateMemory) SetContainerResources(podUID types.UID, containerName string, resources v1.ResourceRequirements) error { s.Lock() defer s.Unlock() diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1.go index d69aba1e88..2a1525bae3 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/klog/v2" podutil "k8s.io/kubernetes/pkg/api/v1/pod" kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/metrics" @@ -36,17 +37,21 @@ type v1PodResourcesServer struct { cpusProvider CPUsProvider memoryProvider MemoryProvider dynamicResourcesProvider DynamicResourcesProvider + useActivePods bool } // NewV1PodResourcesServer 
returns a PodResourcesListerServer which lists pods provided by the PodsProvider // with device information provided by the DevicesProvider func NewV1PodResourcesServer(providers PodResourcesProviders) podresourcesv1.PodResourcesListerServer { + useActivePods := true + klog.InfoS("podresources", "method", "list", "useActivePods", useActivePods) return &v1PodResourcesServer{ podsProvider: providers.Pods, devicesProvider: providers.Devices, cpusProvider: providers.Cpus, memoryProvider: providers.Memory, dynamicResourcesProvider: providers.DynamicResources, + useActivePods: useActivePods, } } @@ -55,7 +60,13 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *podresourcesv1.Lis metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc() metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc() - pods := p.podsProvider.GetPods() + var pods []*v1.Pod + if p.useActivePods { + pods = p.podsProvider.GetActivePods() + } else { + pods = p.podsProvider.GetPods() + } + podResources := make([]*podresourcesv1.PodResources, len(pods)) p.devicesProvider.UpdateAllocatedDevices() diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1_test.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1_test.go index aa1dffd4d0..9ed00a1fbf 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/server_v1_test.go @@ -19,10 +19,12 @@ package podresources import ( "context" "fmt" + "sort" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/mock" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -221,6 +223,7 @@ func TestListPodResourcesV1(t *testing.T) { mockDynamicResourcesProvider := podresourcetest.NewMockDynamicResourcesProvider(t) mockPodsProvider.EXPECT().GetPods().Return(tc.pods).Maybe() + 
mockPodsProvider.EXPECT().GetActivePods().Return(tc.pods).Maybe() mockDevicesProvider.EXPECT().GetDevices(string(podUID), containerName).Return(tc.devices).Maybe() mockCPUsProvider.EXPECT().GetCPUs(string(podUID), containerName).Return(tc.cpus).Maybe() mockMemoryProvider.EXPECT().GetMemory(string(podUID), containerName).Return(tc.memory).Maybe() @@ -249,6 +252,159 @@ func TestListPodResourcesV1(t *testing.T) { } } +func makePod(idx int) *v1.Pod { + podNamespace := "pod-namespace" + podName := fmt.Sprintf("pod-name-%d", idx) + podUID := types.UID(fmt.Sprintf("pod-uid-%d", idx)) + containerName := fmt.Sprintf("container-name-%d", idx) + containers := []v1.Container{ + { + Name: containerName, + }, + } + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: podNamespace, + UID: podUID, + }, + Spec: v1.PodSpec{ + Containers: containers, + }, + } +} + +func collectNamespacedNamesFromPods(pods []*v1.Pod) []string { + ret := make([]string, 0, len(pods)) + for _, pod := range pods { + ret = append(ret, pod.Namespace+"/"+pod.Name) + } + sort.Strings(ret) + return ret +} + +func collectNamespacedNamesFromPodResources(prs []*podresourcesapi.PodResources) []string { + ret := make([]string, 0, len(prs)) + for _, pr := range prs { + ret = append(ret, pr.Namespace+"/"+pr.Name) + } + sort.Strings(ret) + return ret +} + +func TestListPodResourcesUsesOnlyActivePodsV1(t *testing.T) { + numaID := int64(1) + + // we abuse the fact that we don't care about the assignments, + // so we reuse the same for all pods which is actually wrong. 
+ devs := []*podresourcesapi.ContainerDevices{ + { + ResourceName: "resource", + DeviceIds: []string{"dev0"}, + Topology: &podresourcesapi.TopologyInfo{Nodes: []*podresourcesapi.NUMANode{{ID: numaID}}}, + }, + } + + cpus := []int64{1, 9} + + mems := []*podresourcesapi.ContainerMemory{ + { + MemoryType: "memory", + Size_: 1073741824, + Topology: &podresourcesapi.TopologyInfo{Nodes: []*podresourcesapi.NUMANode{{ID: numaID}}}, + }, + { + MemoryType: "hugepages-1Gi", + Size_: 1073741824, + Topology: &podresourcesapi.TopologyInfo{Nodes: []*podresourcesapi.NUMANode{{ID: numaID}}}, + }, + } + + for _, tc := range []struct { + desc string + pods []*v1.Pod + activePods []*v1.Pod + }{ + { + desc: "no pods", + pods: []*v1.Pod{}, + activePods: []*v1.Pod{}, + }, + { + desc: "no differences", + pods: []*v1.Pod{ + makePod(1), + makePod(2), + makePod(3), + makePod(4), + makePod(5), + }, + activePods: []*v1.Pod{ + makePod(1), + makePod(2), + makePod(3), + makePod(4), + makePod(5), + }, + }, + { + desc: "some terminated pods", + pods: []*v1.Pod{ + makePod(1), + makePod(2), + makePod(3), + makePod(4), + makePod(5), + makePod(6), + makePod(7), + }, + activePods: []*v1.Pod{ + makePod(1), + makePod(3), + makePod(4), + makePod(5), + makePod(6), + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + mockDevicesProvider := podresourcetest.NewMockDevicesProvider(t) + mockPodsProvider := podresourcetest.NewMockPodsProvider(t) + mockCPUsProvider := podresourcetest.NewMockCPUsProvider(t) + mockMemoryProvider := podresourcetest.NewMockMemoryProvider(t) + mockDynamicResourcesProvider := podresourcetest.NewMockDynamicResourcesProvider(t) + + mockPodsProvider.EXPECT().GetPods().Return(tc.pods).Maybe() + mockPodsProvider.EXPECT().GetActivePods().Return(tc.activePods).Maybe() + mockDevicesProvider.EXPECT().GetDevices(mock.Anything, mock.Anything).Return(devs).Maybe() + mockCPUsProvider.EXPECT().GetCPUs(mock.Anything, mock.Anything).Return(cpus).Maybe() + 
mockMemoryProvider.EXPECT().GetMemory(mock.Anything, mock.Anything).Return(mems).Maybe() + mockDevicesProvider.EXPECT().UpdateAllocatedDevices().Return().Maybe() + mockCPUsProvider.EXPECT().GetAllocatableCPUs().Return([]int64{}).Maybe() + mockDevicesProvider.EXPECT().GetAllocatableDevices().Return([]*podresourcesapi.ContainerDevices{}).Maybe() + mockMemoryProvider.EXPECT().GetAllocatableMemory().Return([]*podresourcesapi.ContainerMemory{}).Maybe() + + providers := PodResourcesProviders{ + Pods: mockPodsProvider, + Devices: mockDevicesProvider, + Cpus: mockCPUsProvider, + Memory: mockMemoryProvider, + DynamicResources: mockDynamicResourcesProvider, + } + server := NewV1PodResourcesServer(providers) + resp, err := server.List(context.TODO(), &podresourcesapi.ListPodResourcesRequest{}) + if err != nil { + t.Errorf("want err = %v, got %q", nil, err) + } + expectedNames := collectNamespacedNamesFromPods(tc.activePods) + gotNames := collectNamespacedNamesFromPodResources(resp.GetPodResources()) + if diff := cmp.Diff(expectedNames, gotNames, cmpopts.EquateEmpty()); diff != "" { + t.Fatal(diff) + } + }) + } +} + func TestListPodResourcesWithInitContainersV1(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.KubeletPodResourcesDynamicResources, true) @@ -423,6 +579,7 @@ func TestListPodResourcesWithInitContainersV1(t *testing.T) { mockDynamicResourcesProvider := podresourcetest.NewMockDynamicResourcesProvider(t) mockPodsProvider.EXPECT().GetPods().Return(tc.pods).Maybe() + mockPodsProvider.EXPECT().GetActivePods().Return(tc.pods).Maybe() tc.mockFunc(tc.pods, mockDevicesProvider, mockCPUsProvider, mockMemoryProvider, mockDynamicResourcesProvider) providers := PodResourcesProviders{ diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/testing/pods_provider.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/testing/pods_provider.go index e0e23736e6..57189793be 100644 --- 
a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/testing/pods_provider.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/testing/pods_provider.go @@ -37,6 +37,53 @@ func (_m *MockPodsProvider) EXPECT() *MockPodsProvider_Expecter { return &MockPodsProvider_Expecter{mock: &_m.Mock} } +// GetActivePods provides a mock function with no fields +func (_m *MockPodsProvider) GetActivePods() []*v1.Pod { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for GetActivePods") + } + + var r0 []*v1.Pod + if rf, ok := ret.Get(0).(func() []*v1.Pod); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*v1.Pod) + } + } + + return r0 +} + +// MockPodsProvider_GetActivePods_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetActivePods' +type MockPodsProvider_GetActivePods_Call struct { + *mock.Call +} + +// GetActivePods is a helper method to define mock.On call +func (_e *MockPodsProvider_Expecter) GetActivePods() *MockPodsProvider_GetActivePods_Call { + return &MockPodsProvider_GetActivePods_Call{Call: _e.mock.On("GetActivePods")} +} + +func (_c *MockPodsProvider_GetActivePods_Call) Run(run func()) *MockPodsProvider_GetActivePods_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockPodsProvider_GetActivePods_Call) Return(_a0 []*v1.Pod) *MockPodsProvider_GetActivePods_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockPodsProvider_GetActivePods_Call) RunAndReturn(run func() []*v1.Pod) *MockPodsProvider_GetActivePods_Call { + _c.Call.Return(run) + return _c +} + // GetPodByName provides a mock function with given fields: namespace, name func (_m *MockPodsProvider) GetPodByName(namespace string, name string) (*v1.Pod, bool) { ret := _m.Called(namespace, name) diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/types.go 
b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/types.go index ee1269d969..66d7c6cfda 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/types.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/apis/podresources/types.go @@ -34,6 +34,7 @@ type DevicesProvider interface { // PodsProvider knows how to provide the pods admitted by the node type PodsProvider interface { + GetActivePods() []*v1.Pod GetPods() []*v1.Pod GetPodByName(namespace, name string) (*v1.Pod, bool) } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/images/image_gc_manager.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/images/image_gc_manager.go index 6065037622..03823deca2 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/images/image_gc_manager.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/images/image_gc_manager.go @@ -521,7 +521,10 @@ func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo, if isRuntimeClassInImageCriAPIEnabled { imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage) } + + im.imageRecordsLock.Lock() delete(im.imageRecords, imageKey) + im.imageRecordsLock.Unlock() metrics.ImageGarbageCollectedTotal.WithLabelValues(reason).Inc() return err diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go index 520da786d6..385179a648 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go @@ -3173,6 +3173,22 @@ func (kl *Kubelet) ListenAndServeReadOnly(address net.IP, port uint, tp trace.Tr server.ListenAndServeKubeletReadOnlyServer(kl, kl.resourceAnalyzer, kl.containerManager.GetHealthCheckers(), kl.flagz, address, port, tp) } +type kubeletPodsProvider struct { + kl *Kubelet +} + +func (pp *kubeletPodsProvider) GetActivePods() []*v1.Pod { + return pp.kl.GetActivePods() +} + +func (pp 
*kubeletPodsProvider) GetPods() []*v1.Pod { + return pp.kl.podManager.GetPods() +} + +func (pp *kubeletPodsProvider) GetPodByName(namespace, name string) (*v1.Pod, bool) { + return pp.kl.podManager.GetPodByName(namespace, name) +} + // ListenAndServePodResources runs the kubelet podresources grpc service func (kl *Kubelet) ListenAndServePodResources() { endpoint, err := util.LocalEndpoint(kl.getPodResourcesDir(), podresources.Socket) @@ -3182,7 +3198,7 @@ func (kl *Kubelet) ListenAndServePodResources() { } providers := podresources.PodResourcesProviders{ - Pods: kl.podManager, + Pods: &kubeletPodsProvider{kl: kl}, Devices: kl.containerManager, Cpus: kl.containerManager, Memory: kl.containerManager, diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go index 92321bd9bd..c449a3df35 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go @@ -137,7 +137,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, // If pod has exclusive cpu and the container in question has integer cpu requests // the cfs quota will not be enforced disableCPUQuota := utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableCPUQuotaWithExclusiveCPUs) && m.containerManager.ContainerHasExclusiveCPUs(pod, container) - klog.V(2).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) + klog.V(5).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) lcr := m.calculateLinuxResources(cpuRequest, cpuLimit, memoryLimit, disableCPUQuota) lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container, diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go 
b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go index ebf8d4e620..bd2472199d 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go @@ -59,8 +59,8 @@ func (m *kubeGenericRuntimeManager) calculateSandboxResources(pod *v1.Pod) *runt // If pod has exclusive cpu the sandbox will not have cfs quote enforced disableCPUQuota := utilfeature.DefaultFeatureGate.Enabled(features.DisableCPUQuotaWithExclusiveCPUs) && m.containerManager.PodHasExclusiveCPUs(pod) - klog.V(2).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) + klog.V(5).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) return m.calculateLinuxResources(cpuRequest, lim.Cpu(), lim.Memory(), disableCPUQuota) } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed.go index 4063d5381d..d9266e440f 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed.go @@ -117,14 +117,36 @@ func GenerateResourceName(workloadName string) v1.ResourceName { func updateContainers(workloadName string, pod *v1.Pod) error { updateContainer := func(container *v1.Container) error { if container.Resources.Requests == nil { - return fmt.Errorf("managed container %v does not have Resource.Requests", container.Name) + // Nothing to modify, but that is OK, it will not + // change the QoS class of the modified Pod + return nil } - if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok { + + _, cpuOk := container.Resources.Requests[v1.ResourceCPU] + _, memoryOk := container.Resources.Requests[v1.ResourceMemory] + + // It is possible memory is configured using limits only and that implies + // requests with the 
same value, check for that in case memory requests + // are not present by themselves. + if !memoryOk && container.Resources.Limits != nil { + _, memoryOk = container.Resources.Limits[v1.ResourceMemory] + } + + // When both cpu and memory requests are missing, there is nothing + // to do + if !cpuOk && !memoryOk { + return nil + } + + // Both memory and cpu have to be set to make sure stripping them + // will not change the QoS class of the Pod + if !cpuOk { return fmt.Errorf("managed container %v does not have cpu requests", container.Name) } - if _, ok := container.Resources.Requests[v1.ResourceMemory]; !ok { + if !memoryOk { return fmt.Errorf("managed container %v does not have memory requests", container.Name) } + if container.Resources.Limits == nil { container.Resources.Limits = v1.ResourceList{} } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed_test.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed_test.go index e4973f8a01..3a2f2b3a18 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/managed/managed_test.go @@ -19,17 +19,6 @@ func TestModifyStaticPodForPinnedManagementErrorStates(t *testing.T) { pod *v1.Pod expectedError error }{ - { - pod: createPod(workloadAnnotations, nil, - &v1.Container{ - Name: "nginx", - Image: "test/image", - Resources: v1.ResourceRequirements{ - Requests: nil, - }, - }), - expectedError: fmt.Errorf("managed container nginx does not have Resource.Requests"), - }, { pod: createPod(workloadAnnotations, nil, &v1.Container{ @@ -129,6 +118,7 @@ func TestStaticPodManaged(t *testing.T) { pod *v1.Pod expectedAnnotations map[string]string isGuaranteed bool + isBestEffort bool }{ { pod: &v1.Pod{ @@ -168,6 +158,47 @@ func TestStaticPodManaged(t *testing.T) { "resources.workload.openshift.io/nginx": `{"cpushares":102}`, }, }, + { + pod: &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "", 
+ }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: "12345", + Namespace: "mynamespace", + Annotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "test/image", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceMemory): resource.MustParse("100m"), + v1.ResourceName(v1.ResourceCPU): resource.MustParse("100m"), + }, + Limits: v1.ResourceList{ + v1.ResourceName(v1.ResourceMemory): resource.MustParse("100m"), + }, + }, + }, + }, + SecurityContext: &v1.PodSecurityContext{}, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expectedAnnotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + "resources.workload.openshift.io/nginx": `{"cpushares":102}`, + }, + }, { pod: &v1.Pod{ TypeMeta: metav1.TypeMeta{ @@ -270,6 +301,164 @@ func TestStaticPodManaged(t *testing.T) { "resources.workload.openshift.io/c1": `{"cpushares":20,"cpulimit":100}`, }, }, + { + pod: &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: "12345", + Namespace: "mynamespace", + Annotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{}, + }, + }, + SecurityContext: &v1.PodSecurityContext{}, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expectedAnnotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + isBestEffort: true, + }, + { + pod: &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + 
UID: "12345", + Namespace: "mynamespace", + Annotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceName("dummy"): resource.MustParse("20m"), + }, + }, + }, + }, + SecurityContext: &v1.PodSecurityContext{}, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expectedAnnotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + isBestEffort: true, + }, + { + pod: &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: "12345", + Namespace: "mynamespace", + Annotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{}, + }, + }, + InitContainers: []v1.Container{ + { + Name: "ic1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{}, + }, + }, + SecurityContext: &v1.PodSecurityContext{}, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expectedAnnotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + isBestEffort: true, + }, + { + pod: &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: "12345", + Namespace: "mynamespace", + Annotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + 
v1.ResourceName("dummy"): resource.MustParse("20m"), + }, + }, + }, + }, + InitContainers: []v1.Container{ + { + Name: "ic1", + Image: "test/nginx", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceName("dummy"): resource.MustParse("20m"), + }, + }, + }, + }, + SecurityContext: &v1.PodSecurityContext{}, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expectedAnnotations: map[string]string{ + "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`, + }, + isBestEffort: true, + }, { pod: &v1.Pod{ TypeMeta: metav1.TypeMeta{ @@ -481,15 +670,24 @@ func TestStaticPodManaged(t *testing.T) { if container.Resources.Requests.Cpu().String() != "0" && !tc.isGuaranteed { t.Errorf("cpu requests should be 0 got %v", container.Resources.Requests.Cpu().String()) } - if container.Resources.Requests.Memory().String() == "0" && !tc.isGuaranteed { - t.Errorf("memory requests were %v but should be %v", container.Resources.Requests.Memory().String(), container.Resources.Requests.Memory().String()) + if container.Resources.Requests.Memory().String() == "0" && !tc.isGuaranteed && !tc.isBestEffort { + t.Errorf("memory requests were %v but should be %v in container %v", container.Resources.Requests.Memory().String(), container.Resources.Requests.Memory().String(), container.Name) } - if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed { + if container.Resources.Requests.Memory().String() != "0" && !tc.isGuaranteed && tc.isBestEffort { + t.Errorf("memory requests should be 0 got %v", container.Resources.Requests.Memory().String()) + } + if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed && !tc.isBestEffort { t.Errorf("managed capacity label missing from pod %v and container %v", tc.pod.Name, container.Name) } - if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; !exists && 
!tc.isGuaranteed { + if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed && !tc.isBestEffort { t.Errorf("managed capacity label missing from pod %v and container %v limits", tc.pod.Name, container.Name) } + if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; exists && tc.isBestEffort { + t.Errorf("managed capacity label present in best-effort pod %v and container %v requests", tc.pod.Name, container.Name) + } + if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; exists && tc.isBestEffort { + t.Errorf("managed capacity label present in best-effort pod %v and container %v limits", tc.pod.Name, container.Name) + } } } } diff --git a/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy.go b/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy.go index 1de03e459d..71cd60c059 100644 --- a/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy.go +++ b/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy.go @@ -379,6 +379,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt isUncountedTerminatedPodsChanged := !apiequality.Semantic.DeepEqual(oldJob.Status.UncountedTerminatedPods, newJob.Status.UncountedTerminatedPods) isReadyChanged := !ptr.Equal(oldJob.Status.Ready, newJob.Status.Ready) isTerminatingChanged := !ptr.Equal(oldJob.Status.Terminating, newJob.Status.Terminating) + isSuspendedWithZeroCompletions := ptr.Equal(newJob.Spec.Suspend, ptr.To(true)) && ptr.Equal(newJob.Spec.Completions, ptr.To[int32](0)) return batchvalidation.JobStatusValidationOptions{ // We allow to decrease the counter for succeeded pods for jobs which @@ -394,7 +395,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt RejectFailedJobWithoutFailureTarget: isJobFailedChanged || isFailedIndexesChanged, RejectCompleteJobWithoutSuccessCriteriaMet: 
isJobCompleteChanged || isJobSuccessCriteriaMetChanged, RejectFinishedJobWithActivePods: isJobFinishedChanged || isActiveChanged, - RejectFinishedJobWithoutStartTime: isJobFinishedChanged || isStartTimeChanged, + RejectFinishedJobWithoutStartTime: (isJobFinishedChanged || isStartTimeChanged) && !isSuspendedWithZeroCompletions, RejectFinishedJobWithUncountedTerminatedPods: isJobFinishedChanged || isUncountedTerminatedPodsChanged, RejectStartTimeUpdateForUnsuspendedJob: isStartTimeChanged, RejectCompletionTimeBeforeStartTime: isStartTimeChanged || isCompletionTimeChanged, diff --git a/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy_test.go b/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy_test.go index 1c766c4c9f..69b6f7fbd7 100644 --- a/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/registry/batch/job/strategy_test.go @@ -3535,6 +3535,36 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) { {Type: field.ErrorTypeInvalid, Field: "status.ready"}, }, }, + "valid transition to Complete for suspended Job with completions=0; without startTime": { + enableJobManagedBy: true, + job: &batch.Job{ + ObjectMeta: validObjectMeta, + Spec: batch.JobSpec{ + Completions: ptr.To[int32](0), + Suspend: ptr.To(true), + }, + }, + newJob: &batch.Job{ + ObjectMeta: validObjectMeta, + Spec: batch.JobSpec{ + Completions: ptr.To[int32](0), + Suspend: ptr.To(true), + }, + Status: batch.JobStatus{ + CompletionTime: &now, + Conditions: []batch.JobCondition{ + { + Type: batch.JobSuccessCriteriaMet, + Status: api.ConditionTrue, + }, + { + Type: batch.JobComplete, + Status: api.ConditionTrue, + }, + }, + }, + }, + }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation_test.go 
b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation_test.go index 690f831af3..70aa2ea266 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation_test.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/compilation_test.go @@ -1872,6 +1872,18 @@ func TestCostEstimation(t *testing.T) { setMaxElements: 1000, expectedSetCost: 401, }, + { + name: "IntOrString type with quantity rule", + schemaGenerator: func(max *int64) *schema.Structural { + intOrString := intOrStringType() + intOrString = withRule(intOrString, "isQuantity(self)") + intOrString = withMaxLength(intOrString, max) + return &intOrString + }, + expectedCalcCost: 314574, + setMaxElements: 20, + expectedSetCost: 9, + }, } for _, testCase := range cases { t.Run(testCase.name, func(t *testing.T) { diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/model/schemas_test.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/model/schemas_test.go index 99c09a99af..9afdd74703 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/model/schemas_test.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel/model/schemas_test.go @@ -34,8 +34,8 @@ func TestSchemaDeclType(t *testing.T) { if cust.TypeName() != "object" { t.Errorf("incorrect type name, got %v, wanted object", cust.TypeName()) } - if len(cust.Fields) != 4 { - t.Errorf("incorrect number of fields, got %d, wanted 4", len(cust.Fields)) + if len(cust.Fields) != 5 { + t.Errorf("incorrect number of fields, got %d, wanted 5", len(cust.Fields)) } for _, f := range cust.Fields { prop, found := ts.Properties[f.Name] @@ -70,6 +70,13 
@@ func TestSchemaDeclType(t *testing.T) { } } } + if prop.ValueValidation != nil && prop.ValueValidation.MaxLength != nil { + if f.Type.MaxElements != 4*(*prop.ValueValidation.MaxLength) { + // When converting maxLength to maxElements, it's based on the number of bytes.] + // Worst case is that one rune is 4 bytes, so maxElements should be 4x maxLength. + t.Errorf("field maxElements does not match property 4x maxLength. field: %s, maxElements: %d, maxLength: %d", f.Name, f.Type.MaxElements, *prop.ValueValidation.MaxLength) + } + } } if ts.ValueValidation != nil { for _, name := range ts.ValueValidation.Required { @@ -137,6 +144,7 @@ func testSchema() *schema.Structural { // properties: // name: // type: string + // maxLength: 256 // nested: // type: object // properties: @@ -166,6 +174,12 @@ func testSchema() *schema.Structural { // format: int64 // default: 1 // enum: [1,2,3] + // intOrString: + // x-kubernetes-int-or-string: true + // anyOf: + // - type: "integer" + // - type: "string" + // maxLength: 20 ts := &schema.Structural{ Generic: schema.Generic{ Type: "object", @@ -175,6 +189,9 @@ func testSchema() *schema.Structural { Generic: schema.Generic{ Type: "string", }, + ValueValidation: &schema.ValueValidation{ + MaxLength: maxPtr(256), + }, }, "value": { Generic: schema.Generic{ @@ -245,6 +262,26 @@ func testSchema() *schema.Structural { }, }, }, + "intOrString": { + Extensions: schema.Extensions{ + XIntOrString: true, + }, + ValueValidation: &schema.ValueValidation{ + MaxLength: maxPtr(20), + AnyOf: []schema.NestedValueValidation{ + { + ForbiddenGenerics: schema.Generic{ + Type: "integer", + }, + }, + { + ForbiddenGenerics: schema.Generic{ + Type: "string", + }, + }, + }, + }, + }, }, } return ts diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/cel/common/schemas.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/cel/common/schemas.go index 19392babeb..909284166a 100644 --- 
a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/cel/common/schemas.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/cel/common/schemas.go @@ -55,8 +55,15 @@ func SchemaDeclType(s Schema, isResourceRoot bool) *apiservercel.DeclType { // `type(intOrStringField) == int ? intOrStringField < 5 : double(intOrStringField.replace('%', '')) < 0.5 // dyn := apiservercel.NewSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1) // smallest value for a serialized x-kubernetes-int-or-string is 0 - // handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string - dyn.MaxElements = maxRequestSizeBytes - 2 + + // If the schema has a maxlength constraint, bound the max elements based on the max length. + // Otherwise, fallback to the max request size. + if s.MaxLength() != nil { + dyn.MaxElements = estimateMaxElementsFromMaxLength(s) + } else { + dyn.MaxElements = estimateMaxStringLengthPerRequest(s) + } + return dyn } @@ -159,11 +166,7 @@ func SchemaDeclType(s Schema, isResourceRoot bool) *apiservercel.DeclType { strWithMaxLength := apiservercel.NewSimpleTypeWithMinSize("string", cel.StringType, types.String(""), apiservercel.MinStringSize) if s.MaxLength() != nil { - // multiply the user-provided max length by 4 in the case of an otherwise-untyped string - // we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points, - // but we need to reason about length for things like request size, so we use bytes in this code (and an individual - // unicode code point can be up to 4 bytes long) - strWithMaxLength.MaxElements = zeroIfNegative(*s.MaxLength()) * 4 + strWithMaxLength.MaxElements = estimateMaxElementsFromMaxLength(s) } else { if len(s.Enum()) > 0 { strWithMaxLength.MaxElements = estimateMaxStringEnumLength(s) @@ -228,6 +231,7 @@ func WithTypeAndObjectMeta(s *spec.Schema) *spec.Schema { // must only be called on schemas of type "string" or 
x-kubernetes-int-or-string: true func estimateMaxStringLengthPerRequest(s Schema) int64 { if s.IsXIntOrString() { + // handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string return maxRequestSizeBytes - 2 } switch s.Format() { @@ -272,3 +276,13 @@ func estimateMaxAdditionalPropertiesFromMinSize(minSize int64) int64 { // subtract 2 to account for { and } return (maxRequestSizeBytes - 2) / keyValuePairSize } + +// estimateMaxElementsFromMaxLength estimates the maximum number of elements for a string schema +// that is bound with a maxLength constraint. +func estimateMaxElementsFromMaxLength(s Schema) int64 { + // multiply the user-provided max length by 4 in the case of an otherwise-untyped string + // we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points, + // but we need to reason about length for things like request size, so we use bytes in this code (and an individual + // unicode code point can be up to 4 bytes long) + return zeroIfNegative(*s.MaxLength()) * 4 +} diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher_whitebox_test.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher_whitebox_test.go index e7fccdab26..6bd17feba1 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher_whitebox_test.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher_whitebox_test.go @@ -662,6 +662,7 @@ func TestMatchExactResourceVersionFallback(t *testing.T) { } for _, tc := range tcs { t.Run(tc.name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ListFromCacheSnapshot, true) backingStorage := &dummyStorage{} expectStoreRequests := 0 backingStorage.getListFn = func(_ context.Context, key string, opts storage.ListOptions, listObj 
runtime.Object) error { @@ -759,6 +760,125 @@ func TestGetListNonRecursiveCacheBypass(t *testing.T) { } } +func TestGetListNonRecursiveCacheWithConsistentListFromCache(t *testing.T) { + // Set feature gates once at the beginning since we only care about ConsistentListFromCache=true and ListFromCacheSnapshot=false + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ConsistentListFromCache, true) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ListFromCacheSnapshot, false) + forceRequestWatchProgressSupport(t) + + tests := []struct { + name string + consistentListFromCache bool + expectGetListCallCount int + expectGetCurrentRV bool + injectRVError bool + expectedError error + }{ + { + name: "ConsistentListFromCache enabled - served from cache", + consistentListFromCache: true, + expectGetListCallCount: 1, + expectGetCurrentRV: true, + injectRVError: false, + expectedError: nil, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var getListCount, getCurrentRVCount int + backingStorage := &dummyStorage{} + + backingStorage.getListFn = func(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error { + getListCount++ + if tc.injectRVError { + return errDummy + } + podList := listObj.(*example.PodList) + podList.ListMeta = metav1.ListMeta{ResourceVersion: "100"} + return nil + } + + backingStorage.getRVFn = func(ctx context.Context) (uint64, error) { + getCurrentRVCount++ + rv := uint64(100) + err := error(nil) + if tc.injectRVError { + err = errDummy + return 0, err + } + return rv, nil + } + + cacher, v, err := newTestCacher(backingStorage) + if err != nil { + t.Fatalf("Couldn't create cacher: %v", err) + } + defer cacher.Stop() + + // Wait for cacher to be ready before injecting errors + if err := cacher.ready.wait(context.Background()); err != nil { + t.Fatalf("unexpected error waiting for the cache to be ready: %v", err) + } + 
delegator := NewCacheDelegator(cacher, backingStorage) + defer delegator.Stop() + + // Setup test object + key := "pods/ns" + input := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "ns"}} + if err := v.UpdateObject(input, 100); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // Put object into the store + if err := cacher.watchCache.Add(input); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + pred := storage.SelectionPredicate{ + Label: labels.Everything(), + Field: fields.Everything(), + Limit: 500, + } + result := &example.PodList{} + + // Make the list call with empty RV - delegator will get current RV and use it + err = delegator.GetList(context.TODO(), key, storage.ListOptions{ + ResourceVersion: "", + Predicate: pred, + Recursive: true, + }, result) + + // Verify error matches expectation + if !errors.Is(err, tc.expectedError) { + t.Errorf("Expected error %v, got: %v", tc.expectedError, err) + } + + // Verify the correct storage method was called + if getListCount != tc.expectGetListCallCount { + t.Errorf("Expected GetList to be called %d times, but it was called %d times", tc.expectGetListCallCount, getListCount) + } + if tc.expectGetCurrentRV && getCurrentRVCount == 0 { + t.Error("Expected GetCurrentResourceVersion to be called, but it wasn't") + } + if !tc.expectGetCurrentRV && getCurrentRVCount > 0 { + t.Errorf("Expected GetCurrentResourceVersion not to be called, but it was called %d times", getCurrentRVCount) + } + + // For successful cache reads, verify the resource version + if err == nil { + resultRV, err := cacher.versioner.ParseResourceVersion(result.ResourceVersion) + if err != nil { + t.Fatalf("Failed to parse result resource version: %v", err) + } + expectedRV := uint64(100) + if resultRV != expectedRV { + t.Errorf("Expected RV %d but got %d", expectedRV, resultRV) + } + } + }) + } +} func TestGetCacheBypass(t *testing.T) { backingStorage := &dummyStorage{} cacher, _, err := 
newTestCacher(backingStorage) diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go index ac17fb1c88..10d2ce4c81 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/storage/cacher/delegator.go @@ -206,6 +206,7 @@ func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.L return c.storage.GetList(ctx, key, opts, listObj) } } + fallbackOpts := opts if result.ConsistentRead { listRV, err = c.storage.GetCurrentResourceVersion(ctx) if err != nil { @@ -213,20 +214,28 @@ func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.L } // Setting resource version for consistent read in cache based on current ResourceVersion in etcd. opts.ResourceVersion = strconv.FormatInt(int64(listRV), 10) + // If continue is not set, we need to set the resource version match to ResourceVersionMatchNotOlderThan to serve latest from cache + if opts.Predicate.Continue == "" { + opts.ResourceVersionMatch = metav1.ResourceVersionMatchNotOlderThan + } } err = c.cacher.GetList(ctx, key, opts, listObj) success := "true" fallback := "false" if err != nil { - if errors.IsResourceExpired(err) { - return c.storage.GetList(ctx, key, opts, listObj) + // ResourceExpired error occurs when attempting to list from cache with a specific resourceVersion + // that is no longer available in the cache. With ListFromCacheSnapshot feature (1.34+), we can + // serve exact resourceVersion requests from cache if available, falling back to storage only when + // the requested version is expired. 
+ if errors.IsResourceExpired(err) && utilfeature.DefaultFeatureGate.Enabled(features.ListFromCacheSnapshot) { + return c.storage.GetList(ctx, key, fallbackOpts, listObj) } if result.ConsistentRead { + // IsTooLargeResourceVersion occurs when the requested RV is higher than cache's current RV + // and cache hasn't caught up within the timeout period. Fall back to etcd. if storage.IsTooLargeResourceVersion(err) { fallback = "true" - // Reset resourceVersion during fallback from consistent read. - opts.ResourceVersion = "" - err = c.storage.GetList(ctx, key, opts, listObj) + err = c.storage.GetList(ctx, key, fallbackOpts, listObj) } if err != nil { success = "false" diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook.go index b03640ae8d..8552e91eb5 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook.go @@ -83,6 +83,7 @@ func NewGenericWebhook(scheme *runtime.Scheme, codecFactory serializer.CodecFact clientConfig := rest.CopyConfig(config) codec := codecFactory.LegacyCodec(groupVersions...) 
+ clientConfig.ContentType = runtime.ContentTypeJSON clientConfig.ContentConfig.NegotiatedSerializer = serializer.NegotiatedSerializerWrapper(runtime.SerializerInfo{Serializer: codec}) clientConfig.Wrap(x509metrics.NewDeprecatedCertificateRoundTripperWrapperConstructor( diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook_test.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook_test.go index 068c6821e5..dba0e3ed6e 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook_test.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver/pkg/util/webhook/webhook_test.go @@ -23,6 +23,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "net" "net/http" "net/http/httptest" @@ -33,11 +34,15 @@ import ( "strings" "testing" "time" + "unicode/utf8" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/util/wait" + exampleinstall "k8s.io/apiserver/pkg/apis/example/install" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" v1 "k8s.io/client-go/tools/clientcmd/api/v1" @@ -927,3 +932,57 @@ func getSingleCounterValueFromRegistry(t *testing.T, r metrics.Gatherer, name st return -1 } + +func TestRESTConfigContentType(t *testing.T) { + server, err := newTestServer(clientCert, clientKey, caCert, func(w http.ResponseWriter, r *http.Request) { + if got := r.Header.Get("Content-Type"); got != runtime.ContentTypeJSON { + t.Errorf("expected request content-type: want %q got %q", runtime.ContentTypeJSON, got) + } + body, err := io.ReadAll(r.Body) + if err != nil { + t.Errorf("failed to read request body: %v", err) + return + } + if err := json.Unmarshal(body, new(any)); err != nil { + switch { + case len(body) == 0: + t.Log("empty request 
body") + case utf8.Valid(body): + t.Logf("request body: %s", string(body)) + default: + t.Logf("request body: 0x%x", body) + } + t.Errorf("failed to unmarshal request body as json: %v", err) + } + }) + if err != nil { + t.Errorf("failed to create server: %v", err) + return + } + defer server.Close() + + config := &rest.Config{ + ContentConfig: rest.ContentConfig{ + ContentType: "foo/bar", + }, + Host: server.URL, + TLSClientConfig: rest.TLSClientConfig{ + CAData: caCert, + CertData: clientCert, + KeyData: clientKey, + }, + } + + scheme := runtime.NewScheme() + exampleinstall.Install(scheme) + codecs := serializer.NewCodecFactory(scheme) + groupVersions := []schema.GroupVersion{examplev1.SchemeGroupVersion} + wh, err := NewGenericWebhook(scheme, codecs, config, groupVersions, retryBackoff) + if err != nil { + t.Fatalf("failed to create the webhook: %v", err) + } + + if err := wh.RestClient.Post().Body(&examplev1.Pod{}).Do(context.TODO()).Error(); err != nil { + t.Fatalf("failed to complete request: %v", err) + } +} diff --git a/deps/github.com/openshift/kubernetes/test/e2e/common/node/lifecycle_hook.go b/deps/github.com/openshift/kubernetes/test/e2e/common/node/lifecycle_hook.go index a666b87f1f..794913c1d4 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/common/node/lifecycle_hook.go +++ b/deps/github.com/openshift/kubernetes/test/e2e/common/node/lifecycle_hook.go @@ -32,6 +32,7 @@ import ( e2epod "k8s.io/kubernetes/test/e2e/framework/pod" imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" + "k8s.io/utils/ptr" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -551,7 +552,7 @@ func validDuration(duration time.Duration, low, high int64) bool { return duration >= time.Second*time.Duration(low) && duration <= time.Second*time.Duration(high) } -var _ = SIGDescribe(feature.PodLifecycleSleepAction, func() { +var _ = SIGDescribe("Lifecycle Sleep Hook", func() { f := 
framework.NewDefaultFramework("pod-lifecycle-sleep-action") f.NamespacePodSecurityLevel = admissionapi.LevelBaseline var podClient *e2epod.PodClient @@ -560,73 +561,148 @@ var _ = SIGDescribe(feature.PodLifecycleSleepAction, func() { ginkgo.BeforeEach(func(ctx context.Context) { podClient = e2epod.NewPodClient(f) }) + + var finalizer = "test/finalizer" + /* + Release : v1.34 + Testname: Pod Lifecycle, prestop sleep hook + Description: When a pre-stop handler is specified in the container lifecycle using a 'Sleep' action, then the handler MUST be invoked before the container is terminated. A test pod will be created to verify if its termination time aligns with the sleep time specified when it is terminated. + */ ginkgo.It("valid prestop hook using sleep action", func(ctx context.Context) { + const sleepSeconds = 50 + const gracePeriod = 100 lifecycle := &v1.Lifecycle{ PreStop: &v1.LifecycleHandler{ - Sleep: &v1.SleepAction{Seconds: 5}, + Sleep: &v1.SleepAction{Seconds: sleepSeconds}, }, } - podWithHook := getPodWithHook("pod-with-prestop-sleep-hook", imageutils.GetPauseImageName(), lifecycle) + name := "pod-with-prestop-sleep-hook" + podWithHook := getPodWithHook(name, imageutils.GetPauseImageName(), lifecycle) + podWithHook.Finalizers = append(podWithHook.Finalizers, finalizer) + podWithHook.Spec.TerminationGracePeriodSeconds = ptr.To[int64](gracePeriod) ginkgo.By("create the pod with lifecycle hook using sleep action") - podClient.CreateSync(ctx, podWithHook) + p := podClient.CreateSync(ctx, podWithHook) + defer podClient.RemoveFinalizer(ctx, name, finalizer) ginkgo.By("delete the pod with lifecycle hook using sleep action") - start := time.Now() - podClient.DeleteSync(ctx, podWithHook.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete) - cost := time.Since(start) - // cost should be - // longer than 5 seconds (pod should sleep for 5 seconds) - // shorter than gracePeriodSeconds (default 30 seconds here) - if !validDuration(cost, 5, 30) { - 
framework.Failf("unexpected delay duration before killing the pod, cost = %v", cost) + _ = podClient.Delete(ctx, podWithHook.Name, metav1.DeleteOptions{}) + p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) + if err != nil { + framework.Failf("failed getting pod after deletion") + } + // deletionTimestamp equals to delete_time + tgps + // TODO: reduce sleep_seconds and tgps after issues.k8s.io/132205 is solved + // we get deletionTimestamp before container become terminated here because of issues.k8s.io/132205 + deletionTS := p.DeletionTimestamp.Time + if err := e2epod.WaitForContainerTerminated(ctx, f.ClientSet, p.Namespace, p.Name, name, sleepSeconds*2*time.Second); err != nil { + framework.Failf("failed waiting for container terminated") + } + + p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) + if err != nil { + framework.Failf("failed getting pod after deletion") + } + // finishAt equals to delete_time + sleep_duration + finishAt := p.Status.ContainerStatuses[0].State.Terminated.FinishedAt + + // sleep_duration = (delete_time + sleep_duration) - (delete_time + tgps) + tgps + sleepDuration := finishAt.Sub(deletionTS) + time.Second*gracePeriod + + // sleep_duration should be + // longer than 50 seconds (pod should sleep for 50 seconds) + // shorter than gracePeriodSeconds (100 seconds here) + if !validDuration(sleepDuration, sleepSeconds, gracePeriod) { + framework.Failf("unexpected delay duration before killing the pod, finishAt = %v, deletionAt= %v", finishAt, deletionTS) } }) + /* + Release : v1.34 + Testname: Pod Lifecycle, prestop sleep hook with low gracePeriodSeconds + Description: When a pre-stop handler is specified in the container lifecycle using a 'Sleep' action, then the handler MUST be invoked before the container is terminated. A test pod will be created, and its `gracePeriodSeconds` will be modified to a value less than the sleep time before termination. 
The termination time will then be checked to ensure it aligns with the `gracePeriodSeconds` value. + */ ginkgo.It("reduce GracePeriodSeconds during runtime", func(ctx context.Context) { + const sleepSeconds = 50 lifecycle := &v1.Lifecycle{ PreStop: &v1.LifecycleHandler{ - Sleep: &v1.SleepAction{Seconds: 15}, + Sleep: &v1.SleepAction{Seconds: sleepSeconds}, }, } - podWithHook := getPodWithHook("pod-with-prestop-sleep-hook", imageutils.GetPauseImageName(), lifecycle) + name := "pod-with-prestop-sleep-hook" + podWithHook := getPodWithHook(name, imageutils.GetPauseImageName(), lifecycle) + podWithHook.Finalizers = append(podWithHook.Finalizers, finalizer) + podWithHook.Spec.TerminationGracePeriodSeconds = ptr.To[int64](100) ginkgo.By("create the pod with lifecycle hook using sleep action") - podClient.CreateSync(ctx, podWithHook) + p := podClient.CreateSync(ctx, podWithHook) + defer podClient.RemoveFinalizer(ctx, name, finalizer) ginkgo.By("delete the pod with lifecycle hook using sleep action") - start := time.Now() - podClient.DeleteSync(ctx, podWithHook.Name, *metav1.NewDeleteOptions(2), f.Timeouts.PodDelete) - cost := time.Since(start) - // cost should be - // longer than 2 seconds (we change gracePeriodSeconds to 2 seconds here, and it's less than sleep action) + + const gracePeriod = 30 + _ = podClient.Delete(ctx, podWithHook.Name, *metav1.NewDeleteOptions(gracePeriod)) + p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) + if err != nil { + framework.Failf("failed getting pod after deletion") + } + // deletionTimestamp equals to delete_time + tgps + // TODO: reduce sleep_seconds and tgps after issues.k8s.io/132205 is solved + // we get deletionTimestamp before container become terminated here because of issues.k8s.io/132205 + deletionTS := p.DeletionTimestamp.Time + if err := e2epod.WaitForContainerTerminated(ctx, f.ClientSet, p.Namespace, p.Name, name, sleepSeconds*2*time.Second); err != nil { + framework.Failf("failed waiting for container terminated") 
+ } + p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) + if err != nil { + framework.Failf("failed getting pod after deletion") + } + // finishAt equals to delete_time + sleep_duration + finishAt := p.Status.ContainerStatuses[0].State.Terminated.FinishedAt + + // sleep_duration = (delete_time + sleep_duration) - (delete_time + tgps) + tgps + sleepDuration := finishAt.Sub(deletionTS) + time.Second*gracePeriod + // sleep_duration should be + // longer than 30 seconds (we change gracePeriodSeconds to 30 seconds here, and it's less than sleep action) // shorter than sleep action (to make sure it doesn't take effect) - if !validDuration(cost, 2, 15) { - framework.Failf("unexpected delay duration before killing the pod, cost = %v", cost) + if !validDuration(sleepDuration, gracePeriod, sleepSeconds) { + framework.Failf("unexpected delay duration before killing the pod, finishAt = %v, deletionAt= %v", finishAt, deletionTS) } }) + /* + Release : v1.34 + Testname: Pod Lifecycle, prestop sleep hook with erroneous startup command + Description: When a pre-stop handler is specified in the container lifecycle using a 'Sleep' action, then the handler MUST be invoked before the container is terminated. A test pod with an erroneous startup command will be created, and upon termination, it will be checked whether it ignored the sleep time. 
+ */ ginkgo.It("ignore terminated container", func(ctx context.Context) { + const sleepSeconds = 10 + const gracePeriod = 30 lifecycle := &v1.Lifecycle{ PreStop: &v1.LifecycleHandler{ - Sleep: &v1.SleepAction{Seconds: 20}, + Sleep: &v1.SleepAction{Seconds: sleepSeconds}, }, } name := "pod-with-prestop-sleep-hook" podWithHook := getPodWithHook(name, imageutils.GetE2EImage(imageutils.BusyBox), lifecycle) + podWithHook.Spec.TerminationGracePeriodSeconds = ptr.To[int64](gracePeriod) podWithHook.Spec.Containers[0].Command = []string{"/bin/sh"} podWithHook.Spec.Containers[0].Args = []string{"-c", "exit 0"} podWithHook.Spec.RestartPolicy = v1.RestartPolicyNever ginkgo.By("create the pod with lifecycle hook using sleep action") p := podClient.Create(ctx, podWithHook) - framework.ExpectNoError(e2epod.WaitForContainerTerminated(ctx, f.ClientSet, f.Namespace.Name, p.Name, name, 3*time.Minute)) - ginkgo.By("delete the pod with lifecycle hook using sleep action") - start := time.Now() - podClient.DeleteSync(ctx, podWithHook.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete) - cost := time.Since(start) + defer podClient.DeleteSync(ctx, podWithHook.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete) + framework.ExpectNoError(e2epod.WaitForContainerTerminated(ctx, f.ClientSet, f.Namespace.Name, p.Name, name, gracePeriod*time.Second)) + + p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) + if err != nil { + framework.Failf("failed getting pod after deletion") + } + finishAt := p.Status.ContainerStatuses[0].State.Terminated.FinishedAt + startedAt := p.Status.ContainerStatuses[0].State.Terminated.StartedAt + cost := finishAt.Sub(startedAt.Time) // cost should be // shorter than sleep action (container is terminated and sleep action should be ignored) - if !validDuration(cost, 0, 15) { + if !validDuration(cost, 0, sleepSeconds) { framework.Failf("unexpected delay duration before killing the pod, cost = %v", cost) } }) - }) }) diff --git 
a/deps/github.com/openshift/kubernetes/test/e2e/framework/pod/resize.go b/deps/github.com/openshift/kubernetes/test/e2e/framework/pod/resize.go index af8d43ac34..02cb63853f 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/framework/pod/resize.go +++ b/deps/github.com/openshift/kubernetes/test/e2e/framework/pod/resize.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "strconv" "strings" @@ -321,16 +322,17 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework } tc := makeResizableContainer(ci) if tc.Resources.Limits != nil || tc.Resources.Requests != nil { - var expectedCPUShares int64 + var expectedCPUShares, v1expectedCPUShares, newExpectedCPUShares int64 var expectedMemLimitString string expectedMemLimitInBytes := tc.Resources.Limits.Memory().Value() cpuRequest := tc.Resources.Requests.Cpu() cpuLimit := tc.Resources.Limits.Cpu() if cpuRequest.IsZero() && !cpuLimit.IsZero() { - expectedCPUShares = int64(kubecm.MilliCPUToShares(cpuLimit.MilliValue())) + v1expectedCPUShares = int64(kubecm.MilliCPUToShares(cpuLimit.MilliValue())) } else { - expectedCPUShares = int64(kubecm.MilliCPUToShares(cpuRequest.MilliValue())) + v1expectedCPUShares = int64(kubecm.MilliCPUToShares(cpuRequest.MilliValue())) } + expectedCPUShares = v1expectedCPUShares expectedCPULimits := GetCPULimitCgroupExpectations(cpuLimit) expectedMemLimitString = strconv.FormatInt(expectedMemLimitInBytes, 10) @@ -340,14 +342,17 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework } // convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight value // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 - expectedCPUShares = int64(1 + ((expectedCPUShares-2)*9999)/262142) + expectedCPUShares = int64(1 + ((v1expectedCPUShares-2)*9999)/262142) + // TODO(atokubi): This is required to fix https://github.com/kubernetes/kubernetes/pull/132791 + // 
This should be dropped in 4.21, because 4.21(=1.34) fix would be a carry pulled from 1.35 + newExpectedCPUShares = ConvertCPUSharesToCgroupV2Value(v1expectedCPUShares) } if expectedMemLimitString != "0" { errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupMemLimit, expectedMemLimitString)) } errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPULimit, expectedCPULimits...)) - errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPURequest, strconv.FormatInt(expectedCPUShares, 10))) + errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPURequest, strconv.FormatInt(expectedCPUShares, 10), strconv.FormatInt(newExpectedCPUShares, 10))) // TODO(vinaykul,InPlacePodVerticalScaling): Verify oom_score_adj when runc adds support for updating it // See https://github.com/opencontainers/runc/pull/4669 } @@ -355,6 +360,34 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework return utilerrors.NewAggregate(errs) } +// ConvertCPUSharesToCgroupV2Value converts CPU shares, used by cgroup v1, +// to CPU weight, used by cgroup v2. +// +// Cgroup v1 CPU shares has a range of [2^1...2^18], i.e. [2...262144], +// and the default value is 1024. +// +// Cgroup v2 CPU weight has a range of [10^0...10^4], i.e. [1...10000], +// and the default value is 100. +// +// This function is identical to https://github.com/opencontainers/cgroups/blob/a3e2ecd1f756a19cee15f85b96337a59c3b5337b/utils.go#L417-L441 +func ConvertCPUSharesToCgroupV2Value(cpuShares int64) int64 { + // The value of 0 means "unset". + if cpuShares == 0 { + return 0 + } + if cpuShares <= 2 { + return 1 + } + if cpuShares >= 262144 { + return 10000 + } + l := math.Log2(float64(cpuShares)) + // Quadratic function which fits min, max, and default. 
+ exponent := (l*l+125*l)/612.0 - 7.0/34.0 + + return int64(math.Ceil(math.Pow(10, exponent))) +} + func verifyPodRestarts(f *framework.Framework, pod *v1.Pod, wantInfo []ResizableContainerInfo) error { ginkgo.GinkgoHelper() diff --git a/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/provisioning.go b/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/provisioning.go index bb220b3ade..a564a31cee 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/provisioning.go +++ b/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/provisioning.go @@ -19,6 +19,8 @@ package testsuites import ( "context" "fmt" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + crdclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "strconv" "strings" "sync" @@ -297,12 +299,34 @@ func (p *provisioningTestSuite) DefineTests(driver storageframework.TestDriver, framework.ExpectNoError(err) ginkgo.DeferCleanup(f.DeleteNamespace, valNamespace.Name) - ginkgo.By("Deploying validator") valManifests := []string{ - "test/e2e/testing-manifests/storage-csi/any-volume-datasource/crd/populator.storage.k8s.io_volumepopulators.yaml", "test/e2e/testing-manifests/storage-csi/any-volume-datasource/volume-data-source-validator/rbac-data-source-validator.yaml", "test/e2e/testing-manifests/storage-csi/any-volume-datasource/volume-data-source-validator/setup-data-source-validator.yaml", } + + crdManifestPath := "test/e2e/testing-manifests/storage-csi/any-volume-datasource/crd/populator.storage.k8s.io_volumepopulators.yaml" + crdItems, err := storageutils.LoadFromManifests(crdManifestPath) + framework.ExpectNoError(err, "Failed to load VolumePopulator CRD manifest") + gomega.Expect(crdItems).To(gomega.HaveLen(1), "Expected exactly one CRD in manifest") + + crd, ok := crdItems[0].(*apiextensionsv1.CustomResourceDefinition) + gomega.Expect(ok).To(gomega.BeTrueBecause("Resource in loaded manifest file is 
not a CustomResourceDefinition: %s", crdManifestPath)) + + config, err := framework.LoadConfig() + framework.ExpectNoError(err) + apiExtensionClient, err := crdclientset.NewForConfig(config) + framework.ExpectNoError(err) + + ginkgo.By(fmt.Sprintf("Checking if %s CRD exists", crd.Name)) + _, err = apiExtensionClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, crd.Name, metav1.GetOptions{}) + if err != nil && apierrors.IsNotFound(err) { + ginkgo.By("VolumePopulator CRD not found, test will create it and remove when done") + valManifests = append(valManifests, crdManifestPath) + } else if err != nil { + framework.ExpectNoError(err, "Error checking for VolumePopulator CRD existence") + } + + ginkgo.By("Deploying validator") err = storageutils.CreateFromManifests(ctx, f, valNamespace, func(item interface{}) error { return nil }, valManifests...) diff --git a/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/volume_group_snapshottable.go b/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/volume_group_snapshottable.go index ebbd9cc619..0707f557bb 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/volume_group_snapshottable.go +++ b/deps/github.com/openshift/kubernetes/test/e2e/storage/testsuites/volume_group_snapshottable.go @@ -196,7 +196,7 @@ func (s *VolumeGroupSnapshottableTestSuite) DefineTests(driver storageframework. 
volumeHandle := volume.(map[string]interface{})["volumeHandle"].(string) err = framework.Gomega().Expect(volumeHandle).NotTo(gomega.BeNil()) framework.ExpectNoError(err, "failed to get volume handle from volume") - uid := snapshot.VGSContent.Object["metadata"].(map[string]interface{})["uid"].(string) + uid := snapshot.VGS.Object["metadata"].(map[string]interface{})["uid"].(string) err = framework.Gomega().Expect(uid).NotTo(gomega.BeNil()) framework.ExpectNoError(err, "failed to get uuid from content") volumeSnapshotName := fmt.Sprintf("snapshot-%x", sha256.Sum256([]byte( diff --git a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/csi-hostpath-plugin.yaml b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/csi-hostpath-plugin.yaml index 666dd4e0f7..35dc8c4b29 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/csi-hostpath-plugin.yaml +++ b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/csi-hostpath-plugin.yaml @@ -354,7 +354,7 @@ spec: name: socket-dir - name: csi-snapshotter - image: registry.k8s.io/sig-storage/csi-snapshotter:v8.2.0 + image: registry.k8s.io/sig-storage/csi-snapshotter:v8.3.0 args: - -v=5 - --csi-address=/csi/csi.sock diff --git a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/run_group_snapshot_e2e.sh b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/run_group_snapshot_e2e.sh index 0ff1f9cb58..f62b40d848 100755 --- a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/run_group_snapshot_e2e.sh +++ 
b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/external-snapshotter/volume-group-snapshots/run_group_snapshot_e2e.sh @@ -266,6 +266,8 @@ run_tests() { export KUBE_CONTAINER_RUNTIME=remote export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock export KUBE_CONTAINER_RUNTIME_NAME=containerd + export SNAPSHOTTER_VERSION="${SNAPSHOTTER_VERSION:-v8.3.0}" + echo "SNAPSHOTTER_VERSION is $SNAPSHOTTER_VERSION" # ginkgo can take forever to exit, so we run it in the background and save the # PID, bash will not run traps while waiting on a process, but it will while # running a builtin like `wait`, saving the PID also allows us to forward the @@ -278,10 +280,10 @@ run_tests() { kubectl apply -f test/e2e/testing-manifests/storage-csi/external-snapshotter/groupsnapshot.storage.k8s.io_volumegroupsnapshotcontents.yaml || exit 1 kubectl apply -f test/e2e/testing-manifests/storage-csi/external-snapshotter/groupsnapshot.storage.k8s.io_volumegroupsnapshots.yaml || exit 1 - kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/refs/tags/v8.2.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml || exit 1 - curl -s https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/refs/tags/v8.2.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml | \ + kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/refs/tags/"${SNAPSHOTTER_VERSION}"/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml || exit 1 + curl -s https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/refs/tags/"${SNAPSHOTTER_VERSION}"/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml | \ awk '/--leader-election=true/ {print; print " - \"--feature-gates=CSIVolumeGroupSnapshot=true\""; next}1' | \ -sed 's|image: registry.k8s.io/sig-storage/snapshot-controller:v8.0.1|image: 
registry.k8s.io/sig-storage/snapshot-controller:v8.2.0|' | \ +sed "s|image: registry.k8s.io/sig-storage/snapshot-controller:.*|image: registry.k8s.io/sig-storage/snapshot-controller:${SNAPSHOTTER_VERSION}|" | \ kubectl apply -f - || exit 1 diff --git a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/gce-pd/controller_ss.yaml b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/gce-pd/controller_ss.yaml index 86d07389f1..ed77430bf0 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/gce-pd/controller_ss.yaml +++ b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/gce-pd/controller_ss.yaml @@ -21,7 +21,7 @@ spec: serviceAccountName: csi-gce-pd-controller-sa containers: - name: csi-snapshotter - image: registry.k8s.io/sig-storage/csi-snapshotter:v8.2.0 + image: registry.k8s.io/sig-storage/csi-snapshotter:v8.3.0 args: - "--v=5" - "--csi-address=/csi/csi.sock" diff --git a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/hostpath/hostpath/csi-hostpath-plugin.yaml b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/hostpath/hostpath/csi-hostpath-plugin.yaml index 3671e05c18..a5af9814a8 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/hostpath/hostpath/csi-hostpath-plugin.yaml +++ b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/hostpath/hostpath/csi-hostpath-plugin.yaml @@ -354,7 +354,7 @@ spec: name: socket-dir - name: csi-snapshotter - image: registry.k8s.io/sig-storage/csi-snapshotter:v8.2.0 + image: registry.k8s.io/sig-storage/csi-snapshotter:v8.3.0 args: - -v=5 - --csi-address=/csi/csi.sock diff --git a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/mock/csi-mock-driver-snapshotter.yaml b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/mock/csi-mock-driver-snapshotter.yaml 
index 2be6611d3e..cf5bbba29a 100644 --- a/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/mock/csi-mock-driver-snapshotter.yaml +++ b/deps/github.com/openshift/kubernetes/test/e2e/testing-manifests/storage-csi/mock/csi-mock-driver-snapshotter.yaml @@ -15,7 +15,7 @@ spec: serviceAccountName: csi-mock containers: - name: csi-snapshotter - image: registry.k8s.io/sig-storage/csi-snapshotter:v8.2.0 + image: registry.k8s.io/sig-storage/csi-snapshotter:v8.3.0 args: - "--v=5" - "--csi-address=$(ADDRESS)" diff --git a/deps/github.com/openshift/kubernetes/test/integration/apiserver/cel/validatingadmissionpolicy_test.go b/deps/github.com/openshift/kubernetes/test/integration/apiserver/cel/validatingadmissionpolicy_test.go index ecd5115d26..7d7832bc18 100644 --- a/deps/github.com/openshift/kubernetes/test/integration/apiserver/cel/validatingadmissionpolicy_test.go +++ b/deps/github.com/openshift/kubernetes/test/integration/apiserver/cel/validatingadmissionpolicy_test.go @@ -3059,6 +3059,7 @@ func createAndWaitReadyNamespacedWithWarnHandler(t *testing.T, client clientset. testMarkerName = testMarkerNameAnnotation } + //nolint:staticcheck // SA1019 skip linter to allow cherrypick. 
marker := &v1.Endpoints{ObjectMeta: metav1.ObjectMeta{Name: testMarkerName, Namespace: ns, Labels: matchLabels}} defer func() { err := client.CoreV1().Endpoints(ns).Delete(context.TODO(), marker.Name, metav1.DeleteOptions{}) diff --git a/deps/github.com/openshift/kubernetes/test/integration/job/job_test.go b/deps/github.com/openshift/kubernetes/test/integration/job/job_test.go index dcfd86edeb..ff15d6b038 100644 --- a/deps/github.com/openshift/kubernetes/test/integration/job/job_test.go +++ b/deps/github.com/openshift/kubernetes/test/integration/job/job_test.go @@ -2282,6 +2282,103 @@ func TestManagedBy_Reenabling(t *testing.T) { }) } +// TestImmediateJobRecreation verifies that the replacement Job creates the Pods +// quickly after re-creation, see https://github.com/kubernetes/kubernetes/issues/132042. +func TestImmediateJobRecreation(t *testing.T) { + // set the backoff delay very high to make sure the test does not pass waiting long on asserts + t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, 2*wait.ForeverTestTimeout)) + closeFn, restConfig, clientSet, ns := setup(t, "recreate-job-immediately") + t.Cleanup(closeFn) + ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig) + t.Cleanup(cancel) + + baseJob := batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns.Name, + }, + Spec: batchv1.JobSpec{ + Completions: ptr.To[int32](1), + Parallelism: ptr.To[int32](1), + Template: v1.PodTemplateSpec{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "main-container", + Image: "foo", + }, + }, + }, + }, + }, + } + jobSpec := func(idx int) batchv1.Job { + spec := baseJob.DeepCopy() + spec.Name = fmt.Sprintf("test-job-%d", idx) + return *spec + } + + var jobObjs []*batchv1.Job + // We create multiple Jobs to make the repro more likely. 
In particular, we need + // more Jobs than the number of Job controller workers to make it very unlikely + // that syncJob executes (and cleans the in-memory state) before the corresponding + // replacement Jobs are created. + for i := 0; i < 3; i++ { + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, ptr.To(jobSpec(i))) + if err != nil { + t.Fatalf("Error %v when creating the job %q", err, klog.KObj(jobObj)) + } + jobObjs = append(jobObjs, jobObj) + } + + for _, jobObj := range jobObjs { + validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 1, + Ready: ptr.To[int32](0), + Terminating: ptr.To[int32](0), + }) + + if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil { + t.Fatalf("Error %v when setting phase %s on the pod of job %v", err, v1.PodFailed, klog.KObj(jobObj)) + } + + // Await to account for the failed Pod + validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 1, + Ready: ptr.To[int32](0), + Terminating: ptr.To[int32](0), + }) + } + + for i := 0; i < len(jobObjs); i++ { + jobObj := jobObjs[i] + jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace) + if err := jobClient.Delete(ctx, jobObj.Name, metav1.DeleteOptions{ + // Use propagationPolicy=background so that we don't need to wait for the job object to be gone. + PropagationPolicy: ptr.To(metav1.DeletePropagationBackground), + }); err != nil { + t.Fatalf("Error %v when deleting the job %v", err, klog.KObj(jobObj)) + } + + // re-create the job immediately + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, ptr.To(jobSpec(i))) + if err != nil { + t.Fatalf("Error %q while creating the job %q", err, klog.KObj(jobObj)) + } + jobObjs[i] = jobObj + } + + // total timeout (3*5s) is less than 2*ForeverTestTimeout. + for _, jobObj := range jobObjs { + // wait maks 5s for the Active=1. This assert verifies that the backoff + // delay is not applied to the replacement instance of the Job. 
+ validateJobsPodsStatusOnlyWithTimeout(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 1, + Ready: ptr.To[int32](0), + Terminating: ptr.To[int32](0), + }, 5*time.Second) + } +} + // TestManagedBy_RecreatedJob verifies that the Job controller skips // reconciliation of a job with managedBy field, when this is a recreated job, // and there is still a pending sync queued for the previous job. @@ -3984,6 +4081,29 @@ func TestSuspendJob(t *testing.T) { } } +// TestSuspendJobWithZeroCompletions verifies the suspended Job with +// completions=0 is marked as Complete. +func TestSuspendJobWithZeroCompletions(t *testing.T) { + closeFn, restConfig, clientSet, ns := setup(t, "suspended-with-zero-completions") + t.Cleanup(closeFn) + ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig) + t.Cleanup(func() { + cancel() + }) + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ + Spec: batchv1.JobSpec{ + Completions: ptr.To[int32](0), + Suspend: ptr.To(true), + }, + }) + if err != nil { + t.Fatalf("Failed to create Job: %v", err) + } + for _, condition := range []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete} { + validateJobCondition(ctx, t, clientSet, jobObj, condition) + } +} + func TestSuspendJobControllerRestart(t *testing.T) { closeFn, restConfig, clientSet, ns := setup(t, "suspend") t.Cleanup(closeFn) diff --git a/docs/user/howto_metrics_server.md b/docs/user/howto_metrics_server.md new file mode 100644 index 0000000000..0fc76f58a5 --- /dev/null +++ b/docs/user/howto_metrics_server.md @@ -0,0 +1,113 @@ +# Deploying metrics-server in MicroShift +This document describes the basic workflow and changes to deploy [metrics-server](https://github.com/kubernetes-sigs/metrics-server) in MicroShift. + +## Create MicroShift cluster +Use the instructions in the [Getting Started with MicroShift](getting_started.md) document to configure a virtual machine running MicroShift. 
+ +Log into the virtual machine and run the following commands to configure the MicroShift access and check if the PODs are up and running. + +``` +mkdir ~/.kube +sudo cat /var/lib/microshift/resources/kubeadmin/kubeconfig > ~/.kube/config +oc get pods -A +``` + +## Install metrics-server +The [metrics-server](https://github.com/kubernetes-sigs/metrics-server) has a [ready-to-apply yaml file](https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml), but we need to change some parts of it to use the correct certificate authorities for Kubelet. + +These are shared as a ConfigMap: +```bash +$ oc get configmap -n kube-system kubelet-client-ca -o yaml +apiVersion: v1 +data: + ca.crt: | + ***redacted*** +kind: ConfigMap +metadata: + creationTimestamp: "2025-08-19T09:53:48Z" + name: kubelet-client-ca + namespace: kube-system + resourceVersion: "511" + uid: a530b86b-de6e-41d6-ba5e-333f9eebce65 +``` + +In order to use them, we need to mount it as a volume for the metrics-server and use the CA as an argument for the Deployment's command. +```bash +curl -sL https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml | \ +yq 'select(.kind == "Deployment") |= ( +.spec.template.spec.volumes += [{"name": "ca-bundle", "configMap": {"name": "kubelet-client-ca"}}] | +.spec.template.spec.containers[0].volumeMounts += [{"name": "ca-bundle", "mountPath": "/var/run/secrets/kubernetes.io/certs/ca.crt", "subPath": "ca.crt", "readOnly": true}] | +.spec.template.spec.containers[0].args += "--kubelet-certificate-authority=/var/run/secrets/kubernetes.io/certs/ca.crt" +)' | \ +oc apply -f - +``` + +Verify that the application started successfully in the `kube-system` namespace. 
+```bash +$ oc get pod -n kube-system +NAME READY STATUS RESTARTS AGE +csi-snapshot-controller-56d8f77b99-l5plk 1/1 Running 0 20m +metrics-server-85679c99-gh8v5 1/1 Running 0 30s +``` + +The deployment exposes the metrics API from the apiserver, allowing `top` commands to work: +```bash +$ kubectl top node +NAME CPU(cores) CPU(%) MEMORY(bytes) MEMORY(%) +microshift-dev 187m 4% 1531Mi 41% + +$ kubectl top pod -A +NAMESPACE NAME CPU(cores) MEMORY(bytes) +kube-system csi-snapshot-controller-56d8f77b99-l5plk 1m 9Mi +kube-system metrics-server-85679c99-gh8v5 5m 19Mi +openshift-dns dns-default-l6wl2 3m 32Mi +openshift-dns node-resolver-p8xtp 0m 2Mi +openshift-ingress router-default-67fc5ddcf9-8qrmr 1m 34Mi +openshift-ovn-kubernetes ovnkube-master-dl2ck 10m 208Mi +openshift-ovn-kubernetes ovnkube-node-xmhc8 1m 6Mi +openshift-service-ca service-ca-5dcff54cc7-cf9ht 3m 25Mi +openshift-storage lvms-operator-cf9d8978d-l4bcc 4m 2 +``` + +It is also possible to access the raw API: +```bash +$ oc get --raw /apis/metrics.k8s.io/v1beta1/nodes | jq +{ + "kind": "NodeMetricsList", + "apiVersion": "metrics.k8s.io/v1beta1", + "metadata": {}, + "items": [ + { + "metadata": { + "name": "microshift-dev", + "creationTimestamp": "2025-08-19T10:17:12Z", + "labels": { + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "microshift-dev", + "kubernetes.io/os": "linux", + "node-role.kubernetes.io/control-plane": "", + "node-role.kubernetes.io/master": "", + "node-role.kubernetes.io/worker": "", + "node.kubernetes.io/instance-type": "rhde", + "node.openshift.io/os_id": "rhel", + "topology.topolvm.io/node": "microshift-dev" + } + }, + "timestamp": "2025-08-19T10:17:01Z", + "window": "10.019s", + "usage": { + "cpu": "232316598n", + "memory": "1555732Ki" + } + } + ] +} +``` + +## Cleanup +For deleting all resources we do not need to customize the manifests, so a simple command will do: +```bash +oc delete -f 
https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml +``` diff --git a/etcd/cmd/microshift-etcd/run.go b/etcd/cmd/microshift-etcd/run.go index 1638494a19..c0a1204a19 100644 --- a/etcd/cmd/microshift-etcd/run.go +++ b/etcd/cmd/microshift-etcd/run.go @@ -22,6 +22,12 @@ import ( "k8s.io/klog/v2" ) +const ( + // MaxLearners determines the maximum number of etcd learners in the cluster. This is + // needed to support topology transitions without losing high availability. + MaxLearners = 2 +) + func NewRunEtcdCommand() *cobra.Command { cmd := &cobra.Command{ Use: "run", @@ -95,6 +101,8 @@ func (s *EtcdService) configure(cfg *config.Config) { s.etcdCfg.PeerTLSInfo.CertFile = cryptomaterial.PeerCertPath(etcdPeerCertDir) s.etcdCfg.PeerTLSInfo.KeyFile = cryptomaterial.PeerKeyPath(etcdPeerCertDir) s.etcdCfg.PeerTLSInfo.TrustedCAFile = etcdSignerCertPath + + s.etcdCfg.ExperimentalMaxLearners = MaxLearners } func (s *EtcdService) Run() error { diff --git a/etcd/go.mod b/etcd/go.mod index a1cd2c7cba..cec06d5eed 100644 --- a/etcd/go.mod +++ b/etcd/go.mod @@ -15,11 +15,11 @@ require ( github.com/openshift/build-machinery-go v0.0.0-20250602125535-1b6d00b8c37c github.com/spf13/cobra v1.9.1 go.etcd.io/etcd/server/v3 v3.5.21 - k8s.io/apimachinery v1.33.2 - k8s.io/cli-runtime v1.33.2 - k8s.io/component-base v1.33.2 + k8s.io/apimachinery v1.33.3 + k8s.io/cli-runtime v1.33.3 + k8s.io/component-base v1.33.3 k8s.io/klog/v2 v2.130.1 - k8s.io/kubectl v1.33.2 + k8s.io/kubectl v1.33.3 sigs.k8s.io/yaml v1.5.0 ) @@ -48,8 +48,8 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20250115164207-1a7da9e5054f // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect - k8s.io/apiserver v1.33.2 // indirect - k8s.io/kubelet v1.33.2 // indirect + k8s.io/apiserver v1.33.3 // indirect + k8s.io/kubelet v1.33.3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect ) @@ 
-138,8 +138,8 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v1.33.2 // indirect - k8s.io/client-go v1.33.2 // indirect + k8s.io/api v1.33.3 // indirect + k8s.io/client-go v1.33.3 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect @@ -150,12 +150,12 @@ require ( replace ( github.com/onsi/ginkgo/v2 => github.com/openshift/onsi-ginkgo/v2 v2.6.1-0.20250416174521-4eb003743b54 // from kubernetes - go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/pkg/v3 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/raft/v3 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/server/v3 => github.com/openshift/etcd/server/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd + go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/pkg/v3 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/raft/v3 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/server/v3 => github.com/openshift/etcd/server/v3 
v3.5.1-0.20250829062802-9c065d4d842c // from etcd ) replace ( diff --git a/etcd/go.sum b/etcd/go.sum index 668322da45..2367ec32af 100644 --- a/etcd/go.sum +++ b/etcd/go.sum @@ -167,18 +167,18 @@ github.com/openshift/api v0.0.0-20250722054545-bc3bc4882520 h1:VPH+9zGL1LsRsEjkl github.com/openshift/api v0.0.0-20250722054545-bc3bc4882520/go.mod h1:SPLf21TYPipzCO67BURkCfK6dcIIxx0oNRVWaOyRcXM= github.com/openshift/build-machinery-go v0.0.0-20250602125535-1b6d00b8c37c h1:gJvhduWIrpzoUTwrJjjeul+hGETKkhRhEZosBg/X3Hg= github.com/openshift/build-machinery-go v0.0.0-20250602125535-1b6d00b8c37c/go.mod h1:8jcm8UPtg2mCAsxfqKil1xrmRMI3a+XU2TZ9fF8A7TE= -github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:VJkieEgHkLuR7FsXua509Zl/MiSJYoDoYtyeGfIXiBA= -github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:vQoJkeJhBLemRPTJwG9sMpOIRg7n/8JOZs5iY0pLQkk= -github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:710Y8IQ8Y4uIF/UBvRWKqQ5QLllXhlD7rpsJ4DWQmoY= -github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= -github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:BD9mcIWv49DoMoe9mZ9dvq/+AVOwNwuKN9F2Iqe3qrg= -github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:wpZx8Egv1g4y+N7JAsqi2zoUiBIUWznLjqJbylDjWgU= -github.com/openshift/etcd/raft/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:PMnPXZQ3XTMe60e6P+eJuKmPRxVFm+W5VVadRmYalZk= -github.com/openshift/etcd/raft/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:fmcuY5R2SNkklU4+fKVBQi2biVp5vafMrWUEj4TJ4Cs= -github.com/openshift/etcd/server/v3 v3.5.1-0.20250722140445-b5ad268120cc 
h1:oVJWM6N0SWzxBUnwEcQVPevxFGhmk7Bi9d8y/MOYt0U= -github.com/openshift/etcd/server/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:G1mOzdwuzKT1VRL7SqRchli/qcFrtLBTAQ4lV20sXXo= +github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:Xty9QBBJwUlug0FGix5rwjOg7rIVnmfbsg4dEaBBFM4= +github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= +github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:XYHqfMdW35QxSQ6/BWJNF84YPEKRIxvvyAUgfhpo78k= +github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= +github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:Znju+Dw/y/JqFUeguSebbEygYif1ov5kk2cZoi1AHRw= +github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= +github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:WA1ti00Y3ubyLJ6FfpwnPmjF7p+49XtHf5aM5vfHO3w= +github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:wpZx8Egv1g4y+N7JAsqi2zoUiBIUWznLjqJbylDjWgU= +github.com/openshift/etcd/raft/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:BBShdR2ME8P+LhSDUnGBZBC4N+HiKhOgYv0YCL8tME0= +github.com/openshift/etcd/raft/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:fmcuY5R2SNkklU4+fKVBQi2biVp5vafMrWUEj4TJ4Cs= +github.com/openshift/etcd/server/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:+PyIwOJxcs0fBgdb42Spv+QvcQX6L3pnWfxK8doRRyo= +github.com/openshift/etcd/server/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:G1mOzdwuzKT1VRL7SqRchli/qcFrtLBTAQ4lV20sXXo= github.com/openshift/library-go v0.0.0-20250711143941-47604345e7ea h1:0BNis5UGo5Z7J9GtRY1nw/pt8hWxIZqvfqnqH3eV5cs= github.com/openshift/library-go v0.0.0-20250711143941-47604345e7ea/go.mod h1:tptKNust9MdRI0p90DoBSPHIrBa9oh+Rok59tF0vT8c= github.com/openshift/onsi-ginkgo/v2 v2.6.1-0.20250416174521-4eb003743b54 
h1:ehXndVZfIk/fo18YJCMJ+6b8HL8tzqjP7yWgchMnfCc= diff --git a/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go b/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go index 1909e8cdba..de3b459118 100644 --- a/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go +++ b/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/pipeline.go @@ -165,7 +165,7 @@ func (p *pipeline) post(data []byte) (err error) { p.picker.unreachable(u) // errMemberRemoved is a critical error since a removed member should // always be stopped. So we use reportCriticalError to report it to errorc. - if err == errMemberRemoved || err == ErrClusterIDMismatch { + if err == errMemberRemoved { reportCriticalError(err, p.errorc) } return err diff --git a/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go b/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go index 0eddd266ba..b022c68fb0 100644 --- a/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go +++ b/etcd/vendor/go.etcd.io/etcd/server/v3/etcdserver/raft.go @@ -19,7 +19,6 @@ import ( "expvar" "fmt" "log" - "math/rand/v2" "sort" "sync" "time" @@ -579,7 +578,7 @@ func restartAsStandaloneNode(cfg config.ServerConfig, snapshot *raftpb.Snapshot, if snapshot != nil { walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term } - w, id, _, st, ents := readWAL(cfg.Logger, cfg.WALDir(), walsnap, cfg.UnsafeNoFsync) + w, id, cid, st, ents := readWAL(cfg.Logger, cfg.WALDir(), walsnap, cfg.UnsafeNoFsync) // discard the previously uncommitted entries for i, ent := range ents { @@ -605,12 +604,8 @@ func restartAsStandaloneNode(cfg config.ServerConfig, snapshot *raftpb.Snapshot, ) ents = append(ents, toAppEnts...) 
- cl := membership.NewCluster(cfg.Logger, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)) - cid := types.ID(rand.Uint64()) - cl.SetID(id, cid) - // force commit newly appended entries - err := w.SaveWithMetadata(raftpb.HardState{}, toAppEnts, &pb.Metadata{NodeID: uint64(id), ClusterID: uint64(cid)}) + err := w.Save(raftpb.HardState{}, toAppEnts) if err != nil { cfg.Logger.Fatal("failed to save hard state and entries", zap.Error(err)) } @@ -632,6 +627,8 @@ func restartAsStandaloneNode(cfg config.ServerConfig, snapshot *raftpb.Snapshot, zap.Uint64("commit-index", st.Commit), ) + cl := membership.NewCluster(cfg.Logger, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)) + cl.SetID(id, cid) s := raft.NewMemoryStorage() if snapshot != nil { s.ApplySnapshot(*snapshot) diff --git a/etcd/vendor/go.etcd.io/etcd/server/v3/wal/wal.go b/etcd/vendor/go.etcd.io/etcd/server/v3/wal/wal.go index f315176ce2..69011025fa 100644 --- a/etcd/vendor/go.etcd.io/etcd/server/v3/wal/wal.go +++ b/etcd/vendor/go.etcd.io/etcd/server/v3/wal/wal.go @@ -27,8 +27,6 @@ import ( "sync" "time" - "go.etcd.io/etcd/api/v3/etcdserverpb" - "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/pbutil" "go.etcd.io/etcd/raft/v3" @@ -44,7 +42,6 @@ const ( stateType crcType snapshotType - metadataModType // warnSyncDuration is the amount of time allotted to an fsync before // logging a warning @@ -65,7 +62,6 @@ var ( ErrSnapshotNotFound = errors.New("wal: snapshot not found") ErrSliceOutOfRange = errors.New("wal: slice bounds out of range") ErrDecoderNotFound = errors.New("wal: decoder not found") - ErrNoMetadata = errors.New("wal: no metadata found") crcTable = crc32.MakeTable(crc32.Castagnoli) ) @@ -475,18 +471,6 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb. 
} metadata = rec.Data - case metadataModType: - if metadata == nil { - state.Reset() - return nil, state, nil, ErrNoMetadata - } - - var meta, metaMod etcdserverpb.Metadata - pbutil.MustUnmarshal(&meta, metadata) - pbutil.MustUnmarshal(&metaMod, rec.Data) - meta.ClusterID = metaMod.ClusterID - metadata = pbutil.MustMarshal(&meta) - case crcType: crc := decoder.crc.Sum32() // current crc of decoder must match the crc of the record. @@ -945,25 +929,12 @@ func (w *WAL) saveState(s *raftpb.HardState) error { return w.encoder.encode(rec) } -func (w *WAL) SaveMetadata(metadata *etcdserverpb.Metadata) error { - b := pbutil.MustMarshal(metadata) - rec := &walpb.Record{Type: metadataModType, Data: b} - if err := w.encoder.encode(rec); err != nil { - return err - } - return nil -} - func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error { - return w.SaveWithMetadata(st, ents, nil) -} - -func (w *WAL) SaveWithMetadata(st raftpb.HardState, ents []raftpb.Entry, metadata *etcdserverpb.Metadata) error { w.mu.Lock() defer w.mu.Unlock() // short cut, do not call sync - if metadata == nil && raft.IsEmptyHardState(st) && len(ents) == 0 { + if raft.IsEmptyHardState(st) && len(ents) == 0 { return nil } @@ -975,13 +946,6 @@ func (w *WAL) SaveWithMetadata(st raftpb.HardState, ents []raftpb.Entry, metadat return err } } - - if metadata != nil { - if err := w.SaveMetadata(metadata); err != nil { - return err - } - } - if err := w.saveState(&st); err != nil { return err } diff --git a/etcd/vendor/modules.txt b/etcd/vendor/modules.txt index 770a4e7162..0de9a43eb0 100644 --- a/etcd/vendor/modules.txt +++ b/etcd/vendor/modules.txt @@ -283,7 +283,7 @@ github.com/xlab/treeprint # go.etcd.io/bbolt v1.3.11 ## explicit; go 1.22 go.etcd.io/bbolt -# go.etcd.io/etcd/api/v3 v3.5.21 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/api/v3 v3.5.21 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 
go.etcd.io/etcd/api/v3/authpb go.etcd.io/etcd/api/v3/etcdserverpb @@ -292,7 +292,7 @@ go.etcd.io/etcd/api/v3/membershippb go.etcd.io/etcd/api/v3/mvccpb go.etcd.io/etcd/api/v3/v3rpc/rpctypes go.etcd.io/etcd/api/v3/version -# go.etcd.io/etcd/client/pkg/v3 v3.5.21 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/client/pkg/v3 v3.5.21 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/client/pkg/v3/fileutil go.etcd.io/etcd/client/pkg/v3/logutil @@ -305,14 +305,14 @@ go.etcd.io/etcd/client/pkg/v3/types # go.etcd.io/etcd/client/v2 v2.305.21 ## explicit; go 1.23.0 go.etcd.io/etcd/client/v2 -# go.etcd.io/etcd/client/v3 v3.5.21 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/client/v3 v3.5.21 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/client/v3 go.etcd.io/etcd/client/v3/concurrency go.etcd.io/etcd/client/v3/credentials go.etcd.io/etcd/client/v3/internal/endpoint go.etcd.io/etcd/client/v3/internal/resolver -# go.etcd.io/etcd/pkg/v3 v3.5.21 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/pkg/v3 v3.5.21 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/pkg/v3/adt go.etcd.io/etcd/pkg/v3/contention @@ -329,14 +329,14 @@ go.etcd.io/etcd/pkg/v3/runtime go.etcd.io/etcd/pkg/v3/schedule go.etcd.io/etcd/pkg/v3/traceutil go.etcd.io/etcd/pkg/v3/wait -# go.etcd.io/etcd/raft/v3 v3.5.21 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/raft/v3 v3.5.21 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/raft/v3 go.etcd.io/etcd/raft/v3/confchange go.etcd.io/etcd/raft/v3/quorum go.etcd.io/etcd/raft/v3/raftpb go.etcd.io/etcd/raft/v3/tracker -# 
go.etcd.io/etcd/server/v3 v3.5.21 => github.com/openshift/etcd/server/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/server/v3 v3.5.21 => github.com/openshift/etcd/server/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/server/v3/auth go.etcd.io/etcd/server/v3/config @@ -638,7 +638,7 @@ gopkg.in/natefinch/lumberjack.v2 # gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 -# k8s.io/api v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/api +# k8s.io/api v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/api ## explicit; go 1.24.0 k8s.io/api/admission/v1 k8s.io/api/admission/v1beta1 @@ -700,7 +700,7 @@ k8s.io/api/storage/v1 k8s.io/api/storage/v1alpha1 k8s.io/api/storage/v1beta1 k8s.io/api/storagemigration/v1alpha1 -# k8s.io/apimachinery v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery +# k8s.io/apimachinery v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery ## explicit; go 1.24.0 k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors @@ -763,18 +763,18 @@ k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/netutil k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver +# k8s.io/apiserver v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver ## explicit; go 1.24.0 k8s.io/apiserver/pkg/apis/audit k8s.io/apiserver/pkg/apis/audit/v1 k8s.io/apiserver/pkg/authentication/user -# k8s.io/cli-runtime v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime +# k8s.io/cli-runtime v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime ## explicit; go 1.24.0 k8s.io/cli-runtime/pkg/genericclioptions k8s.io/cli-runtime/pkg/genericiooptions k8s.io/cli-runtime/pkg/printers 
k8s.io/cli-runtime/pkg/resource -# k8s.io/client-go v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go +# k8s.io/client-go v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go ## explicit; go 1.24.0 k8s.io/client-go/applyconfigurations/admissionregistration/v1 k8s.io/client-go/applyconfigurations/admissionregistration/v1alpha1 @@ -935,7 +935,7 @@ k8s.io/client-go/util/jsonpath k8s.io/client-go/util/keyutil k8s.io/client-go/util/watchlist k8s.io/client-go/util/workqueue -# k8s.io/component-base v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base +# k8s.io/component-base v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base ## explicit; go 1.24.0 k8s.io/component-base/cli k8s.io/component-base/cli/flag @@ -972,7 +972,7 @@ k8s.io/kube-openapi/pkg/spec3 k8s.io/kube-openapi/pkg/util/proto k8s.io/kube-openapi/pkg/util/proto/validation k8s.io/kube-openapi/pkg/validation/spec -# k8s.io/kubectl v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl +# k8s.io/kubectl v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl ## explicit; go 1.24.0 k8s.io/kubectl/pkg/cmd/util k8s.io/kubectl/pkg/scheme @@ -982,7 +982,7 @@ k8s.io/kubectl/pkg/util/openapi k8s.io/kubectl/pkg/util/templates k8s.io/kubectl/pkg/util/term k8s.io/kubectl/pkg/validation -# k8s.io/kubelet v1.33.2 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet +# k8s.io/kubelet v1.33.3 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet ## explicit; go 1.24.0 k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1 # k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 @@ -1095,12 +1095,12 @@ sigs.k8s.io/yaml/goyaml.v3 # github.com/openshift/microshift/pkg/config => ../pkg/config # github.com/openshift/microshift/pkg/util/cryptomaterial => ../pkg/util/cryptomaterial # github.com/onsi/ginkgo/v2 => github.com/openshift/onsi-ginkgo/v2 
v2.6.1-0.20250416174521-4eb003743b54 -# go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/pkg/v3 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/raft/v3 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/server/v3 => github.com/openshift/etcd/server/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/pkg/v3 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/raft/v3 => github.com/openshift/etcd/raft/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/server/v3 => github.com/openshift/etcd/server/v3 v3.5.1-0.20250829062802-9c065d4d842c # k8s.io/api => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/api # k8s.io/apiextensions-apiserver => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver # k8s.io/apimachinery => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery diff --git a/go.mod b/go.mod index c6e8790467..3f8f64460f 100644 --- a/go.mod +++ b/go.mod @@ -38,17 +38,17 @@ require ( github.com/prometheus/prometheus v0.302.1 github.com/squat/generic-device-plugin v0.0.0-20250710162141-0f7fddf166f1 gopkg.in/yaml.v2 v2.4.0 - k8s.io/api v1.33.2 - k8s.io/apiextensions-apiserver v1.33.2 - k8s.io/apimachinery v1.33.2 - k8s.io/apiserver v1.33.2 - k8s.io/cli-runtime v1.33.2 - 
k8s.io/client-go v1.33.2 - k8s.io/cloud-provider v1.33.2 - k8s.io/component-base v1.33.2 - k8s.io/kube-aggregator v1.33.2 - k8s.io/kubectl v1.33.2 - k8s.io/kubelet v1.33.2 + k8s.io/api v1.33.3 + k8s.io/apiextensions-apiserver v1.33.3 + k8s.io/apimachinery v1.33.3 + k8s.io/apiserver v1.33.3 + k8s.io/cli-runtime v1.33.3 + k8s.io/client-go v1.33.3 + k8s.io/cloud-provider v1.33.3 + k8s.io/component-base v1.33.3 + k8s.io/kube-aggregator v1.33.3 + k8s.io/kubectl v1.33.3 + k8s.io/kubelet v1.33.3 k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 sigs.k8s.io/kube-storage-version-migrator v0.0.6-0.20230721195810-5c8923c5ff96 sigs.k8s.io/kustomize/api v0.19.0 @@ -162,21 +162,21 @@ require ( gopkg.in/go-jose/go-jose.v2 v2.6.3 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - k8s.io/cluster-bootstrap v1.33.2 // indirect - k8s.io/component-helpers v1.33.2 // indirect - k8s.io/controller-manager v1.33.2 // indirect - k8s.io/cri-api v1.33.2 // indirect - k8s.io/cri-client v1.33.2 // indirect - k8s.io/csi-translation-lib v1.33.2 // indirect - k8s.io/dynamic-resource-allocation v1.33.2 // indirect - k8s.io/endpointslice v1.33.2 // indirect - k8s.io/externaljwt v1.33.2 // indirect - k8s.io/kms v1.33.2 // indirect - k8s.io/kube-controller-manager v1.33.2 // indirect - k8s.io/kube-scheduler v1.33.2 // indirect - k8s.io/metrics v1.33.2 // indirect - k8s.io/mount-utils v1.33.2 // indirect - k8s.io/pod-security-admission v1.33.2 // indirect + k8s.io/cluster-bootstrap v1.33.3 // indirect + k8s.io/component-helpers v1.33.3 // indirect + k8s.io/controller-manager v1.33.3 // indirect + k8s.io/cri-api v1.33.3 // indirect + k8s.io/cri-client v1.33.3 // indirect + k8s.io/csi-translation-lib v1.33.3 // indirect + k8s.io/dynamic-resource-allocation v1.33.3 // indirect + k8s.io/endpointslice v1.33.3 // indirect + k8s.io/externaljwt v1.33.3 // indirect + k8s.io/kms v1.33.3 // indirect + k8s.io/kube-controller-manager v1.33.3 // indirect + 
k8s.io/kube-scheduler v1.33.3 // indirect + k8s.io/metrics v1.33.3 // indirect + k8s.io/mount-utils v1.33.3 // indirect + k8s.io/pod-security-admission v1.33.3 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect @@ -263,7 +263,7 @@ replace ( ) replace ( - go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd - go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc // from etcd + go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd + go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c // from etcd ) diff --git a/go.sum b/go.sum index c0800abefb..95d065000d 100644 --- a/go.sum +++ b/go.sum @@ -263,12 +263,12 @@ github.com/openshift/client-go v0.0.0-20250710075018-396b36f983ee h1:tOtrrxfDEW8 github.com/openshift/client-go v0.0.0-20250710075018-396b36f983ee/go.mod h1:zhRiYyNMk89llof2qEuGPWPD+joQPhCRUc2IK0SB510= github.com/openshift/cluster-policy-controller v0.0.0-20250725081630-3e7538547c8f h1:ZlSflmSPh5wdgrmuG4n/UWWtGZQgggIqWG8+EXiFXpo= github.com/openshift/cluster-policy-controller v0.0.0-20250725081630-3e7538547c8f/go.mod h1:977DPepMAwnGNdJQdAzVSL+Qpem/aLS9d9sGnt1yEsg= -github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:VJkieEgHkLuR7FsXua509Zl/MiSJYoDoYtyeGfIXiBA= -github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -github.com/openshift/etcd/client/pkg/v3 
v3.5.1-0.20250722140445-b5ad268120cc h1:vQoJkeJhBLemRPTJwG9sMpOIRg7n/8JOZs5iY0pLQkk= -github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc h1:710Y8IQ8Y4uIF/UBvRWKqQ5QLllXhlD7rpsJ4DWQmoY= -github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= +github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:Xty9QBBJwUlug0FGix5rwjOg7rIVnmfbsg4dEaBBFM4= +github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= +github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:XYHqfMdW35QxSQ6/BWJNF84YPEKRIxvvyAUgfhpo78k= +github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= +github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c h1:Znju+Dw/y/JqFUeguSebbEygYif1ov5kk2cZoi1AHRw= +github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= github.com/openshift/kubernetes-kube-storage-version-migrator v0.0.3-0.20250729145742-0f8a4eb84ace h1:Y2Hi+vtF5uBy+UjQTV4w59HNCRKb4JYsJGMjXlpDgDw= github.com/openshift/kubernetes-kube-storage-version-migrator v0.0.3-0.20250729145742-0f8a4eb84ace/go.mod h1:o5cKv/pQ+exEYKq97WapNa5cxSPxuwBezHZHNW5RNRo= github.com/openshift/library-go v0.0.0-20250711143941-47604345e7ea h1:0BNis5UGo5Z7J9GtRY1nw/pt8hWxIZqvfqnqH3eV5cs= diff --git a/packaging/blueprint/blueprint.toml.template b/packaging/blueprint/blueprint.toml.template index 4c3815b8c4..47d7675e8c 100644 --- a/packaging/blueprint/blueprint.toml.template +++ b/packaging/blueprint/blueprint.toml.template @@ -9,27 +9,15 @@ name = "microshift" version = "${REPLACE_USHIFT_VERSION}" [[packages]] -name = "microshift-greenboot" 
-version = "${REPLACE_USHIFT_VERSION}" - -[[packages]] -name = "microshift-networking" -version = "${REPLACE_USHIFT_VERSION}" - -[[packages]] -name = "microshift-selinux" +name = "microshift-release-info" version = "${REPLACE_USHIFT_VERSION}" [customizations.services] enabled = ["microshift"] -[customizations.firewall] -ports = ["22:tcp", "80:tcp", "443:tcp", "5353:udp", "6443:tcp", "30000-32767:tcp", "30000-32767:udp"] - [customizations.firewall.services] -enabled = ["mdns", "ssh", "http", "https"] +enabled = ["ssh"] [[customizations.firewall.zones]] name = "trusted" sources = ["10.42.0.0/16", "169.254.169.1"] - diff --git a/packaging/crio.conf.d/10-microshift_amd64.conf b/packaging/crio.conf.d/10-microshift_amd64.conf index ea1fdfc212..da070edd52 100644 --- a/packaging/crio.conf.d/10-microshift_amd64.conf +++ b/packaging/crio.conf.d/10-microshift_amd64.conf @@ -2,6 +2,6 @@ # for community builds on top of OKD, this setting has no effect [crio.image] global_auth_file="/etc/crio/openshift-pull-secret" -pause_image = "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:57287ba005a5a5ab513a645134676759f134be56e2108f39a5a3c51ecac71d98" +pause_image = "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:d6a7df177c6efee91051d9b09f600751b7685855f1791be9a08f777921daaa9d" pause_image_auth_file = "/etc/crio/openshift-pull-secret" pause_command = "/usr/bin/pod" diff --git a/packaging/crio.conf.d/10-microshift_arm64.conf b/packaging/crio.conf.d/10-microshift_arm64.conf index 4130242435..6cf93ca4ee 100644 --- a/packaging/crio.conf.d/10-microshift_arm64.conf +++ b/packaging/crio.conf.d/10-microshift_arm64.conf @@ -2,6 +2,6 @@ # for community builds on top of OKD, this setting has no effect [crio.image] global_auth_file="/etc/crio/openshift-pull-secret" -pause_image = "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca9bd83ae2bb56f52208d769ec111a84b92b0590425baeafe842cef440b44037" +pause_image = 
"quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e7c93974ccc75b84c699b7f1ead135226bfe0689689d3f0d1d7d621ed5b5a7ab" pause_image_auth_file = "/etc/crio/openshift-pull-secret" pause_command = "/usr/bin/pod" diff --git a/packaging/greenboot/microshift-running-check-ai-model-serving.sh b/packaging/greenboot/microshift-running-check-ai-model-serving.sh deleted file mode 100644 index 8d467e8d97..0000000000 --- a/packaging/greenboot/microshift-running-check-ai-model-serving.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# -# AI Model Serving for MicroShift-specific functionality used in Greenboot health check procedures. -# -# If 'microshift-ai-model-serving' RPM is installed, health check needs to include resources -# from the 'redhat-ods-applications' namespace. -# -set -eu -o pipefail - -SCRIPT_NAME=$(basename "$0") - -# Source the MicroShift health check functions library -# shellcheck source=packaging/greenboot/functions.sh -source /usr/share/microshift/functions/greenboot.sh - -# Exit if the current user is not 'root' -if [ "$(id -u)" -ne 0 ] ; then - echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" - exit 1 -fi - -exit_if_fail_marker_exists - -echo "STARTED" - -# Print the boot variable status -print_boot_status - -# Set the wait timeout for the current check based on the boot counter -WAIT_TIMEOUT_SECS=$(get_wait_timeout) - -if ! 
microshift healthcheck \ - -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" \ - --timeout="${WAIT_TIMEOUT_SECS}s" \ - --namespace redhat-ods-applications \ - --deployments kserve-controller-manager; then - create_fail_marker_and_exit -fi diff --git a/packaging/greenboot/microshift-running-check-cert-manager.sh b/packaging/greenboot/microshift-running-check-cert-manager.sh deleted file mode 100755 index 11be3a14d8..0000000000 --- a/packaging/greenboot/microshift-running-check-cert-manager.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# -# MicroShift Cert-Manager-specific functionality used in Greenboot health check procedures. -# -# If 'microshift-cert-manager' RPM is installed, health check needs to include resources -# from the 'cert-manager' namespace. -# -set -eu -o pipefail - -SCRIPT_NAME=$(basename "$0") - -# Source the MicroShift health check functions library -# shellcheck source=packaging/greenboot/functions.sh -source /usr/share/microshift/functions/greenboot.sh - -# Exit if the current user is not 'root' -if [ "$(id -u)" -ne 0 ] ; then - echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" - exit 1 -fi - -exit_if_fail_marker_exists - -echo "STARTED" - -# Print the boot variable status -print_boot_status - -# Set the wait timeout for the current check based on the boot counter -WAIT_TIMEOUT_SECS=$(get_wait_timeout) - -if ! microshift healthcheck \ - -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" \ - --namespace cert-manager \ - --deployments cert-manager,cert-manager-webhook,cert-manager-cainjector; then - create_fail_marker_and_exit -fi diff --git a/packaging/greenboot/microshift-running-check-gateway-api.sh b/packaging/greenboot/microshift-running-check-gateway-api.sh deleted file mode 100644 index 16ac1682af..0000000000 --- a/packaging/greenboot/microshift-running-check-gateway-api.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# -# MicroShift Gateway API-specific functionality used in Greenboot health check procedures. 
-# -# If 'microshift-gateway-api' RPM is installed, health check needs to include resources -# from the 'openshift-gateway-api' namespace. -# -set -eu -o pipefail - -SCRIPT_NAME=$(basename "$0") - -# Source the MicroShift health check functions library -# shellcheck source=packaging/greenboot/functions.sh -source /usr/share/microshift/functions/greenboot.sh - -# Exit if the current user is not 'root' -if [ "$(id -u)" -ne 0 ] ; then - echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" - exit 1 -fi - -exit_if_fail_marker_exists - -echo "STARTED" - -# Print the boot variable status -print_boot_status - -# Set the wait timeout for the current check based on the boot counter -WAIT_TIMEOUT_SECS=$(get_wait_timeout) - -if ! microshift healthcheck \ - -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" \ - --namespace openshift-gateway-api \ - --deployments servicemesh-operator3,istiod-openshift-gateway-api; then - create_fail_marker_and_exit -fi diff --git a/packaging/greenboot/microshift-running-check-multus.sh b/packaging/greenboot/microshift-running-check-multus.sh deleted file mode 100755 index 01914faf1e..0000000000 --- a/packaging/greenboot/microshift-running-check-multus.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# -# MicroShift Multus-specific functionality used in Greenboot health check procedures. -# -# If 'microshift-multus' RPM is installed, health check needs to include resources -# from the 'openshift-multus' namespace. 
-# -set -eu -o pipefail - -SCRIPT_NAME=$(basename "$0") - -# Source the MicroShift health check functions library -# shellcheck source=packaging/greenboot/functions.sh -source /usr/share/microshift/functions/greenboot.sh - -# Exit if the current user is not 'root' -if [ "$(id -u)" -ne 0 ] ; then - echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" - exit 1 -fi - -exit_if_fail_marker_exists - -echo "STARTED" - -# Print the boot variable status -print_boot_status - -# Set the wait timeout for the current check based on the boot counter -WAIT_TIMEOUT_SECS=$(get_wait_timeout) - -if ! microshift healthcheck \ - -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" \ - --namespace openshift-multus \ - --daemonsets multus,dhcp-daemon; then - create_fail_marker_and_exit -fi diff --git a/packaging/greenboot/microshift-running-check-olm.sh b/packaging/greenboot/microshift-running-check-olm.sh deleted file mode 100755 index 8a10d360f8..0000000000 --- a/packaging/greenboot/microshift-running-check-olm.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# -# MicroShift OLM-specific functionality used in Greenboot health check procedures. -# -# If 'microshift-olm' RPM is installed, health check needs to include resources -# from the 'openshift-operator-lifecycle-manager' namespace. -# -set -eu -o pipefail - -SCRIPT_NAME=$(basename "$0") - -# Source the MicroShift health check functions library -# shellcheck source=packaging/greenboot/functions.sh -source /usr/share/microshift/functions/greenboot.sh - -# Exit if the current user is not 'root' -if [ "$(id -u)" -ne 0 ] ; then - echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" - exit 1 -fi - -exit_if_fail_marker_exists - -echo "STARTED" - -# Print the boot variable status -print_boot_status - -# Set the wait timeout for the current check based on the boot counter -WAIT_TIMEOUT_SECS=$(get_wait_timeout) - -if ! 
microshift healthcheck \ - -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" \ - --namespace openshift-operator-lifecycle-manager \ - --deployments olm-operator,catalog-operator; then - create_fail_marker_and_exit -fi diff --git a/packaging/rpm/microshift.spec b/packaging/rpm/microshift.spec index 213ec6d683..5937fbb19c 100644 --- a/packaging/rpm/microshift.spec +++ b/packaging/rpm/microshift.spec @@ -400,16 +400,6 @@ install -p -m644 packaging/systemd/microshift-ovs-init.service %{buildroot}%{_un install -p -m755 packaging/systemd/configure-ovs.sh %{buildroot}%{_bindir}/configure-ovs.sh install -p -m755 packaging/systemd/configure-ovs-microshift.sh %{buildroot}%{_bindir}/configure-ovs-microshift.sh -# Avoid firewalld manipulation and flushing of iptable rules, -# this is a workaround for https://issues.redhat.com/browse/NP-641 -# It will trigger some warnings on the selinux audit log when restarting firewalld. -# In the future firewalld should stop flushing iptables unless we use any firewalld rule with direct -# iptables rules, once that's available in RHEL we can remove this workaround -# see https://github.com/firewalld/firewalld/issues/863#issuecomment-1407059938 - -mkdir -p -m755 %{buildroot}%{_sysconfdir}/systemd/system/firewalld.service.d -install -p -m644 packaging/systemd/firewalld-no-iptables.conf %{buildroot}%{_sysconfdir}/systemd/system/firewalld.service.d/firewalld-no-iptables.conf - mkdir -p -m755 %{buildroot}/var/lib/kubelet/pods install -d %{buildroot}%{_datadir}/selinux/packages/%{selinuxtype} @@ -430,7 +420,6 @@ install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/001-microshi # Copy all the OLM manifests except the arch specific ones install -p -m644 assets/optional/operator-lifecycle-manager/0000* %{buildroot}/%{_prefix}/lib/microshift/manifests.d/001-microshift-olm install -p -m644 assets/optional/operator-lifecycle-manager/kustomization.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/001-microshift-olm -install -p -m755 
packaging/greenboot/microshift-running-check-olm.sh %{buildroot}%{_sysconfdir}/greenboot/check/required.d/50_microshift_running_check_olm.sh %ifarch %{arm} aarch64 cat assets/optional/operator-lifecycle-manager/kustomization.aarch64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/001-microshift-olm/kustomization.yaml @@ -447,7 +436,6 @@ install -p -m644 assets/optional/operator-lifecycle-manager/release-olm-{x86_64, # multus install -d -m755 %{buildroot}%{_sysconfdir}/microshift/config.d install -p -m644 packaging/microshift/dropins/enable-multus.yaml %{buildroot}%{_sysconfdir}/microshift/config.d/00-enable-multus.yaml -install -p -m755 packaging/greenboot/microshift-running-check-multus.sh %{buildroot}%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_multus.sh install -p -m755 packaging/crio.conf.d/12-microshift-multus.conf %{buildroot}%{_sysconfdir}/crio/crio.conf.d/12-microshift-multus.conf # multus-release-info @@ -528,7 +516,6 @@ install -p -m755 packaging/tuned/microshift-tuned.py %{buildroot}%{_bindir}/micr install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api install -p -m644 assets/optional/gateway-api/0* %{buildroot}/%{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api install -p -m644 assets/optional/gateway-api/kustomization.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api -install -p -m755 packaging/greenboot/microshift-running-check-gateway-api.sh %{buildroot}%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_gateway_api.sh %ifarch %{arm} aarch64 cat assets/optional/gateway-api/kustomization.aarch64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api/kustomization.yaml @@ -581,7 +568,6 @@ install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/050-microshi install -p -m644 assets/optional/ai-model-serving/runtimes/*.yaml 
%{buildroot}/%{_prefix}/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes rm -v %{buildroot}/%{_prefix}/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/kustomization.x86_64.yaml -install -p -m755 packaging/greenboot/microshift-running-check-ai-model-serving.sh %{buildroot}%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_ai_model_serving.sh cat assets/optional/ai-model-serving/runtimes/kustomization.x86_64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/kustomization.yaml %endif @@ -614,7 +600,6 @@ install -p -m644 assets/optional/cert-manager/manager/*.yaml %{buildroot}/%{_pre install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager/rbac install -p -m644 assets/optional/cert-manager/rbac/*.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager/rbac install -p -m644 assets/optional/cert-manager/kustomization.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager -install -p -m755 packaging/greenboot/microshift-running-check-cert-manager.sh %{buildroot}%{_sysconfdir}/greenboot/check/required.d/60_microshift_running_check_cert_manager.sh # cert-manager-release-info mkdir -p -m755 %{buildroot}%{_datadir}/microshift/release @@ -727,7 +712,6 @@ fi %{_sysconfdir}/crio/crio.conf.d/11-microshift-ovn.conf %{_sysconfdir}/systemd/system/ovs-vswitchd.service.d/microshift-cpuaffinity.conf %{_sysconfdir}/systemd/system/ovsdb-server.service.d/microshift-cpuaffinity.conf -%{_sysconfdir}/systemd/system/firewalld.service.d/firewalld-no-iptables.conf # OpensvSwitch oneshot configuration script which handles ovn-k8s gateway mode setup %{_unitdir}/microshift-ovs-init.service @@ -744,14 +728,12 @@ fi %files olm %dir %{_prefix}/lib/microshift/manifests.d/001-microshift-olm %{_prefix}/lib/microshift/manifests.d/001-microshift-olm/* 
-%{_sysconfdir}/greenboot/check/required.d/50_microshift_running_check_olm.sh %files olm-release-info %{_datadir}/microshift/release/release-olm-{x86_64,aarch64}.json %files multus %{_sysconfdir}/microshift/config.d/00-enable-multus.yaml -%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_multus.sh %{_sysconfdir}/crio/crio.conf.d/12-microshift-multus.conf %files multus-release-info @@ -790,7 +772,6 @@ fi %files gateway-api %dir %{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api %{_prefix}/lib/microshift/manifests.d/000-microshift-gateway-api/* -%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_gateway_api.sh %files gateway-api-release-info %{_datadir}/microshift/release/release-gateway-api-{x86_64,aarch64}.json @@ -802,7 +783,6 @@ fi %dir %{_prefix}/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes %{_prefix}/lib/microshift/manifests.d/010-microshift-ai-model-serving-kserve/* %{_prefix}/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/* -%{_sysconfdir}/greenboot/check/required.d/41_microshift_running_check_ai_model_serving.sh %endif %files ai-model-serving-release-info @@ -819,7 +799,6 @@ fi %files cert-manager %dir %{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager %{_prefix}/lib/microshift/manifests.d/060-microshift-cert-manager/* -%{_sysconfdir}/greenboot/check/required.d/60_microshift_running_check_cert_manager.sh %files cert-manager-release-info %{_datadir}/microshift/release/release-cert-manager-{x86_64,aarch64}.json @@ -828,6 +807,12 @@ fi # Use Git command to generate the log and replace the VERSION string # LANG=C git log --date="format:%a %b %d %Y" --pretty="tformat:* %cd %an <%ae> VERSION%n- %s%n" packaging/rpm/microshift.spec %changelog +* Mon Aug 11 2025 Patryk Matuszak 4.20.0 +- Remove healthcheck scripts: optional MicroShift workloads are now part of healthcheck command + +* Wed Aug 06 2025 Evgeny Slutsky 4.20.0 +- Remove firewalld service 
override configuration to avoid flushing of iptables + * Thu Jul 24 2025 Evgeny Slutsky 4.20.0 - Update microshift-cert-manager with greenboot script diff --git a/packaging/systemd/firewalld-no-iptables.conf b/packaging/systemd/firewalld-no-iptables.conf deleted file mode 100644 index 18be549f64..0000000000 --- a/packaging/systemd/firewalld-no-iptables.conf +++ /dev/null @@ -1,3 +0,0 @@ -# This override avoids firewalld flushing of iptables -[Service] -InaccessiblePaths=/usr/sbin/xtables-nft-multi diff --git a/pkg/cmd/healthcheck.go b/pkg/cmd/healthcheck.go index 20b188604d..bacfa9c46e 100644 --- a/pkg/cmd/healthcheck.go +++ b/pkg/cmd/healthcheck.go @@ -3,7 +3,6 @@ package cmd import ( "context" "fmt" - "os" "time" "github.com/openshift/microshift/pkg/healthcheck" @@ -40,10 +39,6 @@ Checking health of a custom workloads can be achieved in two ways: `, RunE: func(cmd *cobra.Command, args []string) error { - if os.Geteuid() > 0 { - return fmt.Errorf("command must be run with root privileges") - } - if namespace != "" && custom != "" { return fmt.Errorf("only --namespace or --custom can be provided") } diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index 2857c69eb4..f994c67254 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -210,6 +210,7 @@ func RunMicroshift(cfg *config.Config) error { util.Must(m.AddService(controllers.NewInfrastructureServices(cfg))) util.Must(m.AddService(controllers.NewClusterPolicyController(cfg))) util.Must(m.AddService(controllers.NewVersionManager(cfg))) + util.Must(m.AddService(controllers.NewKubeletCAManager(cfg))) util.Must(m.AddService(node.NewKubeletServer(cfg))) util.Must(m.AddService(loadbalancerservice.NewLoadbalancerServiceController(cfg))) util.Must(m.AddService(controllers.NewKubeStorageVersionMigrator(cfg))) diff --git a/pkg/controllers/kubelet-ca-manager.go b/pkg/controllers/kubelet-ca-manager.go new file mode 100644 index 0000000000..b3f522b781 --- /dev/null +++ b/pkg/controllers/kubelet-ca-manager.go @@ -0,0 +1,232 @@ +/* 
+Copyright © 2025 MicroShift Contributors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package controllers + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/openshift/microshift/pkg/assets" + "github.com/openshift/microshift/pkg/config" + "github.com/openshift/microshift/pkg/util/cryptomaterial" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" +) + +const ( + kubeletCAConfigMapName = "kubelet-client-ca" + kubeletCAConfigMapNamespace = "kube-system" + kubeletCAFileName = "ca.crt" + defaultInformerResyncPeriod = 10 * time.Minute +) + +type KubeletCAManager struct { + cfg *config.Config + client *kubernetes.Clientset + queue workqueue.TypedRateLimitingInterface[string] + informer cache.SharedIndexInformer + caCertData map[string]string +} + +func NewKubeletCAManager(cfg *config.Config) *KubeletCAManager { + return &KubeletCAManager{ + cfg: cfg, + } +} + +func (s *KubeletCAManager) Name() string { return "kubelet-ca-manager" } +func (s *KubeletCAManager) Dependencies() []string { + return []string{"kube-apiserver"} +} + +func (s *KubeletCAManager) restConfig() (*rest.Config, error) { + kubeConfigPath := s.cfg.KubeConfigPath(config.KubeAdmin) + return clientcmd.BuildConfigFromFlags("", 
kubeConfigPath) +} + +func (s *KubeletCAManager) loadCACertData() error { + certsDir := cryptomaterial.CertsDirectory(config.DataDir) + kubeletCAPath := cryptomaterial.KubeletClientCAPath(certsDir) + + caCertPEM, err := os.ReadFile(kubeletCAPath) + if err != nil { + return fmt.Errorf("failed to read kubelet client CA file %s: %v", kubeletCAPath, err) + } + + s.caCertData = map[string]string{ + kubeletCAFileName: string(caCertPEM), + } + return nil +} + +func (s *KubeletCAManager) ensureConfigMap(ctx context.Context) error { + var cm = "core/kubelet-client-ca.yaml" + kubeConfigPath := s.cfg.KubeConfigPath(config.KubeAdmin) + + if err := assets.ApplyConfigMapWithData(ctx, cm, s.caCertData, kubeConfigPath); err != nil { + return fmt.Errorf("failed to apply configMap %v: %v", cm, err) + } + return nil +} + +func (s *KubeletCAManager) processNextItem(ctx context.Context) bool { + key, quit := s.queue.Get() + if quit { + return false + } + defer s.queue.Done(key) + + err := s.syncConfigMap(ctx, key) + if err != nil { + s.queue.AddRateLimited(key) + klog.Errorf("failed to sync configmap %v: %v", key, err) + return true + } + + s.queue.Forget(key) + return true +} + +func (s *KubeletCAManager) syncConfigMap(ctx context.Context, key string) error { + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return fmt.Errorf("invalid resource key: %s", key) + } + + configMap, err := s.client.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("configmap %s/%s was deleted, recreating with default CA data", namespace, name) + return s.ensureConfigMap(ctx) + } + return fmt.Errorf("failed to get ConfigMap %s/%s: %v", namespace, name, err) + } + + if configMap.Data[kubeletCAFileName] != s.caCertData[kubeletCAFileName] { + klog.Infof("configmap %s/%s data has been tampered with, restoring default CA data", namespace, name) + return s.ensureConfigMap(ctx) + } + + return nil +} + +func 
(s *KubeletCAManager) runWorker(ctx context.Context) { + for s.processNextItem(ctx) { + } +} + +func (s *KubeletCAManager) Run(ctx context.Context, ready chan<- struct{}, stopped chan<- struct{}) error { + defer close(stopped) + + if err := s.loadCACertData(); err != nil { + klog.Errorf("failed to load CA certificate data: %v", err) + return err + } + + restCfg, err := s.restConfig() + if err != nil { + return fmt.Errorf("failed to create rest config for kubelet CA manager: %w", err) + } + s.client, err = kubernetes.NewForConfig(restCfg) + if err != nil { + return fmt.Errorf("failed to create clientset for kubelet CA manager: %w", err) + } + + if err := s.ensureConfigMap(ctx); err != nil { + klog.Errorf("failed to create initial ConfigMap: %v", err) + return err + } + klog.Infof("Successfully created initial kubelet client CA ConfigMap") + + stopCh := make(chan struct{}) + defer close(stopCh) + + factory := informers.NewSharedInformerFactoryWithOptions( + s.client, + defaultInformerResyncPeriod, + informers.WithNamespace(kubeletCAConfigMapNamespace), + ) + + configMapInformer := factory.Core().V1().ConfigMaps() + s.informer = configMapInformer.Informer() + s.queue = workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()) + + _, err = s.informer.AddEventHandler(cache.FilteringResourceEventHandler{ + FilterFunc: func(obj interface{}) bool { + switch cm := obj.(type) { + case *corev1.ConfigMap: + return cm.Name == kubeletCAConfigMapName && cm.Namespace == kubeletCAConfigMapNamespace + case cache.DeletedFinalStateUnknown: + if deletedCM, ok := cm.Obj.(*corev1.ConfigMap); ok { + return deletedCM.Name == kubeletCAConfigMapName && deletedCM.Namespace == kubeletCAConfigMapNamespace + } + } + return false + }, + Handler: cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err == nil { + s.queue.Add(key) + } + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + 
key, err := cache.MetaNamespaceKeyFunc(newObj) + if err == nil { + s.queue.Add(key) + } + }, + DeleteFunc: func(obj interface{}) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + if err == nil { + s.queue.Add(key) + } + }, + }, + }) + if err != nil { + return fmt.Errorf("failed to initialize informer event handlers: %w", err) + } + + factory.Start(stopCh) + + if !cache.WaitForCacheSync(stopCh, s.informer.HasSynced) { + return fmt.Errorf("timed out waiting for caches to sync") + } + + go func() { + defer func() { + s.queue.ShutDown() + }() + s.runWorker(ctx) + }() + + close(ready) + + <-ctx.Done() + + return ctx.Err() +} diff --git a/pkg/healthcheck/debug_info.go b/pkg/healthcheck/debug_info.go index 90511a1b9b..cb995139a3 100644 --- a/pkg/healthcheck/debug_info.go +++ b/pkg/healthcheck/debug_info.go @@ -1,12 +1,18 @@ package healthcheck import ( + "context" + "fmt" "os" "path/filepath" "strings" "github.com/openshift/microshift/pkg/config" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/cli-runtime/pkg/genericclioptions" + coreclientv1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/util/homedir" "k8s.io/klog/v2" "k8s.io/kubectl/pkg/cmd/get" @@ -14,7 +20,16 @@ import ( "k8s.io/utils/ptr" ) -func logPodsAndEvents() { +func printPostFailureDebugInfo(ctx context.Context, coreClient *coreclientv1.CoreV1Client) { + output := strings.Builder{} + + unpulledOrFailedImages(ctx, coreClient, &output) + allPodsAndEvents(&output) + + klog.Infof("DEBUG INFORMATION\n%s", output.String()) +} + +func allPodsAndEvents(output *strings.Builder) { cliOptions := genericclioptions.NewConfigFlags(true).WithDeprecatedPasswordFlag() cliOptions.KubeConfig = ptr.To(filepath.Join(config.DataDir, "resources", string(config.KubeAdmin), "kubeconfig")) if homedir.HomeDir() == "" { @@ -27,8 +42,7 @@ func logPodsAndEvents() { matchVersionKubeConfigFlags := cmdutil.NewMatchVersionFlags(cliOptions) f 
:= cmdutil.NewFactory(matchVersionKubeConfigFlags) - output := strings.Builder{} - ioStreams := genericclioptions.IOStreams{In: os.Stdin, Out: &output, ErrOut: &output} + ioStreams := genericclioptions.IOStreams{In: os.Stdin, Out: output, ErrOut: output} cmdGet := get.NewCmdGet("", f, ioStreams) opts := get.NewGetOptions("", ioStreams) @@ -49,12 +63,118 @@ func logPodsAndEvents() { klog.Errorf("Failed to run 'get pods': %v", err) return } + output.WriteString("\n") output.WriteString("\n---------- EVENTS:\n") opts.SortBy = ".metadata.creationTimestamp" if err := opts.Run(f, []string{"events"}); err != nil { klog.Errorf("Failed to run 'get events': %v", err) return } + output.WriteString("\n") +} - klog.Infof("DEBUG INFORMATION\n%s", output.String()) +// unpulledOrFailedImages prepares a debug log with information about images that are still being pulled or failed to be pulled. +func unpulledOrFailedImages(ctx context.Context, coreClient *coreclientv1.CoreV1Client, output *strings.Builder) { + // Get list of existing Pods to skip Events belonging to non-existing Pods to avoid false positives: + // If someone creates and deletes a lot of workloads, there might be "Pulling" events for each Pod without + // the corresponding "Pulled" event. 
+ pods, err := coreClient.Pods("").List(ctx, v1.ListOptions{}) + if err != nil { + klog.Errorf("Failed to retrieve pods: %v", err) + return + } + existingPodsNames := sets.New[string]() + for _, pod := range pods.Items { + existingPodsNames.Insert(pod.Name) + } + + var pullingEvents, pulledEvents, failedEvents *corev1.EventList + if pullingEvents, err = coreClient.Events("").List(ctx, v1.ListOptions{FieldSelector: "reportingComponent=kubelet,reason=Pulling"}); err != nil { + klog.Errorf("Failed to retrieve Pulling events: %v", err) + return + } + if pulledEvents, err = coreClient.Events("").List(ctx, v1.ListOptions{FieldSelector: "reportingComponent=kubelet,reason=Pulled"}); err != nil { + klog.Errorf("Failed to retrieve Pulled events: %v", err) + return + } + if failedEvents, err = coreClient.Events("").List(ctx, v1.ListOptions{FieldSelector: "reportingComponent=kubelet,reason=Failed"}); err != nil { + klog.Errorf("Failed to retrieve Failed events: %v", err) + return + } + + unpulledImages, failedImages := analyzeEventsLookingForUnpulledOrFailedImages(existingPodsNames, pullingEvents, pulledEvents, failedEvents) + + if len(unpulledImages) > 0 { + output.WriteString("---------- IMAGES THAT ARE STILL BEING PULLED:\n") + for _, unpulledImage := range unpulledImages { + output.WriteString(fmt.Sprintf("- %q for Pod %q in namespace %q\n", unpulledImage.Image, unpulledImage.PodName, unpulledImage.Namespace)) + } + output.WriteString("\n") + } + + if len(failedImages) > 0 { + output.WriteString("---------- IMAGES THAT FAILED TO BE PULLED:\n") + for _, failedImage := range failedImages { + output.WriteString(fmt.Sprintf("- %q for Pod %q in namespace %q: %s\n", failedImage.Image, failedImage.PodName, failedImage.Namespace, failedImage.Message)) + } + output.WriteString("\n") + } +} + +type unpulledImage struct { + Namespace string + PodName string + Image string +} + +type failedImage struct { + unpulledImage + Message string +} + +// 
analyzeEventsLookingForUnpulledOrFailedImages goes through and tries to match +// image related Events to find images that are still being pulled +// and images that failed to be pulled. +func analyzeEventsLookingForUnpulledOrFailedImages(existingPodsNames sets.Set[string], pullingEvents, pulledEvents, failedEvents *corev1.EventList) ([]unpulledImage, []failedImage) { + getImageInfo := func(event corev1.Event) (string, string, string) { + pod := event.InvolvedObject.Name + ns := event.InvolvedObject.Namespace + img := strings.Split(event.Message, "\"")[1] + return ns, pod, img + } + + unpulledImages := sets.New[unpulledImage]() + + for _, event := range pullingEvents.Items { + ns, pod, img := getImageInfo(event) + if !existingPodsNames.Has(pod) { + continue + } + unpulledImages.Insert(unpulledImage{Namespace: ns, PodName: pod, Image: img}) + } + + for _, event := range pulledEvents.Items { + ns, pod, img := getImageInfo(event) + unpulledImages.Delete(unpulledImage{Namespace: ns, PodName: pod, Image: img}) + } + + failedImages := sets.New[failedImage]() + + for _, event := range failedEvents.Items { + if !strings.HasPrefix(event.Message, "Failed to pull image") { + continue + } + ns, pod, img := getImageInfo(event) + if !existingPodsNames.Has(pod) { + continue + } + unpulledImages.Delete(unpulledImage{Namespace: ns, PodName: pod, Image: img}) + + failedImages.Insert(failedImage{ + unpulledImage: unpulledImage{Namespace: ns, PodName: pod, Image: img}, + Message: event.Message, + }) + } + + return unpulledImages.UnsortedList(), failedImages.UnsortedList() } diff --git a/pkg/healthcheck/debug_info_test.go b/pkg/healthcheck/debug_info_test.go new file mode 100644 index 0000000000..a4a91064e8 --- /dev/null +++ b/pkg/healthcheck/debug_info_test.go @@ -0,0 +1,217 @@ +package healthcheck + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +func 
Test_analyzeEventsLookingForUnpulledOrFailedImages(t *testing.T) { + testCases := []struct { + name string + existingPodsNames sets.Set[string] + pullingEvents *corev1.EventList + pulledEvents *corev1.EventList + failedEvents *corev1.EventList + expectedUnpulled []unpulledImage + expectedFailed []failedImage + }{ + { + name: "no events", + existingPodsNames: sets.New[string](), + pullingEvents: &corev1.EventList{}, + pulledEvents: &corev1.EventList{}, + failedEvents: &corev1.EventList{}, + expectedUnpulled: []unpulledImage{}, + expectedFailed: []failedImage{}, + }, + { + name: "image still being pulled", + existingPodsNames: sets.New("test-pod"), + pullingEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "test-pod", + Namespace: "test-ns", + }, + Message: `Pulling image "nginx:latest"`, + }, + }, + }, + pulledEvents: &corev1.EventList{}, + failedEvents: &corev1.EventList{}, + expectedUnpulled: []unpulledImage{ + {Namespace: "test-ns", PodName: "test-pod", Image: "nginx:latest"}, + }, + expectedFailed: []failedImage{}, + }, + { + name: "image successfully pulled", + existingPodsNames: sets.New("test-pod"), + pullingEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "test-pod", + Namespace: "test-ns", + }, + Message: `Pulling image "nginx:latest"`, + }, + }, + }, + pulledEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "test-pod", + Namespace: "test-ns", + }, + Message: `Successfully pulled image "nginx:latest"`, + }, + }, + }, + failedEvents: &corev1.EventList{}, + expectedUnpulled: []unpulledImage{}, + expectedFailed: []failedImage{}, + }, + { + name: "image failed to pull", + existingPodsNames: sets.New("test-pod"), + pullingEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "test-pod", + Namespace: "test-ns", + }, + Message: `Pulling image 
"nginx:latest"`, + }, + }, + }, + pulledEvents: &corev1.EventList{}, + failedEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "test-pod", + Namespace: "test-ns", + }, + Message: `Failed to pull image "nginx:latest": error message`, + }, + }, + }, + expectedUnpulled: []unpulledImage{}, + expectedFailed: []failedImage{ + { + unpulledImage: unpulledImage{Namespace: "test-ns", PodName: "test-pod", Image: "nginx:latest"}, + Message: `Failed to pull image "nginx:latest": error message`, + }, + }, + }, + { + name: "skip events for non-existing pods", + existingPodsNames: sets.New("existing-pod"), + pullingEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "deleted-pod", + Namespace: "test-ns", + }, + Message: `Pulling image "nginx:latest"`, + }, + { + InvolvedObject: corev1.ObjectReference{ + Name: "existing-pod", + Namespace: "test-ns", + }, + Message: `Pulling image "redis:latest"`, + }, + }, + }, + pulledEvents: &corev1.EventList{}, + failedEvents: &corev1.EventList{}, + expectedUnpulled: []unpulledImage{ + {Namespace: "test-ns", PodName: "existing-pod", Image: "redis:latest"}, + }, + expectedFailed: []failedImage{}, + }, + { + name: "multiple images with mixed states", + existingPodsNames: sets.New("pod1", "pod2", "pod3"), + pullingEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "pod1", + Namespace: "ns1", + }, + Message: `Pulling image "nginx:latest"`, + }, + { + InvolvedObject: corev1.ObjectReference{ + Name: "pod2", + Namespace: "ns2", + }, + Message: `Pulling image "redis:latest"`, + }, + { + InvolvedObject: corev1.ObjectReference{ + Name: "pod3", + Namespace: "ns3", + }, + Message: `Pulling image "postgres:13"`, + }, + }, + }, + pulledEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "pod1", + Namespace: "ns1", + }, + Message: `Successfully 
pulled image "nginx:latest"`, + }, + }, + }, + failedEvents: &corev1.EventList{ + Items: []corev1.Event{ + { + InvolvedObject: corev1.ObjectReference{ + Name: "pod2", + Namespace: "ns2", + }, + Message: `Failed to pull image "redis:latest": connection timeout`, + }, + }, + }, + expectedUnpulled: []unpulledImage{ + {Namespace: "ns3", PodName: "pod3", Image: "postgres:13"}, + }, + expectedFailed: []failedImage{ + { + unpulledImage: unpulledImage{Namespace: "ns2", PodName: "pod2", Image: "redis:latest"}, + Message: `Failed to pull image "redis:latest": connection timeout`, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + unpulled, failed := analyzeEventsLookingForUnpulledOrFailedImages( + tc.existingPodsNames, + tc.pullingEvents, + tc.pulledEvents, + tc.failedEvents, + ) + + assert.ElementsMatch(t, tc.expectedUnpulled, unpulled) + assert.ElementsMatch(t, tc.expectedFailed, failed) + }) + } +} diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index 21e98165e2..bed4114588 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -21,6 +21,10 @@ func MicroShiftHealthcheck(ctx context.Context, timeout time.Duration) error { return err } + if err := fillOptionalMicroShiftWorkloads(workloads); err != nil { + return err + } + if err := waitForWorkloads(ctx, timeout, workloads); err != nil { return err } diff --git a/pkg/healthcheck/microshift_core_workloads.go b/pkg/healthcheck/microshift_core_workloads.go index 6c50d99c50..164ddcb3ac 100644 --- a/pkg/healthcheck/microshift_core_workloads.go +++ b/pkg/healthcheck/microshift_core_workloads.go @@ -119,6 +119,13 @@ func fillOptionalWorkloadsIfApplicable(cfg *config.Config, workloads map[string] Deployments: comps, } } + + if cfg.Network.Multus.IsEnabled() { + workloads["openshift-multus"] = NamespaceWorkloads{ + DaemonSets: []string{"multus", "dhcp-daemon"}, + } + } + return nil } diff --git 
a/pkg/healthcheck/microshift_optional_workloads.go b/pkg/healthcheck/microshift_optional_workloads.go new file mode 100644 index 0000000000..163aa31af6 --- /dev/null +++ b/pkg/healthcheck/microshift_optional_workloads.go @@ -0,0 +1,51 @@ +package healthcheck + +import ( + "github.com/openshift/microshift/pkg/util" + "k8s.io/klog/v2" +) + +type optionalWorkloads struct { + Namespace string + Workloads NamespaceWorkloads +} + +// optionalWorkloadPaths defines the mapping of manifest filepath to the namespace and workloads. +var optionalWorkloadPaths = map[string]optionalWorkloads{ + "/usr/lib/microshift/manifests.d/001-microshift-olm": { + Namespace: "openshift-operator-lifecycle-manager", + Workloads: NamespaceWorkloads{Deployments: []string{"olm-operator", "catalog-operator"}}, + }, + + "/usr/lib/microshift/manifests.d/000-microshift-gateway-api": { + Namespace: "openshift-gateway-api", + Workloads: NamespaceWorkloads{ + Deployments: []string{"servicemesh-operator3", "istiod-openshift-gateway-api"}, + }, + }, + + "/usr/lib/microshift/manifests.d/060-microshift-cert-manager": { + Namespace: "cert-manager", + Workloads: NamespaceWorkloads{Deployments: []string{"cert-manager", "cert-manager-webhook", "cert-manager-cainjector"}}, + }, + + "/usr/lib/microshift/manifests.d/010-microshift-ai-model-serving-kserve": { + Namespace: "redhat-ods-applications", + Workloads: NamespaceWorkloads{Deployments: []string{"kserve-controller-manager"}}, + }, +} + +// fillOptionalMicroShiftWorkloads assembles list of optional MicroShift workloads +// existing on the filesystem as manifests (in comparison to Multus which +// manifests are part of MicroShift binary). 
+func fillOptionalMicroShiftWorkloads(workloadsToCheck map[string]NamespaceWorkloads) error { + for path, ow := range optionalWorkloadPaths { + if exists, err := util.PathExists(path); err != nil { + return err + } else if exists { + klog.Infof("Optional component path exists: %s - expecting %v in namespace %q", path, ow.Workloads.String(), ow.Namespace) + workloadsToCheck[ow.Namespace] = ow.Workloads + } + } + return nil +} diff --git a/pkg/healthcheck/workloads.go b/pkg/healthcheck/workloads.go index 8dcc3e67c9..181bf88321 100644 --- a/pkg/healthcheck/workloads.go +++ b/pkg/healthcheck/workloads.go @@ -2,8 +2,12 @@ package healthcheck import ( "context" + "errors" "fmt" + "os" "path/filepath" + "strings" + "syscall" "time" "github.com/openshift/microshift/pkg/config" @@ -13,6 +17,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" appsclientv1 "k8s.io/client-go/kubernetes/typed/apps/v1" + coreclientv1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" @@ -24,90 +29,202 @@ type NamespaceWorkloads struct { StatefulSets []string `json:"statefulsets"` } +func (nw NamespaceWorkloads) String() string { + var parts []string + + if len(nw.Deployments) > 0 { + parts = append(parts, fmt.Sprintf("Deployments: [%s]", strings.Join(nw.Deployments, ", "))) + } + if len(nw.DaemonSets) > 0 { + parts = append(parts, fmt.Sprintf("DaemonSets: [%s]", strings.Join(nw.DaemonSets, ", "))) + } + if len(nw.StatefulSets) > 0 { + parts = append(parts, fmt.Sprintf("StatefulSets: [%s]", strings.Join(nw.StatefulSets, ", "))) + } + + if len(parts) == 0 { + return "" + } + + return strings.Join(parts, ", ") +} + +func getKubeconfigPath() string { + if os.Geteuid() == 0 { + return filepath.Join(config.DataDir, "resources", string(config.KubeAdmin), "kubeconfig") + } + + getKubeconfigFromEnv := func() string { + kubeconfigPath, ok := os.LookupEnv("KUBECONFIG") + if !ok { + return "" + } + 
if kubeconfigPath == "" { + klog.Warning("KUBECONFIG env var is defined but empty") + return "" + } + ok, err := util.PathExists(kubeconfigPath) + if err != nil { + klog.Errorf("Failed to verify access to file (%s) defined by KUBECONFIG env var: %v", kubeconfigPath, err) + return "" + } + if !ok { + klog.Errorf("File (%s) defined by KUBECONFIG env var does not exist", kubeconfigPath) + return "" + } + + return kubeconfigPath + } + + getKubeconfigFromDefaultPath := func() string { + defaultUserKubeconfig := fmt.Sprintf("%s/.kube/config", os.Getenv("HOME")) + ok, err := util.PathExists(defaultUserKubeconfig) + if err != nil { + klog.Errorf("Failed to verify access to ~/.kube/config: %v", err) + return "" + } + if !ok { + klog.Errorf("~/.kube/config does not exist") + return "" + } + return defaultUserKubeconfig + } + + if kubeconfigPath := getKubeconfigFromEnv(); kubeconfigPath != "" { + klog.Warningf("WARNING: Running healthcheck as non-root user, using KUBECONFIG environment variable: %s", kubeconfigPath) + return kubeconfigPath + } + + if kubeconfigPath := getKubeconfigFromDefaultPath(); kubeconfigPath != "" { + klog.Warningf("WARNING: Running healthcheck as non-root user, using ~/.kube/config") + return kubeconfigPath + } + + klog.Errorf("ERROR: Could not find suitable kubeconfig") + return "" +} + func waitForWorkloads(ctx context.Context, timeout time.Duration, workloads map[string]NamespaceWorkloads) error { - restConfig, err := clientcmd.BuildConfigFromFlags("", filepath.Join(config.DataDir, "resources", string(config.KubeAdmin), "kubeconfig")) + kubeconfigPath := getKubeconfigPath() + if kubeconfigPath == "" { + return fmt.Errorf("could not find existing kubeconfig file") + } + + restConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { - return fmt.Errorf("failed to create restConfig: %v", err) + return fmt.Errorf("failed to load kubeconfig from %s: %v", kubeconfigPath, err) } client, err := 
appsclientv1.NewForConfig(rest.AddUserAgent(restConfig, "healthcheck")) if err != nil { - return fmt.Errorf("failed to create client: %v", err) + return fmt.Errorf("unable to create Kubernetes client: %v", err) } + coreClient, err := coreclientv1.NewForConfig(rest.AddUserAgent(restConfig, "healthcheck")) + if err != nil { + return fmt.Errorf("unable to create Kubernetes core client: %v", err) + } + + interval := max(timeout/30, 1*time.Second) + klog.Infof("API Server will be queried every %v", interval) + aeg := &util.AllErrGroup{} for ns, wls := range workloads { for _, deploy := range wls.Deployments { - aeg.Go(func() error { return waitForDeployment(ctx, client, timeout, ns, deploy) }) + aeg.Go(func() error { return waitForDeployment(ctx, client, timeout, interval, ns, deploy) }) } for _, ds := range wls.DaemonSets { - aeg.Go(func() error { return waitForDaemonSet(ctx, client, timeout, ns, ds) }) + aeg.Go(func() error { return waitForDaemonSet(ctx, client, timeout, interval, ns, ds) }) } for _, sts := range wls.StatefulSets { - aeg.Go(func() error { return waitForStatefulSet(ctx, client, timeout, ns, sts) }) + aeg.Go(func() error { return waitForStatefulSet(ctx, client, timeout, interval, ns, sts) }) } } errs := aeg.Wait() if errs != nil { - logPodsAndEvents() + printPostFailureDebugInfo(ctx, coreClient) return errs } return nil } -func waitForDaemonSet(ctx context.Context, client *appsclientv1.AppsV1Client, timeout time.Duration, namespace, name string) error { +func waitForDaemonSet(ctx context.Context, client *appsclientv1.AppsV1Client, timeout, interval time.Duration, namespace, name string) error { klog.Infof("Waiting %v for daemonset/%s in %s", timeout, name, namespace) - err := wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (done bool, err error) { - ds, err := client.DaemonSets(namespace).Get(ctx, name, v1.GetOptions{}) + var lastHumanReadableErr error + err := wait.PollUntilContextTimeout(ctx, interval, 
timeout, true, func(ctx context.Context) (done bool, err error) { + getctx, cancel := context.WithTimeout(ctx, interval/2) + defer cancel() + + ds, err := client.DaemonSets(namespace).Get(getctx, name, v1.GetOptions{}) if err != nil { - if apierrors.IsNotFound(err) { - // Resources created by an operator might not exist yet. - // We allow for full timeout duration to be created and become ready. + // Always return 'false, nil' to keep retrying until timeout. + + if commonErr := commonGetErrors(err); commonErr != nil { + lastHumanReadableErr = commonErr return false, nil } - klog.Errorf("Error getting daemonset/%s in %q: %v", name, namespace, err) - // Ignore errors, give chance until timeout + if isDeadlineExceededError(err) { + return false, nil + } + + klog.Errorf("Unexpected error while getting daemonset %q in %q (ignoring): %v", name, namespace, err) return false, nil } - klog.V(3).Infof("Status of daemonset/%s in %s: %+v", name, namespace, ds.Status) + klog.V(3).Infof("Status of DaemonSet %q in %q: %+v", name, namespace, ds.Status) // Borrowed and adjusted from k8s.io/kubectl/pkg/polymorphichelpers/rollout_status.go if ds.Generation > ds.Status.ObservedGeneration { + lastHumanReadableErr = fmt.Errorf("daemonset is still being processed by the controller (generation %d > observed %d)", ds.Generation, ds.Status.ObservedGeneration) return false, nil } if ds.Status.UpdatedNumberScheduled < ds.Status.DesiredNumberScheduled { + lastHumanReadableErr = fmt.Errorf("only %d of %d nodes have the updated daemonset pods", ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) return false, nil } if ds.Status.NumberAvailable < ds.Status.DesiredNumberScheduled { + lastHumanReadableErr = fmt.Errorf("only %d of %d daemonset pods are ready across all nodes", ds.Status.NumberAvailable, ds.Status.DesiredNumberScheduled) return false, nil } return true, nil }) if err != nil { - klog.Errorf("Failed waiting for daemonset/%s in %s: %v", name, namespace, err) + if 
isDeadlineExceededError(err) { + klog.Errorf("DaemonSet %q in %q namespace didn't become ready in %v: %v", name, namespace, timeout, lastHumanReadableErr) + return fmt.Errorf("daemonset '%s' in namespace '%s' failed to become ready within %v. Last status: %v", name, namespace, timeout, lastHumanReadableErr) + } + klog.Errorf("Failed waiting for DaemonSet %q in namespace %q: %v", name, namespace, err) return err } - klog.Infof("Daemonset/%s in %s is ready", name, namespace) + klog.Infof("DaemonSet %q in namespace %q is ready", name, namespace) return nil } -func waitForDeployment(ctx context.Context, client *appsclientv1.AppsV1Client, timeout time.Duration, namespace, name string) error { +func waitForDeployment(ctx context.Context, client *appsclientv1.AppsV1Client, timeout, interval time.Duration, namespace, name string) error { klog.Infof("Waiting %v for deployment/%s in %s", timeout, name, namespace) - err := wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (done bool, err error) { - deployment, err := client.Deployments(namespace).Get(ctx, name, v1.GetOptions{}) + var lastHumanReadableErr error + err := wait.PollUntilContextTimeout(ctx, interval, timeout, true, func(ctx context.Context) (done bool, err error) { + getctx, cancel := context.WithTimeout(ctx, interval/2) + defer cancel() + + deployment, err := client.Deployments(namespace).Get(getctx, name, v1.GetOptions{}) if err != nil { - if apierrors.IsNotFound(err) { - // Resources created by an operator might not exist yet. - // We allow for full timeout duration to be created and become ready. + // Always return 'false, nil' to keep retrying until timeout. 
+ + if commonErr := commonGetErrors(err); commonErr != nil { + lastHumanReadableErr = commonErr + return false, nil + } + if isDeadlineExceededError(err) { return false, nil } - klog.Errorf("Error getting deployment/%s in %q: %v", name, namespace, err) - // Ignore errors, give chance until timeout + + klog.Errorf("Unexpected error while getting deployment %q in %q (ignoring): %v", name, namespace, err) return false, nil } - klog.V(3).Infof("Status of deployment/%s in %s: %+v", name, namespace, deployment.Status) + klog.V(3).Infof("Status of Deployment %q in %q: %+v", name, namespace, deployment.Status) // Borrowed and adjusted from k8s.io/kubectl/pkg/polymorphichelpers/rollout_status.go if deployment.Generation > deployment.Status.ObservedGeneration { + lastHumanReadableErr = fmt.Errorf("deployment is still being processed by the controller (generation %d > observed %d)", deployment.Generation, deployment.Status.ObservedGeneration) return false, nil } // 'rollout status' command would check the 'Progressing' condition and if the reason is 'ProgressDeadlineExceeded', @@ -117,64 +234,114 @@ func waitForDeployment(ctx context.Context, client *appsclientv1.AppsV1Client, t // - we want to give full timeout duration for the Deployment to become ready, no early exits. 
if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas { + lastHumanReadableErr = fmt.Errorf("only %d of %d pods have been updated with the latest configuration", deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas) return false, nil } if deployment.Status.Replicas > deployment.Status.UpdatedReplicas { + lastHumanReadableErr = fmt.Errorf("%d pods are still running the old configuration while %d are updated", deployment.Status.Replicas-deployment.Status.UpdatedReplicas, deployment.Status.UpdatedReplicas) return false, nil } if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas { + lastHumanReadableErr = fmt.Errorf("only %d of %d updated pods are ready", deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas) return false, nil } return true, nil }) if err != nil { - klog.Errorf("Failed waiting for deployment/%s in %s: %v", name, namespace, err) + if isDeadlineExceededError(err) { + klog.Errorf("Deployment/%s in %s didn't become ready in %v: %v", name, namespace, timeout, lastHumanReadableErr) + return fmt.Errorf("deployment '%s' in namespace '%s' failed to become ready within %v. 
Last status: %v", name, namespace, timeout, lastHumanReadableErr) + } + klog.Errorf("Failed waiting for Deployment %q in namespace %q: %v", name, namespace, err) return err } - klog.Infof("Deployment/%s in %s is ready", name, namespace) + klog.Infof("Deployment %q in namespace %q is ready", name, namespace) return nil } -func waitForStatefulSet(ctx context.Context, client *appsclientv1.AppsV1Client, timeout time.Duration, namespace, name string) error { +func waitForStatefulSet(ctx context.Context, client *appsclientv1.AppsV1Client, timeout, interval time.Duration, namespace, name string) error { klog.Infof("Waiting %v for statefulset/%s in %s", timeout, name, namespace) - err := wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (done bool, err error) { - sts, err := client.StatefulSets(namespace).Get(ctx, name, v1.GetOptions{}) + var lastHumanReadableErr error + err := wait.PollUntilContextTimeout(ctx, interval, timeout, true, func(ctx context.Context) (done bool, err error) { + getctx, cancel := context.WithTimeout(ctx, interval/2) + defer cancel() + + sts, err := client.StatefulSets(namespace).Get(getctx, name, v1.GetOptions{}) if err != nil { - if apierrors.IsNotFound(err) { - // Resources created by an operator might not exist yet. - // We allow for full timeout duration to be created and become ready. + // Always return 'false, nil' to keep retrying until timeout. 
+ + if commonErr := commonGetErrors(err); commonErr != nil { + lastHumanReadableErr = commonErr return false, nil } - klog.Errorf("Error getting statefulset/%s in %s: %v", name, namespace, err) - // Ignore errors, give chance until timeout + if isDeadlineExceededError(err) { + return false, nil + } + + klog.Errorf("Unexpected error while getting statefulset %q in %q (ignoring): %v", name, namespace, err) return false, nil } - klog.V(3).Infof("Status of statefulset/%s in %s: %+v", name, namespace, sts.Status) + klog.V(3).Infof("Status of StatefulSet %q in %q: %+v", name, namespace, sts.Status) // Borrowed and adjusted from k8s.io/kubectl/pkg/polymorphichelpers/rollout_status.go if sts.Status.ObservedGeneration == 0 || sts.Generation > sts.Status.ObservedGeneration { + lastHumanReadableErr = fmt.Errorf("statefulset is still being processed by the controller (generation %d > observed %d)", sts.Generation, sts.Status.ObservedGeneration) return false, nil } if sts.Spec.Replicas != nil && sts.Status.ReadyReplicas < *sts.Spec.Replicas { + lastHumanReadableErr = fmt.Errorf("only %d of %d replicas are ready", sts.Status.ReadyReplicas, *sts.Spec.Replicas) return false, nil } if sts.Spec.UpdateStrategy.Type == appsv1.RollingUpdateStatefulSetStrategyType && sts.Spec.UpdateStrategy.RollingUpdate != nil { if sts.Spec.Replicas != nil && sts.Spec.UpdateStrategy.RollingUpdate.Partition != nil { if sts.Status.UpdatedReplicas < (*sts.Spec.Replicas - *sts.Spec.UpdateStrategy.RollingUpdate.Partition) { + lastHumanReadableErr = fmt.Errorf("only %d of %d replicas have been updated (partition: %d)", sts.Status.UpdatedReplicas, *sts.Spec.Replicas, *sts.Spec.UpdateStrategy.RollingUpdate.Partition) return false, nil } } return true, nil } if sts.Status.UpdateRevision != sts.Status.CurrentRevision { + lastHumanReadableErr = fmt.Errorf("update revision (%s) differs from current revision (%s)", sts.Status.UpdateRevision, sts.Status.CurrentRevision) return false, nil } return true, nil }) if err 
!= nil { - klog.Errorf("Failed waiting for statefulset/%s in %s: %v", name, namespace, err) + if isDeadlineExceededError(err) { + klog.Errorf("Statefulset/%s in %s didn't become ready in %v: %v", name, namespace, timeout, lastHumanReadableErr) + return fmt.Errorf("statefulset '%s' in namespace '%s' failed to become ready within %v. Last status: %v", name, namespace, timeout, lastHumanReadableErr) + } + klog.Errorf("Failed waiting for StatefulSet %q in namespace %q: %v", name, namespace, err) return err } - klog.Infof("StatefulSet/%s in %s is ready", name, namespace) + klog.Infof("StatefulSet %q in namespace %q is ready", name, namespace) + return nil +} + +func isDeadlineExceededError(err error) bool { + if strings.Contains(err.Error(), "would exceed context deadline") { + return true + } + + // 'client rate limiter Wait returned an error: context deadline exceeded' -> drop the wrapping errors + if errors.Is(err, context.DeadlineExceeded) { + return true + } + + return false +} + +func commonGetErrors(err error) error { + if apierrors.IsNotFound(err) { + // Resources created by an operator might not exist yet. + // We allow for full timeout duration to be created and become ready. 
+ return fmt.Errorf("resource does not exist yet") + } + + if errors.Is(err, syscall.ECONNREFUSED) { + return fmt.Errorf("cannot connect to API server") + } + return nil } diff --git a/robocop.toml b/robocop.toml new file mode 100644 index 0000000000..254f1d660f --- /dev/null +++ b/robocop.toml @@ -0,0 +1,22 @@ +[tool.robocop] +language = [ + "en", +] + +[tool.robocop.lint] +ignore = [ + "misaligned-continuation-row", + "unused-variable", + "expression-can-be-simplified", + "no-suite-variable", + "no-global-variable", + "no-test-variable", + "variable-overwritten-before-usage" +] + +configure = [ + "too-long-test-case.max_len=40", + "too-many-calls-in-test-case.max_calls=20", + "line-too-long.line_length=200", + "file-too-long.max_lines=1000" +] diff --git a/scripts/advisory_publication/advisory_publication_report.py b/scripts/advisory_publication/advisory_publication_report.py index d4ed147430..cbf9974e0b 100644 --- a/scripts/advisory_publication/advisory_publication_report.py +++ b/scripts/advisory_publication/advisory_publication_report.py @@ -1,85 +1,224 @@ #!/usr/bin/env python3 +import json import os import sys -import jira.client +from urllib.parse import quote + import requests import urllib3 -import json -import jira import yaml -SERVER_URL = 'https://issues.redhat.com/' +import jira +import jira.client + +JIRA_URL = 'https://issues.redhat.com/' JIRA_API_TOKEN = os.environ.get('JIRA_API_TOKEN') +GITLAB_API_TOKEN = os.environ.get('GITLAB_API_TOKEN') +GITLAB_BASE_URL = 'https://gitlab.cee.redhat.com' +GITLAB_PROJECT_ID = 'hybrid-platforms/art/ocp-shipment-data' def usage(): + """Print usage information.""" print("""\ usage: advisory_publication_report.py OCP_VERSION arguments: - OCP_VERSION: The OCP versions to analyse if MicroShift version should be published. Format: "4.X.Z"\ + OCP_VERSION: The OCP versions to analyse if MicroShift version should be published. 
Format: "4.X.Z" + + environment variables: + JIRA_API_TOKEN: API token for Jira access + GITLAB_API_TOKEN: API token for GitLab access\ """) -def get_advisories(ocp_version: str) -> dict[str, int]: +def get_shipment_merge_request_url(ocp_version: str) -> str: """ - Get a list of advisory ids for a OCP version from github.com/openshift-eng/ocp-build-data repository - Parameters: - ocp_version (str): OCP version with format: "X.Y.Z" - Returns: - (dict): advisory dict with type and id + Get merge request URL from GitHub releases.yml file. + + Parameters: + ocp_version (str): OCP version with format: "X.Y.Z" + + Returns: + str: GitLab merge request URL """ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) try: microshift_xy_version = '.'.join(ocp_version.split('.')[:2]) - request = requests.get(f'https://raw.githubusercontent.com/openshift-eng/ocp-build-data/refs/heads/openshift-{microshift_xy_version}/releases.yml', verify=False) - request.raise_for_status() - except requests.exceptions.HTTPError as err: - raise SystemExit(err) - releases_dict = yaml.load(str(request.text), Loader=yaml.SafeLoader) + releases_url = ( + f'https://raw.githubusercontent.com/openshift-eng/ocp-build-data/' + f'refs/heads/openshift-{microshift_xy_version}/releases.yml' + ) + + response = requests.get(releases_url, verify=False) + response.raise_for_status() - if ocp_version in releases_dict['releases']: - return releases_dict['releases'][ocp_version]['assembly']['group']['advisories'] - else: - raise KeyError(f"{ocp_version} OCP version does NOT exist") + releases_dict = yaml.load(response.text, Loader=yaml.SafeLoader) + return releases_dict['releases'][ocp_version]['assembly']['group']['shipment']['url'] + except requests.exceptions.HTTPError as err: + raise SystemExit(f"Failed to fetch releases.yml: {err}") -def get_advisory_info(advisory_id: int) -> dict[str, str]: +def get_yaml_files_from_mr(mr_info: dict, headers: dict) -> dict: """ - Get a list of strings with the 
CVEs ids for an advisory - Parameters: - advisory_id (int): advisory id - Returns: - (list): list of strings with CVE ids + Get YAML files from a merge request. + + Parameters: + mr_info (dict): merge request information + headers (dict): GitLab API headers + + Returns: + dict: dictionary containing parsed YAML content """ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + encoded_project_id = quote(GITLAB_PROJECT_ID, safe='') + mr_iid = mr_info['iid'] + try: - request = requests.get(f'https://errata.devel.redhat.com/cve/show/{advisory_id}.json', verify=False) - request.raise_for_status() + # Get changes in the merge request + url = f'{GITLAB_BASE_URL}/api/v4/projects/{encoded_project_id}/merge_requests/{mr_iid}/changes' + response = requests.get(url, headers=headers, verify=False) + response.raise_for_status() + + changes = response.json() + yaml_content = {} + + # Process each changed file + for change in changes.get('changes', []): + file_path = change.get('new_path', change.get('old_path', 'unknown')) + + # Only process YAML files + if file_path.endswith(('.yml', '.yaml')): + file_url = f'{GITLAB_BASE_URL}/api/v4/projects/{encoded_project_id}/repository/files/{quote(file_path, safe="")}/raw' + + # Try target branch first, then source branch + file_response = requests.get(file_url, headers=headers, params={'ref': mr_info['target_branch']}, verify=False) + + if file_response.status_code != 200: + file_response = requests.get(file_url, headers=headers, params={'ref': mr_info['source_branch']}, verify=False) + + if file_response.status_code == 200: + try: + yaml_content[file_path] = yaml.load(file_response.text, Loader=yaml.SafeLoader) + except yaml.YAMLError as e: + print(f"Warning: Could not parse YAML file {file_path}: {e}") + + return yaml_content + except requests.exceptions.HTTPError as err: - raise SystemExit(err) - advisory_info = json.loads(request.text) + raise SystemExit(f"GitLab API error while fetching MR changes: {err}") - if 
advisory_info is None: - raise ValueError - if not isinstance(advisory_info, dict): - raise TypeError - return advisory_info + +def extract_cves_recursively(data): + """Recursively search for CVE patterns in the YAML data""" + cves_found = [] + + if isinstance(data, dict): + for key, value in data.items(): + if isinstance(key, str) and key.startswith('CVE-'): + cves_found.append(key) + if isinstance(value, str) and value.startswith('CVE-'): + cves_found.append(value) + cves_found.extend(extract_cves_recursively(value)) + elif isinstance(data, list): + for item in data: + if isinstance(item, str) and item.startswith('CVE-'): + cves_found.append(item) + if isinstance(item, dict) and 'key' in item and isinstance(item['key'], str) and item['key'].startswith('CVE-'): + cves_found.append(item['key']) + cves_found.extend(extract_cves_recursively(item)) + return cves_found + + +def get_advisories(ocp_version: str) -> dict[str, str]: + """ + Get a list of advisory URLs for a OCP version from GitLab merge request YAML files. 
+ + Parameters: + ocp_version (str): OCP version with format: "X.Y.Z" + + Returns: + dict: advisory dict with type and URL + """ + # Get MR URL from GitHub releases.yml + mr_url = get_shipment_merge_request_url(ocp_version) + + # Convert web URL to API URL + mr_iid = mr_url.split('/')[-1] + encoded_project_id = quote(GITLAB_PROJECT_ID, safe='') + api_url = f'{GITLAB_BASE_URL}/api/v4/projects/{encoded_project_id}/merge_requests/{mr_iid}' + + headers = {'PRIVATE-TOKEN': GITLAB_API_TOKEN} + + try: + response = requests.get(api_url, headers=headers, verify=False) + response.raise_for_status() + mr_info = response.json() + except requests.exceptions.HTTPError as err: + raise SystemExit(f"GitLab API error: {err}") + + # Get YAML files from the merge request + yaml_files = get_yaml_files_from_mr(mr_info, headers) + + # Search through all YAML files to find the advisory information + advisories_found = {} + + for file_path, yaml_content in yaml_files.items(): + # Skip the fbc file as requested + if 'fbc-openshift' in file_path or not yaml_content: + continue + + # Extract advisory URL using dict.get() for safer navigation + public_url = (yaml_content.get('shipment', {}) + .get('environments', {}) + .get('stage', {}) + .get('advisory', {}) + .get('url', '')) + + if public_url: + # Determine advisory type from filename and extract CVEs from YAML content + for advisory_type in ['image', 'extras', 'metadata', 'rpm']: + if advisory_type in file_path: + # Extract advisory name from public URL + advisory_name = public_url.split('/')[-1] if '/' in public_url else public_url + # Extract CVEs from the entire YAML content + cves = extract_cves_recursively(yaml_content) + advisories_found[advisory_type] = { + 'name': advisory_name, + 'cves': list(set(cves)) # Remove duplicates + } + break + + if not advisories_found: + raise KeyError(f"{ocp_version} OCP version advisory data not found in any YAML files from the merge request") + + # Check if RPM advisory type is missing and log 
warning + if 'rpm' not in advisories_found: + warning_msg = ( + f"\033[93mWARNING: RPMS are still being released via errata, please run " + f"\033[1m`sh advisory_publication_report.sh {ocp_version}`\033[0m\033[93m " + f"from 4.19 branch to see if there are any CVES that effects microshift\033[0m" + ) + print(warning_msg) + + return advisories_found def search_microshift_tickets(affects_version: str, cve_id: str) -> jira.client.ResultList: """ - Query Jira for MicroShift ticket with CVE id and MicroShift version - Parameters: - affects_version (str): MicroShift affected version with format: "X.Y" - cve_id (str): the CVE id with format: "CVE-YYYY-NNNNN" - Returns: - (jira.client.ResultList): a list with all the Jira tickets matching the query + Query Jira for MicroShift ticket with CVE id and MicroShift version. + + Parameters: + affects_version (str): MicroShift affected version with format: "X.Y" + cve_id (str): the CVE id with format: "CVE-YYYY-NNNNN" + + Returns: + jira.client.ResultList: a list with all the Jira tickets matching the query """ - server = jira.JIRA(server=SERVER_URL, token_auth=JIRA_API_TOKEN) + server = jira.JIRA(server=JIRA_URL, token_auth=JIRA_API_TOKEN) jira_tickets = server.search_issues(f''' summary ~ "{cve_id}" and component = MicroShift and (affectedVersion = {affects_version} or affectedVersion = {affects_version}.z) ''') @@ -91,46 +230,57 @@ def search_microshift_tickets(affects_version: str, cve_id: str) -> jira.client. def get_report(ocp_version: str) -> dict[str, dict]: """ - Get a json object with all the advisories, CVEs and jira tickets linked - Parameters: - ocp_version (str): OCP version with format: "X.Y.Z" - Returns: - (dict): json object with all the advisories, CVEs and jira tickets linked + Get a json object with all the advisories, CVEs and jira tickets linked. 
+ + Parameters: + ocp_version (str): OCP version with format: "X.Y.Z" + + Returns: + dict: json object with all the advisories, CVEs and jira tickets linked """ - result_json = dict() + result_json = {} advisories = get_advisories(ocp_version) - for advisory_type, advisory_id in advisories.items(): - advisory_info = get_advisory_info(advisory_id) - cve_list = advisory_info['cve'] - advisory_dict = dict() - advisory_dict['type'] = advisory_type - advisory_dict['url'] = f'https://errata.devel.redhat.com/advisory/{advisory_id}' - advisory_dict['cves'] = dict() + for advisory_type, advisory_data in advisories.items(): + advisory_name = advisory_data['name'] + cve_list = advisory_data['cves'] + advisory_dict = { + 'type': advisory_type, + 'cves': {} + } + for cve in cve_list: jira_tickets = search_microshift_tickets(".".join(ocp_version.split(".")[:2]), cve) - advisory_dict['cves'][cve] = dict() - for ticket in jira_tickets: - jira_ticket_dict = dict() - jira_ticket_dict['id'] = ticket.key - jira_ticket_dict['summary'] = ticket.fields.summary - jira_ticket_dict['status'] = ticket.fields.status.name - jira_ticket_dict['resolution'] = str(ticket.fields.resolution) - advisory_dict['cves'][cve]['jira_ticket'] = jira_ticket_dict - result_json[advisory_info['advisory']] = advisory_dict + advisory_dict['cves'][cve] = {} + if jira_tickets: + for ticket in jira_tickets: + jira_ticket_dict = { + 'id': ticket.key, + 'summary': ticket.fields.summary, + 'status': ticket.fields.status.name, + 'resolution': str(ticket.fields.resolution) + } + advisory_dict['cves'][cve]['jira_ticket'] = jira_ticket_dict + result_json[advisory_name] = advisory_dict return result_json def main(): + """Main function to run the advisory publication report.""" if len(sys.argv) != 2: usage() raise ValueError('Invalid number of arguments') - if JIRA_API_TOKEN is None: - raise ValueError('JIRA_API_TOKEN var not found in the env') + if JIRA_API_TOKEN is None or GITLAB_API_TOKEN is None: + missing_tokens = [] + 
if JIRA_API_TOKEN is None: + missing_tokens.append('JIRA_API_TOKEN') + if GITLAB_API_TOKEN is None: + missing_tokens.append('GITLAB_API_TOKEN') + raise ValueError(f"Missing required environment variables: {', '.join(missing_tokens)}") ocp_version = str(sys.argv[1]) result_json = get_report(ocp_version) - print(f"{json.dumps(result_json, indent=4)}") + print(json.dumps(result_json, indent=4)) if __name__ == '__main__': diff --git a/scripts/auto-rebase/assets.yaml b/scripts/auto-rebase/assets.yaml index 260fe01156..9b3b347266 100644 --- a/scripts/auto-rebase/assets.yaml +++ b/scripts/auto-rebase/assets.yaml @@ -191,6 +191,8 @@ assets: ignore: "it's a local API service for security API group, needed if OpenShift API server is not present" - file: kubelet.yaml src: /machine-config-operator/templates/master/01-master-kubelet/_base/files/kubelet.yaml + - file: kubelet-client-ca.yaml + ignore: "it's a template for ConfigMap processed during runtime" - dir: crd/ src: release-manifests/ diff --git a/scripts/auto-rebase/changelog.txt b/scripts/auto-rebase/changelog.txt index e69de29bb2..ca1dad3993 100644 --- a/scripts/auto-rebase/changelog.txt +++ b/scripts/auto-rebase/changelog.txt @@ -0,0 +1,6 @@ +- machine-config-operator embedded-component cc2f713fa75afd493d1abc24cfa965a242ab07bc to 7a56cf0048a05d576e746c3e992ce586e07c97e8 + - e516665e 2025-08-28T17:36:33+02:00 OCPBUGS-61016: Fix wait failure on MCO pods + +- oc image-arm64 31597edbaedc6d8d206deda350cfc93e89fa24bf to a43428189603f12ae190703cb725f66f742fbe52 + - ea3cacdc 2025-08-29T13:21:45+02:00 must-gather: Fix usage checker for custom commands + diff --git a/scripts/auto-rebase/commits.txt b/scripts/auto-rebase/commits.txt index fd1eade4c7..76cc5b08eb 100644 --- a/scripts/auto-rebase/commits.txt +++ b/scripts/auto-rebase/commits.txt @@ -1,35 +1,35 @@ -https://github.com/openshift/api embedded-component 4a5da13889c1d8724e938521f739202ac7f304b0 -https://github.com/openshift/cluster-csi-snapshot-controller-operator 
embedded-component e3a691335bf8984295374737ffaaa79908723652 -https://github.com/openshift/cluster-dns-operator embedded-component 48ebc1269caad1e9ec7f422b24f3bccbe134d0c4 -https://github.com/openshift/cluster-ingress-operator embedded-component 5bfbb600998c9882e70546de8d2b846e61472d64 -https://github.com/openshift/cluster-kube-apiserver-operator embedded-component a08323ddd81bf18447df1d0f083501d86412819d -https://github.com/openshift/cluster-kube-controller-manager-operator embedded-component daadfa3c25be30d49168be7d30c10386f53e3ef6 +https://github.com/openshift/api embedded-component 4a165b214722ff53fbdac1d250f62d4b0bb2bd3d +https://github.com/openshift/cluster-csi-snapshot-controller-operator embedded-component 3880c9659829c9904e58a943050e8dfd4c9a79cb +https://github.com/openshift/cluster-dns-operator embedded-component 82578923164636746e0d74d594ab32d83a660c0b +https://github.com/openshift/cluster-ingress-operator embedded-component 2371120eedb27049160c70859ef9972a7599c7e3 +https://github.com/openshift/cluster-kube-apiserver-operator embedded-component 0bec046c8c10682390e2a20ae4f416a2d8589f40 +https://github.com/openshift/cluster-kube-controller-manager-operator embedded-component 7030e7353408e83096a285404700c573273c25c8 https://github.com/openshift/cluster-kube-scheduler-operator embedded-component f9b5b31943f2be768af6373def4c8946a30d0ed6 -https://github.com/openshift/cluster-network-operator embedded-component c19d5a6399e4aad7f627a7fa569caabe46ce94ac +https://github.com/openshift/cluster-network-operator embedded-component 746b8fb86171dfc75011a7b8db29de5c2355d848 https://github.com/openshift/cluster-openshift-controller-manager-operator embedded-component dfaeb8d5fdf1f43df37916ec3d70f57ddebb1028 https://github.com/openshift/cluster-policy-controller embedded-component 3e7538547c8f209c72083097a4ebaada6e9c46c5 https://github.com/openshift/csi-external-snapshotter embedded-component 42604822c25eb2aa0ecb017c24e328221b238eb9 -https://github.com/openshift/etcd 
embedded-component b5ad268120ccd50185b9ec00ff1c4e5aca379f84 -https://github.com/openshift/kubernetes embedded-component 80b82ba5ae030c3e2b7d7752a204ddf71139842c +https://github.com/openshift/etcd embedded-component 9c065d4d842c8de57806426c13201413b4d595e2 +https://github.com/openshift/kubernetes embedded-component a237dcbeee8bbe1440c94349399cc6eac8870910 https://github.com/openshift/kubernetes-kube-storage-version-migrator embedded-component 0f8a4eb84ace9b746cf25d51d80dacf34ca5f953 -https://github.com/openshift/machine-config-operator embedded-component 3a09b61ed8cee97153f865970f93c39e0ec7d85f -https://github.com/openshift/openshift-controller-manager embedded-component bd60afcd882e5be63825fb201cb55a5a48daceaa -https://github.com/openshift/operator-framework-olm embedded-component 45b91529be7117e7445c6e411e5f86ac8f8feb29 +https://github.com/openshift/machine-config-operator embedded-component 7a56cf0048a05d576e746c3e992ce586e07c97e8 +https://github.com/openshift/openshift-controller-manager embedded-component 49aaf599fb9024f36359f55bac6b3bb1482e24b8 +https://github.com/openshift/operator-framework-olm embedded-component e53cf3090214284bf913bd2d7165a0a9422635a0 https://github.com/openshift/route-controller-manager embedded-component bf2fa662f57f233d8541f94c4953e0dcd7a5ab20 -https://github.com/openshift/service-ca-operator embedded-component 4dfa6916f984d0fd7188380edc88b250738f07f7 -https://github.com/openshift/oc image-amd64 ea45cd5d407a1fc9805d051f3946e8a8fc1ca4da -https://github.com/openshift/coredns image-amd64 a69279e3393b269099dcaae967570741d4c5e469 +https://github.com/openshift/service-ca-operator embedded-component b0fe556a981964728306f803864af9d2881db967 +https://github.com/openshift/oc image-amd64 31597edbaedc6d8d206deda350cfc93e89fa24bf +https://github.com/openshift/coredns image-amd64 6f39336e6da9dc77b2db140a96773c413d50a665 https://github.com/openshift/csi-external-snapshotter image-amd64 42604822c25eb2aa0ecb017c24e328221b238eb9 
-https://github.com/openshift/router image-amd64 6723dce855196c4554c9bee5581f4fbdf479eb80 -https://github.com/openshift/kube-rbac-proxy image-amd64 bc1ca29e2b6b50f15ece8db3990277276cacff0c -https://github.com/openshift/ovn-kubernetes image-amd64 1fde61ad03a950ba2236c29e9d00cb6166c9fe9b -https://github.com/openshift/kubernetes image-amd64 80b82ba5ae030c3e2b7d7752a204ddf71139842c -https://github.com/openshift/service-ca-operator image-amd64 4dfa6916f984d0fd7188380edc88b250738f07f7 -https://github.com/openshift/oc image-arm64 1073b975a85824a68503e7ecb933f7c48edbc393 -https://github.com/openshift/coredns image-arm64 a69279e3393b269099dcaae967570741d4c5e469 +https://github.com/openshift/router image-amd64 7c50294cfbc9a7c0d3c715de56fb92dadeefb8e6 +https://github.com/openshift/kube-rbac-proxy image-amd64 b9134351be37c43408334047d8eb85d0ac01fe4e +https://github.com/openshift/ovn-kubernetes image-amd64 9741174157381d7b87e2d21559cbe06b8f2b5ebf +https://github.com/openshift/kubernetes image-amd64 a237dcbeee8bbe1440c94349399cc6eac8870910 +https://github.com/openshift/service-ca-operator image-amd64 b0fe556a981964728306f803864af9d2881db967 +https://github.com/openshift/oc image-arm64 a43428189603f12ae190703cb725f66f742fbe52 +https://github.com/openshift/coredns image-arm64 6f39336e6da9dc77b2db140a96773c413d50a665 https://github.com/openshift/csi-external-snapshotter image-arm64 42604822c25eb2aa0ecb017c24e328221b238eb9 -https://github.com/openshift/router image-arm64 6723dce855196c4554c9bee5581f4fbdf479eb80 -https://github.com/openshift/kube-rbac-proxy image-arm64 bc1ca29e2b6b50f15ece8db3990277276cacff0c -https://github.com/openshift/ovn-kubernetes image-arm64 1fde61ad03a950ba2236c29e9d00cb6166c9fe9b -https://github.com/openshift/kubernetes image-arm64 9c2642ea7167e203db9f473422f4f9aa4501a51f -https://github.com/openshift/service-ca-operator image-arm64 a6346e9d7cfd9e7243742ede7c795b5ac93498a4 +https://github.com/openshift/router image-arm64 
7c50294cfbc9a7c0d3c715de56fb92dadeefb8e6 +https://github.com/openshift/kube-rbac-proxy image-arm64 b9134351be37c43408334047d8eb85d0ac01fe4e +https://github.com/openshift/ovn-kubernetes image-arm64 9741174157381d7b87e2d21559cbe06b8f2b5ebf +https://github.com/openshift/kubernetes image-arm64 a237dcbeee8bbe1440c94349399cc6eac8870910 +https://github.com/openshift/service-ca-operator image-arm64 b0fe556a981964728306f803864af9d2881db967 diff --git a/scripts/auto-rebase/last_rebase.sh b/scripts/auto-rebase/last_rebase.sh index ecc7add741..6c2f8d82d4 100755 --- a/scripts/auto-rebase/last_rebase.sh +++ b/scripts/auto-rebase/last_rebase.sh @@ -1,2 +1,2 @@ #!/bin/bash -x -./scripts/auto-rebase/rebase.sh to "registry.ci.openshift.org/ocp/release:4.20.0-0.nightly-2025-07-31-063120" "registry.ci.openshift.org/ocp-arm64/release-arm64:4.20.0-0.nightly-arm64-2025-08-05-094808" +./scripts/auto-rebase/rebase.sh to "registry.ci.openshift.org/ocp/release:4.20.0-0.nightly-2025-09-01-101753" "registry.ci.openshift.org/ocp-arm64/release-arm64:4.20.0-0.nightly-arm64-2025-09-01-210443" diff --git a/scripts/auto-rebase/manifests_patches/011-ingress-deployment-access-logging.patch b/scripts/auto-rebase/manifests_patches/011-ingress-deployment-access-logging.patch index 8bb6df79c5..b9517e5751 100644 --- a/scripts/auto-rebase/manifests_patches/011-ingress-deployment-access-logging.patch +++ b/scripts/auto-rebase/manifests_patches/011-ingress-deployment-access-logging.patch @@ -78,7 +78,7 @@ index 916e4b601..9261e3700 100644 args: - -v=4 + {{- if and .AccessLoggingEnabled (not .AccessLoggingSyslogAddress) }} -+ - name: access-logs ++ - name: logs + imagePullPolicy: IfNotPresent + terminationMessagePolicy: FallbackToLogsOnError + image: '{{ .ReleaseImage.haproxy_router }}' diff --git a/scripts/auto-rebase/rebase_patches/0004-remove-config-informer-and-cpu-partitioning-admission-plugin.patch 
b/scripts/auto-rebase/rebase_patches/0004-remove-config-informer-and-cpu-partitioning-admission-plugin.patch index 92de96fce4..b780dd94b9 100644 --- a/scripts/auto-rebase/rebase_patches/0004-remove-config-informer-and-cpu-partitioning-admission-plugin.patch +++ b/scripts/auto-rebase/rebase_patches/0004-remove-config-informer-and-cpu-partitioning-admission-plugin.patch @@ -70,7 +70,7 @@ index 545e1a04..39b266a0 100644 - i.OpenshiftConfigInformers.Start(stopCh) } diff --git a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/admissionenablement/register.go b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/admissionenablement/register.go -index 2427e502d..a75cdfdde 100644 +index eb822bd99..e92ab9e9f 100644 --- a/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/admissionenablement/register.go +++ b/deps/github.com/openshift/kubernetes/openshift-kube-apiserver/admission/admissionenablement/register.go @@ -13,7 +13,6 @@ import ( @@ -81,7 +81,15 @@ index 2427e502d..a75cdfdde 100644 "k8s.io/kubernetes/openshift-kube-apiserver/admission/autoscaling/managementcpusoverride" quotarunonceduration "k8s.io/kubernetes/openshift-kube-apiserver/admission/autoscaling/runonceduration" "k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/customresourcevalidationregistration" -@@ -31,7 +31,6 @@ func RegisterOpenshiftKubeAdmissionPlugins(plugins *admission.Plugins) { +@@ -24,7 +23,6 @@ import ( + projectnodeenv "k8s.io/kubernetes/openshift-kube-apiserver/admission/scheduler/nodeenv" + schedulerpodnodeconstraints "k8s.io/kubernetes/openshift-kube-apiserver/admission/scheduler/podnodeconstraints" + "k8s.io/kubernetes/openshift-kube-apiserver/admission/storage/csiinlinevolumesecurity" +- "k8s.io/kubernetes/openshift-kube-apiserver/admission/storage/performantsecuritypolicy" + ) + + func RegisterOpenshiftKubeAdmissionPlugins(plugins *admission.Plugins) { +@@ -33,7 +31,6 @@ func 
RegisterOpenshiftKubeAdmissionPlugins(plugins *admission.Plugins) { imagepolicy.Register(plugins) ingressadmission.Register(plugins) managementcpusoverride.Register(plugins) @@ -89,11 +97,26 @@ index 2427e502d..a75cdfdde 100644 mixedcpus.Register(plugins) projectnodeenv.Register(plugins) quotaclusterresourceoverride.Register(plugins) -@@ -74,7 +73,6 @@ var ( +@@ -45,7 +42,6 @@ func RegisterOpenshiftKubeAdmissionPlugins(plugins *admission.Plugins) { + externalipranger.RegisterExternalIP(plugins) + restrictedendpoints.RegisterRestrictedEndpoints(plugins) + csiinlinevolumesecurity.Register(plugins) +- performantsecuritypolicy.Register(plugins) + } + + var ( +@@ -75,11 +71,9 @@ var ( + "security.openshift.io/SecurityContextConstraint", + "security.openshift.io/SCCExecRestrictions", "route.openshift.io/IngressAdmission", - hostassignment.PluginName, // "route.openshift.io/RouteHostAssignment" - csiinlinevolumesecurity.PluginName, // "storage.openshift.io/CSIInlineVolumeSecurity" -- managednode.PluginName, // "autoscaling.openshift.io/ManagedNode" - mixedcpus.PluginName, // "autoscaling.openshift.io/MixedCPUs" +- hostassignment.PluginName, // "route.openshift.io/RouteHostAssignment" +- csiinlinevolumesecurity.PluginName, // "storage.openshift.io/CSIInlineVolumeSecurity" +- managednode.PluginName, // "autoscaling.openshift.io/ManagedNode" +- mixedcpus.PluginName, // "autoscaling.openshift.io/MixedCPUs" +- performantsecuritypolicy.PluginName, // "storage.openshift.io/PerformantSecurityPolicy" ++ hostassignment.PluginName, // "route.openshift.io/RouteHostAssignment" ++ csiinlinevolumesecurity.PluginName, // "storage.openshift.io/CSIInlineVolumeSecurity" ++ mixedcpus.PluginName, // "autoscaling.openshift.io/MixedCPUs" } + // openshiftAdmissionPluginsForKubeAfterResourceQuota are the plugins to add after ResourceQuota plugin diff --git a/scripts/ci-ai-model-serving/tests/08-test-caikit-tgis.sh b/scripts/ci-ai-model-serving/tests/08-test-caikit-tgis.sh new file mode 100755 
index 0000000000..1174e96fe2 --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/08-test-caikit-tgis.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +set -xeuo pipefail + +SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +function pull_image() { + local -r img="${1}" + for i in 1 2 3; do + GOMAXPROCS=8 sudo crictl pull "${img}" && break + if [ "${i}" -eq 3 ]; then + echo "ERROR: Failed to pull ${img} image after 3 attempts" + exit 1 + fi + echo "Attempt ${i} failed. Retrying in 5 seconds..." && sleep 5 + done +} + +NS=test-caikit-tgis +MINIO_NS=minio + +oc create ns "${NS}" + +CAIKIT_TGIS_IMAGE="$(jq -r '.images | with_entries(select(.key == "caikit-tgis-image")) | .[]' /usr/share/microshift/release/release-ai-model-serving-"$(uname -m)".json)" +TGIS_IMAGE="$(jq -r '.images | with_entries(select(.key == "tgis-image")) | .[]' /usr/share/microshift/release/release-ai-model-serving-"$(uname -m)".json)" +pull_image "${CAIKIT_TGIS_IMAGE}" +pull_image "${TGIS_IMAGE}" +pull_image quay.io/opendatahub/modelmesh-minio-examples:caikit-flan-t5 + +cp /usr/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/caikit-tgis.yaml /tmp/caikit-tgis.yaml +sed -i "s,image: caikit-tgis-image,image: ${CAIKIT_TGIS_IMAGE}," /tmp/caikit-tgis.yaml +sed -i "s,image: tgis-image,image: ${TGIS_IMAGE}," /tmp/caikit-tgis.yaml +oc apply -n "${NS}" -f /tmp/caikit-tgis.yaml + +# +# Following instructions are based on https://github.com/opendatahub-io/caikit-tgis-serving/blob/main/demo/kserve/deploy-remove.md +# + +# Deploy Minio (self-hostable S3-compatible alternative) with preloaded flan-t5-small +oc create ns "${MINIO_NS}" +oc apply -n "${MINIO_NS}" -f "${SCRIPTDIR}/caikit-tgis/010-minio.yaml" + +# Create ServiceAccount and Secret for the InferenceService to use Minio as S3 backend. 
+oc apply -n "${NS}" -f "${SCRIPTDIR}/caikit-tgis/011-minio-connection-secret.yaml" +oc apply -n "${NS}" -f "${SCRIPTDIR}/caikit-tgis/012-minio-sa.yaml" + +# Create InferenceService using model in S3 (minio) +oc apply -n "${NS}" -f "${SCRIPTDIR}/caikit-tgis/020-inference-svc.yaml" + +# Create Route for the InferenceService +oc apply -n "${NS}" -f "${SCRIPTDIR}/caikit-tgis/021-route.yaml" + +sudo microshift healthcheck \ + -v 2 \ + --timeout 10m0s \ + --namespace "${NS}" \ + --deployments flan-t5-predictor + +resp=$(curl -X POST \ + flan-t5-predictor.apps.example.com/api/v1/task/text-generation \ + --connect-to "flan-t5-predictor.apps.example.com::$(hostname -i):" \ + -H "Content-Type: application/json" \ + --data '{"model_id": "flan-t5-small-caikit", "inputs": "At what temperature does Nitrogen boil?"}') + +echo "${resp}" | jq + +# The answer is technically wrong (boiling point of nitrogen is -320.4°F), but at least we got a response. +# We don't test the model itself, just the integration. +res=0 +if ! 
echo "${resp}" | jq -r '.generated_text' | grep -q "74 degrees F"; then + echo "Unexpected answer" + res=1 +fi + +oc delete ns "${NS}" ; oc delete ns "${MINIO_NS}" + +exit "${res}" diff --git a/scripts/ci-ai-model-serving/tests/caikit-tgis/010-minio.yaml b/scripts/ci-ai-model-serving/tests/caikit-tgis/010-minio.yaml new file mode 100644 index 0000000000..5459f7eaa1 --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/caikit-tgis/010-minio.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: Service +metadata: + name: minio +spec: + ports: + - name: minio-client-port + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: minio +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + app: minio + name: minio +spec: + containers: + - args: + - server + - /data1 + env: + - name: MINIO_ACCESS_KEY + value: admin + - name: MINIO_SECRET_KEY + value: password + image: quay.io/opendatahub/modelmesh-minio-examples:caikit-flan-t5 + imagePullPolicy: IfNotPresent + name: minio + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" diff --git a/scripts/ci-ai-model-serving/tests/caikit-tgis/011-minio-connection-secret.yaml b/scripts/ci-ai-model-serving/tests/caikit-tgis/011-minio-connection-secret.yaml new file mode 100644 index 0000000000..48db0eeb71 --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/caikit-tgis/011-minio-connection-secret.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + serving.kserve.io/s3-endpoint: minio.minio.svc:9000 + serving.kserve.io/s3-usehttps: "0" + serving.kserve.io/s3-region: "us-east-2" + serving.kserve.io/s3-useanoncredential: "false" + name: storage-config +stringData: + "AWS_ACCESS_KEY_ID": "admin" + "AWS_SECRET_ACCESS_KEY": "password" diff --git a/scripts/ci-ai-model-serving/tests/caikit-tgis/012-minio-sa.yaml b/scripts/ci-ai-model-serving/tests/caikit-tgis/012-minio-sa.yaml new file mode 100644 index 
0000000000..006e84a7a5 --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/caikit-tgis/012-minio-sa.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sa +secrets: +- name: storage-config diff --git a/scripts/ci-ai-model-serving/tests/caikit-tgis/020-inference-svc.yaml b/scripts/ci-ai-model-serving/tests/caikit-tgis/020-inference-svc.yaml new file mode 100644 index 0000000000..3354b2af92 --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/caikit-tgis/020-inference-svc.yaml @@ -0,0 +1,16 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: flan-t5 +spec: + predictor: + serviceAccountName: sa + model: + modelFormat: + name: caikit + storageUri: "s3://modelmesh-example-models/llm/models/flan-t5-small-caikit" + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 diff --git a/scripts/ci-ai-model-serving/tests/caikit-tgis/021-route.yaml b/scripts/ci-ai-model-serving/tests/caikit-tgis/021-route.yaml new file mode 100644 index 0000000000..e589c63cec --- /dev/null +++ b/scripts/ci-ai-model-serving/tests/caikit-tgis/021-route.yaml @@ -0,0 +1,13 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: flan-t5 +spec: + host: flan-t5-predictor.apps.example.com + port: + targetPort: 8080 + to: + kind: Service + name: flan-t5-predictor + weight: 100 + wildcardPolicy: None diff --git a/scripts/microshift-sos-report.sh b/scripts/microshift-sos-report.sh index 88adadc206..c66f5b15e9 100755 --- a/scripts/microshift-sos-report.sh +++ b/scripts/microshift-sos-report.sh @@ -13,22 +13,41 @@ function usage() { echo " profiles: ${PROFILES}" echo " plugins: ${PLUGINS}" echo "" - echo "Usage: ${SCRIPT_NAME} [--tmp-dir TMP-DIR]" - echo " --tmp-dir TMP-DIR Temporary directory for saving the report. Defaults to ${TEMPDIR}." + echo "Usage: ${SCRIPT_NAME} [--tmp-dir TMP-DIR] [--help] [sos-report-args...]" + echo " --tmp-dir TMP-DIR Temporary directory for saving the report. Defaults to ${TEMPDIR}." 
+ echo " --help Show this help message and exit." + echo " --profiles PROFILES Profiles to include in the report. Can be overridden using PROFILES env var. Defaults to ${PROFILES}." + echo " --plugins PLUGINS Plugins to include in the report. Can be overridden using PLUGINS env var. Defaults to ${PLUGINS}." + echo " sos-report-args Additional arguments to pass to the sos report command." exit 1 } -if [ $# -ge 1 ]; then +remaining_args=() + +while [ $# -gt 0 ]; do case $1 in --tmp-dir) [ $# -ne 2 ] && usage TEMPDIR="$2" + shift 2 ;; - *) + --plugins) + PLUGINS="$2" + shift 2 + ;; + --profiles) + PROFILES="$2" + shift 2 + ;; + -h|--help) usage ;; + *) + remaining_args+=("$1") + shift + ;; esac -fi +done if [ "$(id -u)" -ne 0 ] ; then echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges" @@ -40,10 +59,22 @@ if [ ! -d "${TEMPDIR}" ]; then exit 1 fi +plugins_arg="" +if [ -n "${PLUGINS}" ]; then + plugins_arg="--only-plugins ${PLUGINS}" +fi + +profiles_arg="" +if [ -n "${PROFILES}" ]; then + profiles_arg="--profiles ${PROFILES}" +fi + +# shellcheck disable=SC2086,SC2068 sos report \ --quiet \ --batch \ --all-logs \ --tmp-dir "${TEMPDIR}" \ - --profiles "${PROFILES}" \ - --only-plugins "${PLUGINS}" + ${profiles_arg} \ + ${plugins_arg} \ + "${remaining_args[@]}" diff --git a/scripts/multinode/configure-sec.sh b/scripts/multinode/configure-sec.sh index 334d0fdcf7..47afbe60c5 100755 --- a/scripts/multinode/configure-sec.sh +++ b/scripts/multinode/configure-sec.sh @@ -77,10 +77,10 @@ function configure_kubelet() { # Checksums can be obtained from https://www.downloadkubernetes.com/ # or by downloading a "${url}.sha256" file (see below for ${url}). 
For example: - # version=v1.33.2; for kube_arch in amd64 arm64; do echo "${kube_arch}: $(curl -L https://dl.k8s.io/release/${version}/bin/linux/${kube_arch}/kubelet.sha256 2>/dev/null)"; done - local -r version="v1.33.2" - local -r kube_hash_amd64="77fa5d29995653fe7e2855759a909caf6869c88092e2f147f0b84cbdba98c8f3" - local -r kube_hash_arm64="0fa15aca9b90fe7aef1ed3aad31edd1d9944a8c7aae34162963a6aaaf726e065" + # version=v1.33.3; for kube_arch in amd64 arm64; do echo "${kube_arch}: $(curl -L https://dl.k8s.io/release/${version}/bin/linux/${kube_arch}/kubelet.sha256 2>/dev/null)"; done + local -r version="v1.33.3" + local -r kube_hash_amd64="37f9093ed2b4669cccf5474718e43ec412833e1267c84b01e662df2c4e5d7aaa" + local -r kube_hash_arm64="3f69bb32debfaf25fce91aa5e7181e1e32f3550f3257b93c17dfb37bed621a9c" local kube_arch="" local kube_hash="" diff --git a/scripts/verify/verify-rf.sh b/scripts/verify/verify-rf.sh index 6b4b0990a6..976e67ae48 100755 --- a/scripts/verify/verify-rf.sh +++ b/scripts/verify/verify-rf.sh @@ -13,11 +13,6 @@ cd "${ROOTDIR}/test" # https://robocop.readthedocs.io/en/stable/rules.html#too-many-calls-in-test-case-w0505 set -x -"${RF_VENV}/bin/robocop" \ - --exclude 1015 \ - --configure 0504:max_len:40 \ - --configure 0505:max_calls:20 \ - --configure 0508:line_length:200 \ - --configure 0506:max_lines:1000 +"${RF_VENV}/bin/robocop" check -"${RF_VENV}/bin/robotidy" --check --diff . +"${RF_VENV}/bin/robocop" format --check --diff --no-overwrite diff --git a/test/README.md b/test/README.md index 97e174311e..f1526ef01f 100644 --- a/test/README.md +++ b/test/README.md @@ -475,8 +475,8 @@ $ ./test/bin/scenario.sh run \ ### Scenario Definitions -Scenarios are saved as shell scripts under `./test/scenarios` and -`./test/scenarios-periodics`. +Scenarios are saved as shell scripts under `./test/scenarios`, +`./test/scenarios-bootc` and `./test/scenarios-bootc-containers`. 
Each scenario includes several functions that are combined with the framework scripts to take the specific actions for the combination of images and tests that make up the scenario. @@ -511,6 +511,8 @@ Scenarios utilize following distinct MicroShift sources: - `crel`: current MicroShift minor release (already built and released RPMs like ECs, RCs, Z-stream). It is optional meaning that shortly after branch cut, before first EC is released, it will be skipped. +- `lrel`: latest available release (EC, RC or zstream) available from internal + Red Hat repositories (staging). | Starting ref | End ref | Successful upgrade scenario | Failed upgrade scenario | |--------------|---------|-----------------------------|-------------------------| @@ -518,6 +520,7 @@ Scenarios utilize following distinct MicroShift sources: | `prel` | `src` |`el92-prel@upgrade-ok.sh` | **MISSING** | | `src` | `src` | **MISSING** | `el92-src@upgrade-failing-cannot-backup.sh` | | `crel` | `src` | `el92-crel@upgrade-ok.sh` | `el92-crel@upgrade-fails.sh` | +| `y1` | `lrel` | `el96-y1@el96-lrel@standard1.sh` | **MISSING** | #### scenario_create_vms diff --git a/test/assets/generic-device-plugin/fake-serial-communication.py b/test/assets/generic-device-plugin/fake-serial-communication.py index f256b6349e..064d6bfc14 100755 --- a/test/assets/generic-device-plugin/fake-serial-communication.py +++ b/test/assets/generic-device-plugin/fake-serial-communication.py @@ -25,9 +25,14 @@ def send_msg(ser, msg): def recv_msg(ser, expected_msg): print(f"Listening for a message. 
Expecting: {expected_msg}") - line = ser.readline() - print(f"Received message: {line}") - if expected_msg != line: + while True: + line = ser.readline() + print(f"Received message: {line}") + if len(line) == 0: + print("Received empty message - ignoring") + continue + if expected_msg == line: + break print("Received message does not match expected one") sys.exit(1) diff --git a/test/assets/generic-device-plugin/fuse-test-pod.yaml b/test/assets/generic-device-plugin/fuse-test-pod.yaml new file mode 100644 index 0000000000..cdddc1db6a --- /dev/null +++ b/test/assets/generic-device-plugin/fuse-test-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: fuse-test-pod +spec: + containers: + - name: fuse-app-container + image: registry.access.redhat.com/ubi9/ubi:9.6 + command: ["sleep", "infinity"] + resources: + limits: + device.microshift.io/fuse: "4" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" \ No newline at end of file diff --git a/test/bin/common_versions.sh b/test/bin/common_versions.sh index 3491e51d61..7605190c8a 100644 --- a/test/bin/common_versions.sh +++ b/test/bin/common_versions.sh @@ -72,8 +72,8 @@ export FAKE_NEXT_MINOR_VERSION=$(( "${MINOR_VERSION}" + 1 )) # # For a release branch, the current release repository should come from the # official 'rhocp' stream. 
-CURRENT_RELEASE_REPO="" # "https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/microshift/ocp/latest-4.20/el9/os" -CURRENT_RELEASE_VERSION="" # "$(get_vrel_from_beta "${CURRENT_RELEASE_REPO}")" +CURRENT_RELEASE_REPO="https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/microshift/ocp-dev-preview/latest-4.20/el9/os" +CURRENT_RELEASE_VERSION="$(get_vrel_from_beta "${CURRENT_RELEASE_REPO}")" export CURRENT_RELEASE_REPO export CURRENT_RELEASE_VERSION @@ -87,8 +87,8 @@ export CURRENT_RELEASE_VERSION # # For a release branch, the previous release repository should come from the # official 'rhocp' stream. -PREVIOUS_RELEASE_REPO="https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/microshift/ocp/latest-4.19/el9/os" -PREVIOUS_RELEASE_VERSION="$(get_vrel_from_beta "${PREVIOUS_RELEASE_REPO}")" +PREVIOUS_RELEASE_REPO="rhocp-4.19-for-rhel-9-$(uname -m)-rpms" +PREVIOUS_RELEASE_VERSION="$(get_vrel_from_rhsm "${PREVIOUS_RELEASE_REPO}")" export PREVIOUS_RELEASE_REPO export PREVIOUS_RELEASE_VERSION @@ -110,7 +110,7 @@ export RHOCP_MINOR_Y_BETA # If the release version is defined, the repository should be deduced from the # PREVIOUS_RELEASE_REPO setting. # Beta repository URL needs to be set for CentOS images as they don't have access to the RHOCP. -RHOCP_MINOR_Y1="" +RHOCP_MINOR_Y1=19 RHOCP_MINOR_Y1_BETA="https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/dependencies/rpms/4.19-el9-beta/" export RHOCP_MINOR_Y1 export RHOCP_MINOR_Y1_BETA @@ -123,6 +123,9 @@ export RHOCP_MINOR_Y2=18 # See https://github.com/vmware-tanzu/sonobuoy/releases. export CNCF_SONOBUOY_VERSION=v0.57.3 +# The current version of the microshift-gitops package. 
+export GITOPS_VERSION=1.16 + # The brew release versions needed for release regression testing BREW_Y0_RELEASE_VERSION="$(get_vrel_from_rpm "${BREW_RPM_SOURCE}/4.${MINOR_VERSION}-zstream/${UNAME_M}/")" BREW_Y1_RELEASE_VERSION="$(get_vrel_from_rpm "${BREW_RPM_SOURCE}/4.${PREVIOUS_MINOR_VERSION}-zstream/${UNAME_M}/")" @@ -136,3 +139,10 @@ export BREW_Y2_RELEASE_VERSION export BREW_RC_RELEASE_VERSION export BREW_EC_RELEASE_VERSION export BREW_NIGHTLY_RELEASE_VERSION + +# Set the release type to ec, rc or zstream +LATEST_RELEASE_TYPE="ec" +export LATEST_RELEASE_TYPE + +BREW_LREL_RELEASE_VERSION="${BREW_EC_RELEASE_VERSION}" +export BREW_LREL_RELEASE_VERSION diff --git a/test/bin/scenario.sh b/test/bin/scenario.sh index 10e01a3a87..792489f1e0 100755 --- a/test/bin/scenario.sh +++ b/test/bin/scenario.sh @@ -31,6 +31,7 @@ SKIP_GREENBOOT=${SKIP_GREENBOOT:-false} # may be overridden in scenario file IMAGE_SIGSTORE_ENABLED=false # may be overridden in scenario file VNC_CONSOLE=${VNC_CONSOLE:-false} # may be overridden in global settings file TEST_RANDOMIZATION="all" # may be overridden in scenario file +TEST_EXCLUDES="none" # may be overridden in scenario file TEST_EXECUTION_TIMEOUT="30m" # may be overriden in scenario file SUBSCRIPTION_MANAGER_PLUGIN="${SUBSCRIPTION_MANAGER_PLUGIN:-${SCRIPTDIR}/subscription_manager_register.sh}" # may be overridden in global settings file RUN_HOST_OVERRIDE="" # target any given VM for running scenarios @@ -1047,11 +1048,16 @@ EOF timeout_robot="${rf_binary}" fi + export SKIP_SOS # For sos-on-failure-listener.py + # shellcheck disable=SC2086 if ! 
${timeout_robot} \ --name "${SCENARIO}" \ --randomize "${TEST_RANDOMIZATION}" \ + --exclude "${TEST_EXCLUDES}" \ --loglevel TRACE \ + --listener "${TESTDIR}/resources/sos-on-failure-listener.py" \ + --pythonpath "${TESTDIR}/resources" \ --outputdir "${SCENARIO_INFO_DIR}/${SCENARIO}" \ --debugfile "${SCENARIO_INFO_DIR}/${SCENARIO}/rf-debug.log" \ -x junit.xml \ diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel94-bootc-brew-y2-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel94-bootc-brew-y2-with-optional.containerfile index d11e489630..74f2bf006e 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel94-bootc-brew-y2-with-optional.containerfile +++ b/test/image-blueprints-bootc/layer1-base/group2/rhel94-bootc-brew-y2-with-optional.containerfile @@ -38,6 +38,7 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp # {{- end -}} diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-ec-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-ec-with-optional.containerfile index 3819d06f3a..0fb9c6fe3d 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-ec-with-optional.containerfile +++ b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-ec-with-optional.containerfile @@ -43,6 +43,24 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + 
firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp + +# Prepare system for testing Generic Device Plugin. +# Upgrade the kernel to keep the same procedure with RHEL and CentOS. +# CentOS requires upgrade because of a different package retention policy +# which means that the kernel in the base bootc image might no longer +# be available in the repositories. +# hadolint ignore=DL3003 +RUN dnf upgrade kernel -y && \ + KERNEL_VER=$(rpm -q --qf "%{VERSION}-%{RELEASE}" kernel); \ + KERNEL_VER_ARCH="${KERNEL_VER}.$(uname -m)"; \ + dnf install -y git make "kernel-devel-${KERNEL_VER}" python3-pyserial && \ + dnf clean all && \ + git clone https://github.com/pmtk/serialsim.git /tmp/serialsim && \ + cd /tmp/serialsim && \ + make KERNEL="${KERNEL_VER_ARCH}" all install && \ + rm -rf /tmp/serialsim + # {{- end -}} diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-nightly-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-nightly-with-optional.containerfile index ebce25c0fd..f4204bd355 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-nightly-with-optional.containerfile +++ b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-nightly-with-optional.containerfile @@ -43,6 +43,24 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp + +# Prepare system for testing Generic Device Plugin. +# Upgrade the kernel to keep the same procedure with RHEL and CentOS. 
+# CentOS requires upgrade because of a different package retention policy +# which means that the kernel in the base bootc image might no longer +# be available in the repositories. +# hadolint ignore=DL3003 +RUN dnf upgrade kernel -y && \ + KERNEL_VER=$(rpm -q --qf "%{VERSION}-%{RELEASE}" kernel); \ + KERNEL_VER_ARCH="${KERNEL_VER}.$(uname -m)"; \ + dnf install -y git make "kernel-devel-${KERNEL_VER}" python3-pyserial && \ + dnf clean all && \ + git clone https://github.com/pmtk/serialsim.git /tmp/serialsim && \ + cd /tmp/serialsim && \ + make KERNEL="${KERNEL_VER_ARCH}" all install && \ + rm -rf /tmp/serialsim + # {{- end -}} diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-rc-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-rc-with-optional.containerfile index fe1518d2c2..6e52ba83d4 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-rc-with-optional.containerfile +++ b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-rc-with-optional.containerfile @@ -43,6 +43,23 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp + +# Prepare system for testing Generic Device Plugin. +# Upgrade the kernel to keep the same procedure with RHEL and CentOS. +# CentOS requires upgrade because of a different package retention policy +# which means that the kernel in the base bootc image might no longer +# be available in the repositories. 
+# hadolint ignore=DL3003 +RUN dnf upgrade kernel -y && \ + KERNEL_VER=$(rpm -q --qf "%{VERSION}-%{RELEASE}" kernel); \ + KERNEL_VER_ARCH="${KERNEL_VER}.$(uname -m)"; \ + dnf install -y git make "kernel-devel-${KERNEL_VER}" python3-pyserial && \ + dnf clean all && \ + git clone https://github.com/pmtk/serialsim.git /tmp/serialsim && \ + cd /tmp/serialsim && \ + make KERNEL="${KERNEL_VER_ARCH}" all install && \ + rm -rf /tmp/serialsim # {{- end -}} diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y1-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y1-with-optional.containerfile index edb1e43b9a..6461a928e8 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y1-with-optional.containerfile +++ b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y1-with-optional.containerfile @@ -43,6 +43,7 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp # {{- end -}} diff --git a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y0-with-optional.containerfile b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-zstream-with-optional.containerfile similarity index 73% rename from test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y0-with-optional.containerfile rename to test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-zstream-with-optional.containerfile index 0eee5fa4b8..d551cb9f21 100644 --- a/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-y0-with-optional.containerfile +++ 
b/test/image-blueprints-bootc/layer1-base/group2/rhel96-bootc-brew-zstream-with-optional.containerfile @@ -43,6 +43,24 @@ RUN firewall-offline-cmd --zone=public --add-port=22/tcp && \ firewall-offline-cmd --zone=public --add-port=443/tcp && \ firewall-offline-cmd --zone=public --add-port=5353/udp && \ firewall-offline-cmd --zone=public --add-port=6443/tcp && \ + firewall-offline-cmd --zone=public --add-port=8889/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/tcp && \ firewall-offline-cmd --zone=public --add-port=30000-32767/udp + +# Prepare system for testing Generic Device Plugin. +# Upgrade the kernel to keep the same procedure with RHEL and CentOS. +# CentOS requires upgrade because of a different package retention policy +# which means that the kernel in the base bootc image might no longer +# be available in the repositories. +# hadolint ignore=DL3003 +RUN dnf upgrade kernel -y && \ +    KERNEL_VER=$(rpm -q --qf "%{VERSION}-%{RELEASE}" kernel); \ +    KERNEL_VER_ARCH="${KERNEL_VER}.$(uname -m)"; \ +    dnf install -y git make "kernel-devel-${KERNEL_VER}" python3-pyserial && \ +    dnf clean all && \ +    git clone https://github.com/pmtk/serialsim.git /tmp/serialsim && \ +    cd /tmp/serialsim && \ +    make KERNEL="${KERNEL_VER_ARCH}" all install && \ +    rm -rf /tmp/serialsim + # {{- end -}} diff --git a/test/image-blueprints-bootc/layer3-periodic/group2/rhel96-bootc-source-gitops.containerfile new file mode 100644 index 0000000000..7294382846 --- /dev/null +++ b/test/image-blueprints-bootc/layer3-periodic/group2/rhel96-bootc-source-gitops.containerfile @@ -0,0 +1,11 @@ +FROM localhost/rhel96-bootc-source:latest + +# Copy repository configuration +COPY ./bootc-images/microshift-gitops.repo \ + /etc/yum.repos.d/ + +# Install the microshift-gitops package from the repository above, +# then remove the repo files and clean up to reduce image size. 
+RUN dnf install -y microshift-gitops && \ + rm -vf /etc/yum.repos.d/microshift-*.repo && \ + dnf clean all diff --git a/test/image-blueprints-bootc/layer3-periodic/group3/cos9-bootc-source-fips.containerfile b/test/image-blueprints-bootc/layer3-periodic/group3/cos9-bootc-source-fips.containerfile deleted file mode 100644 index 06158491df..0000000000 --- a/test/image-blueprints-bootc/layer3-periodic/group3/cos9-bootc-source-fips.containerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM localhost/cos9-bootc-source:latest - -# Add fips=1 kernel argument -# See https://containers.github.io/bootc/building/kernel-arguments.html -RUN cat > /usr/lib/bootc/kargs.d/01-fips.toml <<'EOF' -kargs = ["fips=1"] -match-architectures = ["x86_64"] -EOF - -# Enable the FIPS crypto policy -RUN dnf install -y crypto-policies-scripts && \ - update-crypto-policies --no-reload --set FIPS && \ - dnf clean all diff --git a/test/image-blueprints/layer1-base/group2/rhel94-brew-y2-with-optionals.toml b/test/image-blueprints/layer1-base/group2/rhel94-brew-y2-with-optionals.toml index 072472bb6d..e041e88c14 100644 --- a/test/image-blueprints/layer1-base/group2/rhel94-brew-y2-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group2/rhel94-brew-y2-with-optionals.toml @@ -42,6 +42,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer1-base/group3/rhel96-brew-y1-with-optionals.toml b/test/image-blueprints/layer1-base/group3/rhel96-brew-y1-with-optionals.toml index 25a2851c29..1da4ec8752 100644 --- a/test/image-blueprints/layer1-base/group3/rhel96-brew-y1-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group3/rhel96-brew-y1-with-optionals.toml @@ -53,6 +53,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer1-base/group4/rhel96-brew-ec-with-optionals.toml 
b/test/image-blueprints/layer1-base/group4/rhel96-brew-ec-with-optionals.toml index 5f08416eb8..64f742feed 100644 --- a/test/image-blueprints/layer1-base/group4/rhel96-brew-ec-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group4/rhel96-brew-ec-with-optionals.toml @@ -43,6 +43,10 @@ version = "{{ env.Getenv "BREW_EC_RELEASE_VERSION" }}" name = "microshift-test-agent" version = "*" +[[packages]] +name = "systemd-resolved" +version = "*" + [customizations.services] enabled = ["microshift", "microshift-test-agent"] @@ -53,6 +57,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer1-base/group4/rhel96-brew-nightly-with-optionals.toml b/test/image-blueprints/layer1-base/group4/rhel96-brew-nightly-with-optionals.toml index 15369ebb33..7496a41daa 100644 --- a/test/image-blueprints/layer1-base/group4/rhel96-brew-nightly-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group4/rhel96-brew-nightly-with-optionals.toml @@ -43,6 +43,10 @@ version = "{{ env.Getenv "BREW_NIGHTLY_RELEASE_VERSION" }}" name = "microshift-test-agent" version = "*" +[[packages]] +name = "systemd-resolved" +version = "*" + [customizations.services] enabled = ["microshift", "microshift-test-agent"] @@ -53,6 +57,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer1-base/group4/rhel96-brew-rc-with-optionals.toml b/test/image-blueprints/layer1-base/group4/rhel96-brew-rc-with-optionals.toml index 2160a33f50..932b3c5a1c 100644 --- a/test/image-blueprints/layer1-base/group4/rhel96-brew-rc-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group4/rhel96-brew-rc-with-optionals.toml @@ -43,6 +43,10 @@ version = "{{ env.Getenv "BREW_RC_RELEASE_VERSION" }}" name = "microshift-test-agent" version = "*" +[[packages]] +name = "systemd-resolved" +version = "*" + [customizations.services] enabled = 
["microshift", "microshift-test-agent"] @@ -53,6 +57,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer1-base/group4/rhel96-brew-y0-with-optionals.toml b/test/image-blueprints/layer1-base/group4/rhel96-brew-zstream-with-optionals.toml similarity index 96% rename from test/image-blueprints/layer1-base/group4/rhel96-brew-y0-with-optionals.toml rename to test/image-blueprints/layer1-base/group4/rhel96-brew-zstream-with-optionals.toml index 5b60a6eb22..603f538a72 100644 --- a/test/image-blueprints/layer1-base/group4/rhel96-brew-y0-with-optionals.toml +++ b/test/image-blueprints/layer1-base/group4/rhel96-brew-zstream-with-optionals.toml @@ -43,6 +43,10 @@ version = "{{ env.Getenv "BREW_Y0_RELEASE_VERSION" }}" name = "microshift-test-agent" version = "*" +[[packages]] +name = "systemd-resolved" +version = "*" + [customizations.services] enabled = ["microshift", "microshift-test-agent"] @@ -53,6 +57,7 @@ ports = [ "443:tcp", "5353:udp", "6443:tcp", + "8889:tcp", "30000-32767:tcp", "30000-32767:udp", ] diff --git a/test/image-blueprints/layer2-presubmit/group1/rhel96-lrel-optionals-tuned.toml b/test/image-blueprints/layer2-presubmit/group1/rhel96-lrel-optionals-tuned.toml new file mode 100644 index 0000000000..9cba09bc4b --- /dev/null +++ b/test/image-blueprints/layer2-presubmit/group1/rhel96-lrel-optionals-tuned.toml @@ -0,0 +1,125 @@ +{{- if env.Getenv "BREW_LREL_RELEASE_VERSION" "" -}} +{{- /* + + We wrap this template in a test so that the body of the output is + empty when there is no "current" version release. The output file + must end up completely empty, so we need to remove whitespace from + around the first and last template instructions. 
+ +*/ -}} + +name = "rhel-9.6-microshift-brew-tuned-4.{{ .Env.MINOR_VERSION}}-{{ .Env.LATEST_RELEASE_TYPE}}" +description = "A RHEL 9.6 image with already built and released RPMs like EC, RC, or Z-stream release: {{ .Env.BREW_LREL_RELEASE_VERSION }}" +version = "0.0.1" +modules = [] +groups = [] +distro = "rhel-96" + +# Parent specification directive recognized by test/bin/build_images.sh to be +# used with the '--parent' argument of 'osbuild-composer' +# parent = "rhel-9.6-microshift-brew-optionals-4.{{ .Env.PREVIOUS_MINOR_VERSION }}-zstream" + +{{ range (env.Getenv "MICROSHIFT_MANDATORY_RPMS" | strings.Split " ") }} +[[packages]] +name = "{{ . }}" +version = "{{ env.Getenv "BREW_LREL_RELEASE_VERSION" }}" +{{ end }} + +{{ range (env.Getenv "MICROSHIFT_OPTIONAL_RPMS" | strings.Split " ") }} +[[packages]] +name = "{{ . }}" +version = "{{ env.Getenv "BREW_LREL_RELEASE_VERSION" }}" +{{ end }} + +{{- if and (env.Getenv "UNAME_M" "") (eq "x86_64" .Env.UNAME_M) }} +{{ range (env.Getenv "MICROSHIFT_X86_64_RPMS" | strings.Split " ") }} +[[packages]] +name = "{{ . 
}}" +version = "{{ env.Getenv "BREW_LREL_RELEASE_VERSION" }}" +{{ end }} +{{- end }} + +[[packages]] +name = "microshift-test-agent" +version = "*" + +[[packages]] +name = "systemd-resolved" +version = "*" + +{{- if and (env.Getenv "UNAME_M" "") (eq "x86_64" .Env.UNAME_M) }} +# Kernel RT is only available for x86_64 +[customizations.kernel] +name = "kernel-rt" +{{- end }} + +[customizations.services] +enabled = ["microshift-test-agent", "microshift", "microshift-tuned"] + +[customizations.firewall] +ports = [ + "22:tcp", + "80:tcp", + "443:tcp", + "5353:udp", + "6443:tcp", + "8889:tcp", + "30000-32767:tcp", + "30000-32767:udp", +] + +[customizations.firewall.services] +enabled = ["mdns", "ssh", "http", "https"] + +[[customizations.firewall.zones]] +name = "trusted" +sources = ["10.42.0.0/16", "169.254.169.1", "fd01::/48"] + +[[customizations.files]] +path = "/etc/microshift/config.yaml" +data = """ +kubelet: + cpuManagerPolicy: static + cpuManagerPolicyOptions: + full-pcpus-only: "true" + cpuManagerReconcilePeriod: 5s + memoryManagerPolicy: Static + topologyManagerPolicy: single-numa-node + reservedSystemCPUs: 0-1 + reservedMemory: + - limits: + memory: 1100Mi + numaNode: 0 + kubeReserved: + memory: 500Mi + systemReserved: + memory: 500Mi + evictionHard: + imagefs.available: 15% + memory.available: 100Mi + nodefs.available: 10% + nodefs.inodesFree: 5% + evictionPressureTransitionPeriod: 0s +""" + +[[customizations.files]] +path = "/etc/tuned/microshift-baseline-variables.conf" +data = """ +# Isolated cores should be complementary to kubelet's reserved CPUs. +# Isolated and reserved CPUs should contain all online CPUs. +# Core #3 is for testing offlining hence skipped. 
+isolated_cores=2,4-5 +hugepages_size=2M +hugepages=10 +additional_args=test1=on test2=true dummy +offline_cpu_set=3 +""" + +[[customizations.files]] +path = "/etc/microshift/tuned.yaml" +data = """ +profile: microshift-baseline +reboot_after_apply: True +""" + +{{- end -}} diff --git a/test/package-sources-bootc/microshift-gitops.repo b/test/package-sources-bootc/microshift-gitops.repo new file mode 100644 index 0000000000..21e8ff6c91 --- /dev/null +++ b/test/package-sources-bootc/microshift-gitops.repo @@ -0,0 +1,10 @@ +[microshift-gitops] +name = Red Hat OpenShift GitOps {{ .Env.GITOPS_VERSION }} for RHEL 9 {{ .Env.UNAME_M }} (RPMs) +baseurl = https://cdn.redhat.com/content/dist/layered/rhel9/{{ .Env.UNAME_M }}/gitops/{{ .Env.GITOPS_VERSION }}/os +enabled = 1 +gpgcheck = 1 +gpgkey = file:///etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release +sslverify = 1 +sslcacert = /etc/rhsm/ca/redhat-uep.pem +sslclientkey = {{ .Env.SSL_CLIENT_KEY_FILE }} +sslclientcert = {{ .Env.SSL_CLIENT_CERT_FILE }} diff --git a/test/requirements.txt b/test/requirements.txt index b0d9694e36..f09ae2f6d1 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,8 +1,7 @@ -robotframework==6.0.2 -robotframework-requests==0.9.4 +robotframework==7.3 +robotframework-requests==0.9.7 robotframework-scplibrary==1.2.0 robotframework-sshlibrary==3.8.0 -pyyaml==6.0.1 -robotframework-robocop==3.1.1 -robotframework-tidy==4.3.0 -packaging==23.2 +pyyaml==6.0.2 +robotframework-robocop==6.5.2 +packaging==25.0 diff --git a/test/resources/common.resource b/test/resources/common.resource index d4d33a02ff..5abf5a8856 100644 --- a/test/resources/common.resource +++ b/test/resources/common.resource @@ -20,7 +20,7 @@ Setup Suite With Namespace Login MicroShift Host Setup Kubeconfig ${ns}= Create Unique Namespace - Set Suite Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=SUITE Create Unique Namespace [Documentation] Creates a namespace with a unique suffix @@ -58,7 +58,7 @@ Create Random Temp File 
Create File ${path} ${content} RETURN ${path} -Upload String To File # robocop: disable=too-many-calls-in-keyword +Upload String To File # robocop: off=too-many-calls-in-keyword [Documentation] Write the string to a remote file [Arguments] ${content} ${remote_filename} ${rand}= Generate Random String diff --git a/test/resources/fault-tests.resource b/test/resources/fault-tests.resource index 4188a3ae9c..1c7faa6f20 100644 --- a/test/resources/fault-tests.resource +++ b/test/resources/fault-tests.resource @@ -20,6 +20,6 @@ Get Expected Fault Messages [Arguments] ${action} ${resource} ${yaml_file}= OperatingSystem.Get File resources/fault-test-messages.yaml ${messages_dict}= yaml.Safe Load ${yaml_file} - ${messages}= Set Variable ${messages_dict}[${action}][${resource}] + VAR ${messages}= ${messages_dict}[${action}][${resource}] @{message_lines}= Split To Lines ${messages} RETURN @{message_lines} diff --git a/test/resources/kubeconfig.resource b/test/resources/kubeconfig.resource index d6715dd7e0..614b24d322 100644 --- a/test/resources/kubeconfig.resource +++ b/test/resources/kubeconfig.resource @@ -39,7 +39,7 @@ Setup Kubeconfig ... https://${ip}:${API_PORT} END ${path}= Create Random Temp File ${kubeconfig} - Set Suite Variable \${KUBECONFIG} ${path} + VAR ${KUBECONFIG}= ${path} scope=SUITE Setup Custom Kubeconfig [Documentation] Get the kubeconfig file from the path, @@ -47,7 +47,7 @@ Setup Custom Kubeconfig [Arguments] ${custom_path} ${kubeconfig}= Get Kubeconfig ${custom_path} ${path}= Create Random Temp File ${kubeconfig} - Set Suite Variable \${KUBECONFIG} ${path} + VAR ${KUBECONFIG}= ${path} scope=SUITE Remove Kubeconfig [Documentation] Remove kubeconfig ${KUBECONFIG} file. 
diff --git a/test/resources/microshift-config.resource b/test/resources/microshift-config.resource index 4703141fe3..203ff2fcd2 100644 --- a/test/resources/microshift-config.resource +++ b/test/resources/microshift-config.resource @@ -1,16 +1,16 @@ *** Settings *** Documentation Keywords for running the microshift command line. +Library OperatingSystem Library Process Library String -Library OperatingSystem Library SSHLibrary Library DataFormats.py Resource common.resource *** Variables *** -${LVMD_VG_OVERRIDE}= ${EMPTY} +${LVMD_VG_OVERRIDE} ${EMPTY} *** Keywords *** @@ -26,9 +26,9 @@ Save Default MicroShift Config ... cat /etc/microshift/config.yaml ... sudo=True return_rc=True IF ${rc} == 0 - Set Suite Variable \${DEFAULT_MICROSHIFT_CONFIG} ${stdout} + VAR ${DEFAULT_MICROSHIFT_CONFIG}= ${stdout} scope=SUITE ELSE - Set Suite Variable \${DEFAULT_MICROSHIFT_CONFIG} ${EMPTY} + VAR ${DEFAULT_MICROSHIFT_CONFIG}= ${EMPTY} scope=SUITE END Restore Default MicroShift Config @@ -89,9 +89,9 @@ Save Lvmd Config ... cat /etc/microshift/lvmd.yaml ... sudo=True return_rc=True IF ${rc} == 0 - Set Suite Variable \${DEFAULT_LVMD_CONFIG} ${stdout} + VAR ${DEFAULT_LVMD_CONFIG}= ${stdout} scope=SUITE ELSE - Set Suite Variable \${DEFAULT_LVMD_CONFIG} ${EMPTY} + VAR ${DEFAULT_LVMD_CONFIG}= ${EMPTY} scope=SUITE END Extend Lvmd Config diff --git a/test/resources/microshift-etcd-process.resource b/test/resources/microshift-etcd-process.resource index fb41462163..d9792d8af8 100644 --- a/test/resources/microshift-etcd-process.resource +++ b/test/resources/microshift-etcd-process.resource @@ -1,9 +1,9 @@ *** Settings *** Documentation Keywords for running the microshift-etcd command line. 
+Library OperatingSystem Library Process Library String -Library OperatingSystem Library SSHLibrary Resource oc.resource Resource systemd.resource diff --git a/test/resources/microshift-host.resource b/test/resources/microshift-host.resource index 61e8fd464f..ed3986c405 100644 --- a/test/resources/microshift-host.resource +++ b/test/resources/microshift-host.resource @@ -85,7 +85,7 @@ Is System Bootc IF ${rc} != 0 RETURN ${FALSE} ${json_status}= Json Parse ${stdout} - ${bootc_type}= Set Variable ${json_status}[status][type] + VAR ${bootc_type}= ${json_status}[status][type] IF '${bootc_type}' == 'bootcHost' RETURN ${TRUE} ELSE diff --git a/test/resources/microshift-network.resource b/test/resources/microshift-network.resource index 384f5ae3a4..24799f3300 100644 --- a/test/resources/microshift-network.resource +++ b/test/resources/microshift-network.resource @@ -45,8 +45,7 @@ Access Hello Microshift ... Use hello-microshift as authority and swap it with --connect-to option. ... Returns all output and return code. ... Times out after 15s. - [Arguments] - ... ${ushift_port} + [Arguments] ${ushift_port} ... ${ushift_ip}=${USHIFT_HOST} ... ${path}=${EMPTY} ... ${scheme}=http @@ -54,8 +53,8 @@ Access Hello Microshift ${ip}= Add Brackets If Ipv6 ${ushift_ip} - ${connect_to}= Set Variable "${hostname}::${ip}:" - ${url_path}= Set Variable "${scheme}://${hostname}:${ushift_port}${path}" + VAR ${connect_to}= "${hostname}::${ip}:" + VAR ${url_path}= "${scheme}://${hostname}:${ushift_port}${path}" ${result}= Run Process ... curl -k -i ${url_path} --connect-to ${connect_to} @@ -65,8 +64,7 @@ Access Hello Microshift Access Hello MicroShift Success [Documentation] Expect 200 OK when accessing "hello microshift" through the router. - [Arguments] - ... ${ushift_port} + [Arguments] ${ushift_port} ... ${ushift_ip}=${USHIFT_HOST} ... ${path}=${EMPTY} ... 
${scheme}=http @@ -84,8 +82,7 @@ Access Hello MicroShift Success Access Hello MicroShift No Route [Documentation] Expect 503 Service Unavailable when accessing "hello microshift" through the router. - [Arguments] - ... ${ushift_port} + [Arguments] ${ushift_port} ... ${ushift_ip}=${USHIFT_HOST} ... ${path}=${EMPTY} ... ${scheme}=http diff --git a/test/resources/microshift-process.resource b/test/resources/microshift-process.resource index 6c001f942e..4542a9a034 100644 --- a/test/resources/microshift-process.resource +++ b/test/resources/microshift-process.resource @@ -1,9 +1,9 @@ *** Settings *** Documentation Keywords for running the microshift command line. +Library OperatingSystem Library Process Library String -Library OperatingSystem Library SSHLibrary Resource oc.resource Resource systemd.resource diff --git a/test/resources/microshift-rpm.resource b/test/resources/microshift-rpm.resource index c7377a3c37..7f97d3f1b1 100644 --- a/test/resources/microshift-rpm.resource +++ b/test/resources/microshift-rpm.resource @@ -97,9 +97,9 @@ Verify MicroShift RPM Install # times of the installed files ${is_ostree}= Is System OSTree IF ${is_ostree} - ${nomtime}= Set Variable --nomtime + VAR ${nomtime}= --nomtime ELSE - ${nomtime}= Set Variable ${EMPTY} + VAR ${nomtime}= ${EMPTY} END # Checks all files from RPM packages except config files diff --git a/test/resources/multus.resource b/test/resources/multus.resource index b9420be828..a9cd3874c8 100644 --- a/test/resources/multus.resource +++ b/test/resources/multus.resource @@ -25,7 +25,7 @@ Connect To Pod Over Local Interface [Arguments] ${pod} ${ns} ${if} ${networks}= Get And Verify Pod Networks ${pod} ${ns} ${NAMESPACE}/bridge*-conf - ${extra_ip}= Set Variable ${networks}[1][ips][0] + VAR ${extra_ip}= ${networks}[1][ips][0] ${stdout}= Command Should Work curl -v --interface ${if} ${extra_ip}:8080 Should Contain ${stdout} Hello MicroShift diff --git a/test/resources/oc.resource b/test/resources/oc.resource index 
618fcf7197..35630b091a 100644 --- a/test/resources/oc.resource +++ b/test/resources/oc.resource @@ -30,9 +30,9 @@ Oc Get JsonPath [Arguments] ${type} ${namespace} ${resource} ${jsonpath} IF "${namespace}"=="${EMPTY}" - ${namespace_arg}= Set Variable -A + VAR ${namespace_arg}= -A ELSE - ${namespace_arg}= Set Variable -n ${namespace} + VAR ${namespace_arg}= -n ${namespace} END ${text}= Run With Kubeconfig oc get ${namespace_arg} -o=jsonpath='{ ${jsonpath} }' ${type} ${resource} diff --git a/test/resources/offline.resource b/test/resources/offline.resource index 2008368ee5..dcf18fa3d0 100644 --- a/test/resources/offline.resource +++ b/test/resources/offline.resource @@ -38,11 +38,11 @@ Greenboot Health Check Exited [Documentation] Check that the Greenboot Health Check systemd service has state "exited" ${result} ${exited}= Wait Until Keyword Succeeds 5x 5s ... Run Guest Process ${GUEST_NAME} - ... systemctl - ... show - ... --property\=SubState - ... --value - ... greenboot-healthcheck.service + ... systemctl + ... show + ... --property\=SubState + ... --value + ... greenboot-healthcheck.service Should Be Equal As Integers ${result["rc"]} 0 Should Be Equal As Strings ${result["stdout"]} exited @@ -80,7 +80,7 @@ Run With Kubeconfig ... ${command} The command to run. Should but `oc` or `kubectl` but this is not enforced ... @{args} The arguments to pass to the command. See ../../resources/qemu-guest-agent.py for syntax [Arguments] ${command} @{args} - ${env}= Create Dictionary KUBECONFIG=/var/lib/microshift/resources/kubeadmin/kubeconfig + VAR &{env}= KUBECONFIG=/var/lib/microshift/resources/kubeadmin/kubeconfig ${result} ${ignore}= Wait Until Keyword Succeeds 5x 2s ... 
Run Guest Process ${GUEST_NAME} ${command} @{args} env=&{env} Log Many ${result["stdout"]} ${result["stderr"]} diff --git a/test/resources/openssl.resource b/test/resources/openssl.resource index 402c909905..b448152b1a 100644 --- a/test/resources/openssl.resource +++ b/test/resources/openssl.resource @@ -8,13 +8,13 @@ Resource common.resource *** Keywords *** Openssl [Documentation] openssl command wrapper - [Arguments] ${cmd} ${cmd2}= + [Arguments] ${cmd} ${cmd2}=${EMPTY} ${result}= Run Process openssl ${cmd} ${cmd2} shell=True # robotcode: ignore Should Be Equal As Integers ${result.rc} 0 Generate CSR Config [Documentation] Generate csr based on config template file - [Arguments] ${config_template_file} ${output_file} + [Arguments] ${config_template_file} ${out_file} ${template}= OperatingSystem.Get File ${config_template_file} ${message}= Replace Variables ${template} - OperatingSystem.Append To File ${output_file} ${message} + OperatingSystem.Append To File ${out_file} ${message} diff --git a/test/resources/ostree-health.resource b/test/resources/ostree-health.resource index 0a224a10c8..5d431bc7b9 100644 --- a/test/resources/ostree-health.resource +++ b/test/resources/ostree-health.resource @@ -21,13 +21,13 @@ Greenboot Health Check Exited Restart Greenboot And Wait For Success [Documentation] Restart the greenboot-healthcheck service and check its status - ${unit_name}= Set Variable greenboot-healthcheck.service + VAR ${unit_name} greenboot-healthcheck.service # Note that the Systemctl keyword from systemd.resource cannot be used to # restart the greenboot-healthcheck service due to the keyword expecting # the 'running' state after the restart. This condition does not apply on # services like greenboot that exit after their startup finishes. - ${stdout} ${stderr} ${rc}= Execute Command + ${stdout} ${stderr} ${rc} Execute Command ... systemctl restart ${unit_name} ... 
sudo=True return_stdout=True return_stderr=True return_rc=True IF ${rc} != 0 Systemctl Print Service Status And Logs ${unit_name} diff --git a/test/resources/ostree.resource b/test/resources/ostree.resource index b4b855a390..95eeeab38b 100644 --- a/test/resources/ostree.resource +++ b/test/resources/ostree.resource @@ -130,9 +130,9 @@ Create Usr Directory Overlay ${is_bootc}= Is System Bootc IF ${is_bootc} - ${cmd}= Set Variable bootc usr-overlay + VAR ${cmd}= bootc usr-overlay ELSE - ${cmd}= Set Variable rpm-ostree usroverlay + VAR ${cmd}= rpm-ostree usroverlay END ${stdout} ${stderr} ${rc}= Execute Command diff --git a/test/resources/selinux.resource b/test/resources/selinux.resource index ad0b4c0125..4b42413c9d 100644 --- a/test/resources/selinux.resource +++ b/test/resources/selinux.resource @@ -84,7 +84,7 @@ Run Container Access Check On File ... returns a list of errors if access was granted [Arguments] ${file_path} ${cmd}=cat - ${cmd_list}= Create List ${cmd} - ${file_paths_map}= Create Dictionary ${file_path}=${cmd_list} + VAR @{cmd_list}= ${cmd} + VAR &{file_paths_map}= ${file_path}=${cmd_list} ${err_list}= Run Access Check ${file_paths_map} RETURN ${err_list} diff --git a/test/resources/sos-on-failure-listener.py b/test/resources/sos-on-failure-listener.py new file mode 100644 index 0000000000..cf69a7302f --- /dev/null +++ b/test/resources/sos-on-failure-listener.py @@ -0,0 +1,72 @@ +""" +Following file implements a listener for running SOS Report after failed test case. +See Robot Framework User Guide for more information about listener interfaces: +https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#listener-interface + +This listener listens for two events: +- Variable creation - store the variable name if it contains 'namespace' or 'ns' +- Keyword failure - failure of "test-level" keyword triggers SOS report collection + +The goal is to collect minimal sos report whenever test fails. 
+Sos reports collected at the end of the scenario might not contain relevant information for debugging failures +because cluster objects might be already deleted (like suite or test teardown removing namespaces) +or long running Pods (like MicroShift core Pods) might've been restarted couple times and the relevant logs are lost. +""" + +import os +import re +from robot import result, running +from robot.libraries.BuiltIn import BuiltIn + +suite_namespaces = [] +test_namespaces = [] +test_started = False + + +def start_test(data: running.model.TestCase, res: result.model.TestCase): + global test_started + test_started = True + + +def end_test(data: running.model.TestCase, res: result.model.TestCase): + global test_started + test_started = False + test_namespaces.clear() + + +def start_suite(data: running.model.TestSuite, res: result.model.TestSuite): + suite_namespaces.clear() + + +def end_keyword(data: running.model.Keyword, res: result.model.Keyword): + # If a test case level keyword failed, collect SOS report. + # If the keyword failed on different level (like inside Wait Until Keyword Succeeds), it's ignored. 
+ if res.status == "FAIL" and isinstance(res.parent, result.model.TestCase): + value = os.getenv("SKIP_SOS") + if value == "true": + BuiltIn().log("sos-on-failure-listener.py: SKIP_SOS is set to true, skipping SOS report collection") + return + BuiltIn().log("sos-on-failure-listener.py: Failure was detected, collecting SOS report") + BuiltIn().import_resource('microshift-host.resource') + cmd = "microshift-sos-report --profiles microshift --plugins ''" + if len(suite_namespaces) > 0 or len(test_namespaces) > 0: + cmd += f" --plugin-option microshift.add-namespaces={','.join(suite_namespaces + test_namespaces)}" + stdout, _, _ = BuiltIn().run_keyword("Command Execution", cmd) + m = re.search(r'(\/\S+\/sosreport\S+.tar.xz)', stdout) + if m: + BuiltIn().log(f"sos-on-failure-listener.py: SOS report collected to {m.group(1)}") + + +def start_var(data: running.model.Var, res: result.model.Var): + # Store variable's name if: + # - it has SUITE scope + # - name contains 'namespace' or 'ns' (case insensitive) + # - it's not already in the list of test namespaces + # start_var requires RF >= 7.0 and gets information about variables set using VAR keyword. 
+ if (res.scope == "SUITE" + and ('namespace' in res.name.lower() or 'ns' in res.name.lower()) + and res.name not in test_namespaces): + if test_started: + test_namespaces.append(res.name) + else: + suite_namespaces.append(res.name) diff --git a/test/resources/systemd.resource b/test/resources/systemd.resource index 24a453f98a..1ab890b014 100644 --- a/test/resources/systemd.resource +++ b/test/resources/systemd.resource @@ -57,9 +57,9 @@ Systemctl Should Be True "${verb}" in {"restart", "start", "stop", "enable", "disable"} IF "${verb}" in {"restart", "start"} - ${state}= Set Variable running + VAR ${state}= running ELSE IF "${verb}" in {"stop"} - ${state}= Set Variable dead + VAR ${state}= dead END ${stdout} ${stderr} ${rc}= Execute Command diff --git a/test/scenarios-bootc/periodics/cos9-src@fips.sh b/test/scenarios-bootc/periodics/cos9-src@fips.sh deleted file mode 100644 index ac5bb41add..0000000000 --- a/test/scenarios-bootc/periodics/cos9-src@fips.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Sourced from scenario.sh and uses functions defined there. 
- -check_platform() { - if [[ "${UNAME_M}" =~ aarch64 ]] ; then - record_junit "setup" "scenario_create_vms" "SKIPPED" - exit 0 - fi -} - -scenario_create_vms() { - check_platform - - prepare_kickstart host1 kickstart-bootc.ks.template cos9-bootc-source-fips true - launch_vm --boot_blueprint centos9-bootc --fips -} - -scenario_remove_vms() { - check_platform - - remove_vm host1 -} - -scenario_run_tests() { - check_platform - - run_tests host1 suites/fips/ -} diff --git a/test/scenarios-bootc/periodics/el96-crel@optional-sigstore.sh b/test/scenarios-bootc/periodics/el96-crel@optional-sigstore.sh.disabled similarity index 100% rename from test/scenarios-bootc/periodics/el96-crel@optional-sigstore.sh rename to test/scenarios-bootc/periodics/el96-crel@optional-sigstore.sh.disabled diff --git a/test/scenarios-bootc/periodics/el96-prel@el96-crel@upgrade-ok.sh b/test/scenarios-bootc/periodics/el96-prel@el96-crel@upgrade-ok.sh index ca5e663b4d..b6104302c6 100644 --- a/test/scenarios-bootc/periodics/el96-prel@el96-crel@upgrade-ok.sh +++ b/test/scenarios-bootc/periodics/el96-prel@el96-crel@upgrade-ok.sh @@ -10,7 +10,7 @@ scenario_create_vms() { return 0 fi prepare_kickstart host1 kickstart-bootc.ks.template rhel96-bootc-prel - launch_vm --boot_blueprint rhel94-bootc + launch_vm --boot_blueprint rhel96-bootc } scenario_remove_vms() { diff --git a/test/scenarios-bootc/periodics/el96-src@gitops.sh b/test/scenarios-bootc/periodics/el96-src@gitops.sh new file mode 100644 index 0000000000..0778c3c73d --- /dev/null +++ b/test/scenarios-bootc/periodics/el96-src@gitops.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template rhel96-bootc-source-gitops + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + suites/gitops/ +} diff --git a/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard1.sh b/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard1.sh new file mode 100644 index 0000000000..8f9af6c7d4 --- /dev/null +++ b/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard1.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +dest_image="rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + +scenario_create_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + prepare_kickstart host1 kickstart-bootc.ks.template rhel94-bootc-brew-y2-with-optional + launch_vm --boot_blueprint rhel94-bootc +} + +scenario_remove_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + remove_vm host1 +} + +scenario_run_tests() { + if ! 
does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + run_tests host1 \ + --variable "TARGET_REF:${dest_image}" \ + --variable "BOOTC_REGISTRY:${MIRROR_REGISTRY_URL}" \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard1/ +} diff --git a/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard2.sh b/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard2.sh new file mode 100644 index 0000000000..bb22a51ac7 --- /dev/null +++ b/test/scenarios-bootc/releases/el94-y2@el96-lrel@standard2.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +dest_image="rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + +scenario_create_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + prepare_kickstart host1 kickstart-bootc.ks.template rhel94-bootc-brew-y2-with-optional + launch_vm --boot_blueprint rhel94-bootc +} + +scenario_remove_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + remove_vm host1 +} + +scenario_run_tests() { + if ! 
does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + run_tests host1 \ + --variable "TARGET_REF:${dest_image}" \ + --variable "BOOTC_REGISTRY:${MIRROR_REGISTRY_URL}" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard2 +} diff --git a/test/scenarios-bootc/releases/el96-lrel@ai-model-serving-online.sh b/test/scenarios-bootc/releases/el96-lrel@ai-model-serving-online.sh new file mode 100644 index 0000000000..3aea260575 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@ai-model-serving-online.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# Currently, RHOAI is only available for x86_64 +check_platform() { + local -r record_junit=${1:-false} + + if [[ "${UNAME_M}" =~ aarch64 ]]; then + if "${record_junit}"; then + record_junit "setup" "scenario_create_vms" "SKIPPED" + fi + exit 0 + fi +} + +scenario_create_vms() { + check_platform true + + # Increased disk size because of the additional embedded images (especially OVMS which is ~3.5GiB) + LVM_SYSROOT_SIZE=20480 prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc --vm_disksize 30 +} + +scenario_remove_vms() { + check_platform + + remove_vm host1 +} + +scenario_run_tests() { + check_platform + + run_tests host1 \ + suites/ai-model-serving/ai-model-serving-online.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@dual-stack.sh b/test/scenarios-bootc/releases/el96-lrel@dual-stack.sh new file mode 100644 index 0000000000..e8d98152bf --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@dual-stack.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc --network "${VM_DUAL_STACK_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + local -r vmname=$(full_vm_name host1) + # Valid IP addresses are the first two entries returned by manage-vm script. + local -r vm_ip1=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | head -1) + local -r vm_ip2=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | head -2 | tail -1) + + run_tests host1 \ + --variable "USHIFT_HOST_IP1:${vm_ip1}" \ + --variable "USHIFT_HOST_IP2:${vm_ip2}" \ + suites/ipv6/dualstack.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@ipv6.sh b/test/scenarios-bootc/releases/el96-lrel@ipv6.sh new file mode 100644 index 0000000000..5e15dafba8 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@ipv6.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# Redefine network-related settings to use the dedicated IPv6 network bridge +# shellcheck disable=SC2034 # used elsewhere +VM_BRIDGE_IP="$(get_vm_bridge_ip "${VM_IPV6_NETWORK}")" +# shellcheck disable=SC2034 # used elsewhere +WEB_SERVER_URL="http://[${VM_BRIDGE_IP}]:${WEB_SERVER_PORT}" +# Using `hostname` here instead of a raw ip because skopeo only allows either +# ipv4 or fqdn's, but not ipv6. Since the registry is hosted on the ipv6 +# network gateway in the host, we need to use a combination of the hostname +# plus /etc/hosts resolution (which is taken care of by kickstart). 
+# shellcheck disable=SC2034 # used elsewhere +MIRROR_REGISTRY_URL="$(hostname):${MIRROR_REGISTRY_PORT}/microshift" + +scenario_create_vms() { + # Enable IPv6 single stack in kickstart + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" false true + launch_vm --boot_blueprint rhel96-bootc --network "${VM_IPV6_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/ipv6/singlestack.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@multi-nic.sh b/test/scenarios-bootc/releases/el96-lrel@multi-nic.sh new file mode 100644 index 0000000000..a263a0e824 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@multi-nic.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + # Using multus as secondary network to have 2 nics in different networks. + launch_vm --boot_blueprint rhel96-bootc --network default,"${VM_MULTUS_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + local -r vmname=$(full_vm_name host1) + local -r vm_ip1=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | head -1) + local -r vm_ip2=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | tail -1) + + run_tests host1 \ + --variable "USHIFT_HOST_IP1:${vm_ip1}" \ + --variable "USHIFT_HOST_IP2:${vm_ip2}" \ + suites/network/multi-nic.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@optional.sh b/test/scenarios-bootc/releases/el96-lrel@optional.sh new file mode 100644 index 0000000000..6bc601136a --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@optional.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +# Redefine network-related settings to use the dedicated network bridge +VM_BRIDGE_IP="$(get_vm_bridge_ip "${VM_MULTUS_NETWORK}")" +# shellcheck disable=SC2034 # used elsewhere +WEB_SERVER_URL="http://${VM_BRIDGE_IP}:${WEB_SERVER_PORT}" + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + # Two nics - one for macvlan, another for ipvlan (they cannot enslave the same interface) + launch_vm --boot_blueprint rhel96-bootc --network "${VM_MULTUS_NETWORK},${VM_MULTUS_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "PROMETHEUS_HOST:$(hostname)" \ + --variable "PROMETHEUS_PORT:9092" \ + --variable "LOKI_HOST:$(hostname)" \ + --variable "LOKI_PORT:3200" \ + --variable "PROM_EXPORTER_PORT:8889" \ + suites/optional/ +} diff --git a/test/scenarios-bootc/releases/el96-lrel@osconfig.sh b/test/scenarios-bootc/releases/el96-lrel@osconfig.sh new file mode 100644 index 0000000000..2faa3e542b --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@osconfig.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + suites/osconfig/clusterid.robot \ + suites/osconfig/systemd-resolved.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@router.sh b/test/scenarios-bootc/releases/el96-lrel@router.sh new file mode 100644 index 0000000000..c7238c7e43 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@router.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +export TEST_EXCLUDES="ushift-6085" + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + suites/router +} diff --git a/test/scenarios-bootc/releases/el96-lrel@standard1.sh b/test/scenarios-bootc/releases/el96-lrel@standard1.sh new file mode 100644 index 0000000000..052faac7e1 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@standard1.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/standard1/ suites/selinux/validate-selinux-policy.robot +} diff --git a/test/scenarios-bootc/releases/el96-lrel@standard2.sh b/test/scenarios-bootc/releases/el96-lrel@standard2.sh new file mode 100644 index 0000000000..048165ad4d --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@standard2.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/standard2 +} diff --git a/test/scenarios-bootc/releases/el96-lrel@storage.sh b/test/scenarios-bootc/releases/el96-lrel@storage.sh new file mode 100644 index 0000000000..ff07d22704 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@storage.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/storage/ +} diff --git a/test/scenarios-bootc/releases/el96-lrel@telemetry.sh b/test/scenarios-bootc/releases/el96-lrel@telemetry.sh new file mode 100644 index 0000000000..68e8e71a33 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-lrel@telemetry.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart-bootc.ks.template "rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "PROXY_HOST:${VM_BRIDGE_IP}" \ + --variable "PROXY_PORT:9001" \ + --variable "PROMETHEUS_HOST:$(hostname)" \ + --variable "PROMETHEUS_PORT:9092" \ + suites/telemetry/telemetry.robot +} diff --git a/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard1.sh b/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard1.sh new file mode 100644 index 0000000000..ce888d08d0 --- /dev/null +++ b/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard1.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +dest_image="rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + +scenario_create_vms() { + if ! 
does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + prepare_kickstart host1 kickstart-bootc.ks.template rhel96-bootc-brew-y1-with-optional + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + remove_vm host1 +} + +scenario_run_tests() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + run_tests host1 \ + --variable "TARGET_REF:${dest_image}" \ + --variable "BOOTC_REGISTRY:${MIRROR_REGISTRY_URL}" \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard1/ suites/selinux/validate-selinux-policy.robot +} diff --git a/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard2.sh b/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard2.sh new file mode 100644 index 0000000000..02982b866b --- /dev/null +++ b/test/scenarios-bootc/releases/el96-y1@el96-lrel@standard2.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +dest_image="rhel96-bootc-brew-${LATEST_RELEASE_TYPE}-with-optional" + +scenario_create_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + prepare_kickstart host1 kickstart-bootc.ks.template rhel96-bootc-brew-y1-with-optional + launch_vm --boot_blueprint rhel96-bootc +} + +scenario_remove_vms() { + if ! does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + remove_vm host1 +} + +scenario_run_tests() { + if ! 
does_image_exist "${dest_image}"; then + echo "Image '${dest_image}' not found - skipping test" + return 0 + fi + run_tests host1 \ + --variable "TARGET_REF:${dest_image}" \ + --variable "BOOTC_REGISTRY:${MIRROR_REGISTRY_URL}" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard2 +} diff --git a/test/scenarios/periodics/el96-crel@optional-sigstore.sh b/test/scenarios/periodics/el96-crel@optional-sigstore.sh.disabled similarity index 100% rename from test/scenarios/periodics/el96-crel@optional-sigstore.sh rename to test/scenarios/periodics/el96-crel@optional-sigstore.sh.disabled diff --git a/test/scenarios/presubmits/el96-src@low-latency.sh b/test/scenarios/presubmits/el96-src@low-latency.sh index 7b1a1a202a..0e23cf79ab 100644 --- a/test/scenarios/presubmits/el96-src@low-latency.sh +++ b/test/scenarios/presubmits/el96-src@low-latency.sh @@ -15,7 +15,7 @@ scenario_remove_vms() { } scenario_run_tests() { - # Should not be ran immediately after creating VM because of + # Should not be run immediately after creating VM because of # microshift-tuned rebooting the node to activate the profile. local -r start_time=$(date +%s) while true; do diff --git a/test/scenarios/releases/el94-yminus2@el96-lrel@standard1.sh b/test/scenarios/releases/el94-yminus2@el96-lrel@standard1.sh new file mode 100644 index 0000000000..50b434b589 --- /dev/null +++ b/test/scenarios/releases/el94-yminus2@el96-lrel@standard1.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.4-microshift-brew-optionals-4.${YMINUS2_MINOR_VERSION}-zstream" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "TARGET_REF:rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard1/ +} diff --git a/test/scenarios/releases/el94-yminus2@el96-lrel@standard2.sh b/test/scenarios/releases/el94-yminus2@el96-lrel@standard2.sh new file mode 100644 index 0000000000..054a46d2b2 --- /dev/null +++ b/test/scenarios/releases/el94-yminus2@el96-lrel@standard2.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.4-microshift-brew-optionals-4.${YMINUS2_MINOR_VERSION}-zstream" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "TARGET_REF:rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard2/ +} diff --git a/test/scenarios/releases/el96-lrel@backups.sh b/test/scenarios/releases/el96-lrel@backups.sh new file mode 100644 index 0000000000..7d38edabf2 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@backups.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/backup/backups.robot +} diff --git a/test/scenarios/releases/el96-lrel@dual-stack.sh b/test/scenarios/releases/el96-lrel@dual-stack.sh new file mode 100644 index 0000000000..0d1fba7326 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@dual-stack.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm --network "${VM_DUAL_STACK_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + local -r vmname=$(full_vm_name host1) + local -r vm_ip1=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | head -1) + local -r vm_ip2=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | tail -1) + + run_tests host1 \ + --variable "USHIFT_HOST_IP1:${vm_ip1}" \ + --variable "USHIFT_HOST_IP2:${vm_ip2}" \ + suites/ipv6/dualstack.robot +} diff --git a/test/scenarios/releases/el96-lrel@ipv6.sh b/test/scenarios/releases/el96-lrel@ipv6.sh new file mode 100644 index 0000000000..120b408ed5 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@ipv6.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +# Redefine network-related settings to use the dedicated IPv6 network bridge +# shellcheck disable=SC2034 # used elsewhere +VM_BRIDGE_IP="$(get_vm_bridge_ip "${VM_IPV6_NETWORK}")" +# shellcheck disable=SC2034 # used elsewhere +WEB_SERVER_URL="http://[${VM_BRIDGE_IP}]:${WEB_SERVER_PORT}" +# shellcheck disable=SC2034 # used elsewhere +MIRROR_REGISTRY_URL="${VM_BRIDGE_IP}:${MIRROR_REGISTRY_PORT}" + +scenario_create_vms() { + # Enable IPv6 single stack in kickstart + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" false true + launch_vm --network "${VM_IPV6_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/ipv6/singlestack.robot +} diff --git a/test/scenarios/releases/el96-lrel@low-latency.sh b/test/scenarios/releases/el96-lrel@low-latency.sh new file mode 100644 index 0000000000..0f3a5b8e7e --- /dev/null +++ b/test/scenarios/releases/el96-lrel@low-latency.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +export SKIP_GREENBOOT=true +export TEST_RANDOMIZATION=none + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-tuned-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm --vm_vcpus 6 +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + # Should not be run immediately after creating VM because of + # microshift-tuned rebooting the node to activate the profile. 
+ local -r start_time=$(date +%s) + while true; do + boot_num=$(run_command_on_vm host1 "sudo journalctl --list-boots --quiet | wc -l" || true) + boot_num="${boot_num%$'\r'*}" + if [[ "${boot_num}" -ge 2 ]]; then + break + fi + if [ $(( $(date +%s) - start_time )) -gt 60 ]; then + echo "Timed out waiting for VM having 2 boots" + exit 1 + fi + sleep 5 + done + + # --exitonfailure because tests within suites are meant to be ordered, + # so don't advance to next test if current failed. + + run_tests host1 \ + --exitonfailure \ + suites/tuned/profile.robot \ + suites/tuned/microshift-tuned.robot \ + suites/tuned/workload-partitioning.robot +} diff --git a/test/scenarios/releases/el96-lrel@multi-nic.sh b/test/scenarios/releases/el96-lrel@multi-nic.sh new file mode 100644 index 0000000000..21f65d659c --- /dev/null +++ b/test/scenarios/releases/el96-lrel@multi-nic.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + # Using multus as secondary network to have 2 nics in different networks. 
+ launch_vm --network default,"${VM_MULTUS_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + local -r vmname=$(full_vm_name host1) + local -r vm_ip1=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | head -1) + local -r vm_ip2=$("${ROOTDIR}/scripts/devenv-builder/manage-vm.sh" ip -n "${vmname}" | tail -1) + + run_tests host1 \ + --variable "USHIFT_HOST_IP1:${vm_ip1}" \ + --variable "USHIFT_HOST_IP2:${vm_ip2}" \ + suites/network/multi-nic.robot +} diff --git a/test/scenarios/releases/el96-lrel@optional.sh b/test/scenarios/releases/el96-lrel@optional.sh new file mode 100644 index 0000000000..1eef1773d6 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@optional.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +# Redefine network-related settings to use the dedicated network bridge +VM_BRIDGE_IP="$(get_vm_bridge_ip "${VM_MULTUS_NETWORK}")" +# shellcheck disable=SC2034 # used elsewhere +WEB_SERVER_URL="http://${VM_BRIDGE_IP}:${WEB_SERVER_PORT}" + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + # Two nics - one for macvlan, another for ipvlan (they cannot enslave the same interface) + launch_vm --network "${VM_MULTUS_NETWORK},${VM_MULTUS_NETWORK}" +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + # Generic Device Plugin suite is excluded because getting serialsim for ostree would require: + # - getting the version of the kernel of ostree image, + # - installing kernel-devel of that version on the hypervisor, + # - building serialsim + # - packaging serialsim as an RPM + # - including the RPM in the ostree blueprint + # GDP suite is tested with bootc images instead. 
+ run_tests host1 \ + --variable "PROMETHEUS_HOST:$(hostname)" \ + --variable "PROMETHEUS_PORT:9092" \ + --variable "LOKI_HOST:$(hostname)" \ + --variable "LOKI_PORT:3200" \ + --variable "PROM_EXPORTER_PORT:8889" \ + --exclude generic-device-plugin \ + suites/optional/ +} diff --git a/test/scenarios/releases/el96-lrel@osconfig.sh b/test/scenarios/releases/el96-lrel@osconfig.sh new file mode 100644 index 0000000000..d084405a59 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@osconfig.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + suites/osconfig/clusterid.robot \ + suites/osconfig/systemd-resolved.robot +} diff --git a/test/scenarios/releases/el96-lrel@router.sh b/test/scenarios/releases/el96-lrel@router.sh new file mode 100644 index 0000000000..a1e0b2792a --- /dev/null +++ b/test/scenarios/releases/el96-lrel@router.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +export TEST_EXCLUDES="ushift-6085" + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + suites/router +} diff --git a/test/scenarios/releases/el96-lrel@standard1.sh b/test/scenarios/releases/el96-lrel@standard1.sh new file mode 100644 index 0000000000..cfd6cf0b3d --- /dev/null +++ b/test/scenarios/releases/el96-lrel@standard1.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/standard1/ suites/selinux/validate-selinux-policy.robot +} diff --git a/test/scenarios/releases/el96-lrel@standard2.sh b/test/scenarios/releases/el96-lrel@standard2.sh new file mode 100644 index 0000000000..84e892dca8 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@standard2.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/standard2/ +} diff --git a/test/scenarios/releases/el96-lrel@storage.sh b/test/scenarios/releases/el96-lrel@storage.sh new file mode 100644 index 0000000000..dfd4895424 --- /dev/null +++ b/test/scenarios/releases/el96-lrel@storage.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 suites/storage/ +} diff --git a/test/scenarios/releases/el96-yminus1@el96-lrel@standard1.sh b/test/scenarios/releases/el96-yminus1@el96-lrel@standard1.sh new file mode 100644 index 0000000000..fecd3c129c --- /dev/null +++ b/test/scenarios/releases/el96-yminus1@el96-lrel@standard1.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${PREVIOUS_MINOR_VERSION}-zstream" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "TARGET_REF:rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" \ + --variable "EXPECTED_OS_VERSION:9.6" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard1/ suites/selinux/validate-selinux-policy.robot +} diff --git a/test/scenarios/releases/el96-yminus1@el96-lrel@standard2.sh b/test/scenarios/releases/el96-yminus1@el96-lrel@standard2.sh new file mode 100644 index 0000000000..0de1778b65 --- /dev/null +++ b/test/scenarios/releases/el96-yminus1@el96-lrel@standard2.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Sourced from scenario.sh and uses functions defined there. 
+ +# NOTE: Unlike most suites, these tests rely on being run IN ORDER to +# ensure MicroShift is upgraded before running standard suite tests +export TEST_RANDOMIZATION=none + +scenario_create_vms() { + prepare_kickstart host1 kickstart.ks.template "rhel-9.6-microshift-brew-optionals-4.${PREVIOUS_MINOR_VERSION}-zstream" + launch_vm +} + +scenario_remove_vms() { + remove_vm host1 +} + +scenario_run_tests() { + run_tests host1 \ + --variable "TARGET_REF:rhel-9.6-microshift-brew-optionals-4.${MINOR_VERSION}-${LATEST_RELEASE_TYPE}" \ + suites/upgrade/upgrade-successful.robot \ + suites/standard2/ +} diff --git a/test/suites/ai-model-serving/ai-model-serving-offline.robot b/test/suites/ai-model-serving/ai-model-serving-offline.robot index 61bce161c3..495056fa1c 100644 --- a/test/suites/ai-model-serving/ai-model-serving-offline.robot +++ b/test/suites/ai-model-serving/ai-model-serving-offline.robot @@ -10,9 +10,9 @@ Test Tags offline *** Variables *** -${MODEL_NAME}= openvino-resnet -${DOMAIN}= ${MODEL_NAME}-predictor-test-ai.apps.example.com -${IP}= 10.44.0.1 +${MODEL_NAME} openvino-resnet +${DOMAIN} ${MODEL_NAME}-predictor-test-ai.apps.example.com +${IP} 10.44.0.1 *** Test Cases *** @@ -79,7 +79,7 @@ Query Model Server ... --connect-to "${DOMAIN}::${IP}:" ${output}= Guest Process Should Succeed ${cmd} ${result}= Json Parse ${output} - ${data}= Set Variable ${result["outputs"][0]["data"]} + VAR ${data}= ${result["outputs"][0]["data"]} # Following expression can be referred to as 'argmax': index of the highest element. 
${argmax}= Evaluate ${data}.index(max(${data})) diff --git a/test/suites/ai-model-serving/ai-model-serving-online.robot b/test/suites/ai-model-serving/ai-model-serving-online.robot index 1ff515a4c5..78c130c3a1 100644 --- a/test/suites/ai-model-serving/ai-model-serving-online.robot +++ b/test/suites/ai-model-serving/ai-model-serving-online.robot @@ -10,19 +10,19 @@ Suite Teardown Teardown Suite *** Variables *** -${USHIFT_HOST}= ${EMPTY} -${OVMS_KSERVE_MANIFEST}= /tmp/ovms-kserve.yaml -${OVMS_REQUEST}= /tmp/ovms-request.json +${USHIFT_HOST} ${EMPTY} +${OVMS_KSERVE_MANIFEST} /tmp/ovms-kserve.yaml +${OVMS_REQUEST} /tmp/ovms-request.json *** Test Cases *** Test OpenVINO model [Documentation] Sanity test for AI OpenVino Model Serving - Set Test Variable ${MODEL_NAME} openvino-resnet - Set Test Variable ${DOMAIN} ${MODEL_NAME}-predictor-test-ai.apps.example.com + VAR ${MODEL_NAME}= openvino-resnet scope=TEST + VAR ${DOMAIN}= ${MODEL_NAME}-predictor-test-ai.apps.example.com scope=TEST ${ns}= Create Unique Namespace - Set Test Variable ${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=TEST Deploy OpenVINO Serving Runtime Deploy OpenVINO Resnet Model @@ -105,7 +105,7 @@ Query Model Infer Endpoint ... --connect-to "${DOMAIN}::${USHIFT_HOST}:" ${output}= Local Command Should Work ${cmd} ${result}= Json Parse ${output} - ${data}= Set Variable ${result["outputs"][0]["data"]} + VAR ${data}= ${result["outputs"][0]["data"]} # Following expression can be referred to as 'argmax': index of the highest element. 
${argmax}= Evaluate ${data}.index(max(${data})) diff --git a/test/suites/backup/auto-recovery-extra.robot b/test/suites/backup/auto-recovery-extra.robot index 827d0f31eb..c77c2622cc 100644 --- a/test/suites/backup/auto-recovery-extra.robot +++ b/test/suites/backup/auto-recovery-extra.robot @@ -84,7 +84,7 @@ Create Backup With Marker ${backup_path}= Command Should Work microshift backup --auto-recovery ${WORKDIR} Command Should Work touch ${backup_path}/marker -Set Up MicroShift Auto Recovery Unit # robocop: disable=too-many-calls-in-keyword +Set Up MicroShift Auto Recovery Unit # robocop: off=too-many-calls-in-keyword [Documentation] Sets up drop-in and service to run on MicroShift's failure. Command Should Work mkdir -p /etc/systemd/system/microshift.service.d diff --git a/test/suites/backup/auto-recovery.robot b/test/suites/backup/auto-recovery.robot index 88d629ba66..5a7631b311 100644 --- a/test/suites/backup/auto-recovery.robot +++ b/test/suites/backup/auto-recovery.robot @@ -39,7 +39,7 @@ Restore Fails When There Are No Suitable Backups FOR ${counter} IN RANGE 1 4 ${backup_path}= Command Should Work microshift backup --auto-recovery ${WORKDIR} Command Should Work bash -c "echo ${counter} > ${backup_path}/marker" - ${new_path}= Set Variable ${{ "${backup_path}[:-1]" + str(int("${backup_path}[-1]")+1) }} + VAR ${new_path}= ${{ "${backup_path}[:-1]" + str(int("${backup_path}[-1]")+1) }} Command Should Work sudo mv ${backup_path} ${new_path} Sleep 2s END @@ -56,7 +56,7 @@ Restore Selects Right Backup # Rename the last backup to different deployment ID. When restoring it should be skipped. # Incrementing the last character is enough and works for both ostree/bootc and rpm systems. 
- ${new_path}= Set Variable ${{ "${last_backup}[:-1]" + str(int("${last_backup}[-1]")+1) }} + VAR ${new_path}= ${{ "${last_backup}[:-1]" + str(int("${last_backup}[-1]")+1) }} Command Should Work sudo mv ${last_backup} ${new_path} Command Should Work microshift restore --dont-save-failed --auto-recovery ${WORKDIR} @@ -74,7 +74,7 @@ Previously Restored Backup Is Moved To Special Subdirectory ... moving previously restored backup to a "restored" subdir. ${last_backup}= Create Backups 3 - ${expected_path}= Set Variable + VAR ${expected_path}= ... ${{ "/".join( "${last_backup}".split("/")[:-1] + ["restored"] + [ "${last_backup}".split("/")[-1] ] ) }} Log ${expected_path} Command Should Work microshift restore --dont-save-failed --auto-recovery ${WORKDIR} diff --git a/test/suites/backup/backup-restore-on-reboot.robot b/test/suites/backup/backup-restore-on-reboot.robot index 0b83e12076..d1e8a269aa 100644 --- a/test/suites/backup/backup-restore-on-reboot.robot +++ b/test/suites/backup/backup-restore-on-reboot.robot @@ -62,7 +62,7 @@ Make Masquerading Backup Systemctl stop microshift.service ${deploy_id}= Get Booted Deployment ID - ${backup_name}= Set Variable ${deploy_id}_manual + VAR ${backup_name}= ${deploy_id}_manual Create Backup ${backup_name} ${TRUE} diff --git a/test/suites/fault-tests/outages.robot b/test/suites/fault-tests/outages.robot index 654bb34d25..0961d76868 100644 --- a/test/suites/fault-tests/outages.robot +++ b/test/suites/fault-tests/outages.robot @@ -1,9 +1,9 @@ *** Settings *** Documentation Fault Test For MicroShift +Library Collections Library OperatingSystem Library Process -Library Collections Library yaml Library ../../resources/journalctl.py Resource ../../resources/fault-tests.resource @@ -81,7 +81,7 @@ Delete Pod ${pod_name} In ${namespace} ${actual_str} ${rc}= Get Log Output With Pattern ${cursor} kubelet.go.*${namespace} @{expected_lines}= Get Expected Fault Messages delete pod - ${expected_lines_replaced}= Create List + VAR 
@{expected_lines_replaced}= @{EMPTY} FOR ${line} IN @{expected_lines} ${line_with_vars}= Replace Variables ${line} Append To List ${expected_lines_replaced} ${line_with_vars} diff --git a/test/suites/fips/validate-fips.robot b/test/suites/fips/validate-fips.robot index 148a223626..b90e24e65d 100644 --- a/test/suites/fips/validate-fips.robot +++ b/test/suites/fips/validate-fips.robot @@ -68,10 +68,10 @@ Teardown Check Payload Tool Must Pass [Documentation] Run check-paylod Tool - ${podman_args}= Set Variable --authfile /etc/crio/openshift-pull-secret --privileged -i -v /:/myroot - ${scan_command}= Set Variable scan node --root /myroot + VAR ${podman_args}= --authfile /etc/crio/openshift-pull-secret --privileged -i -v /:/myroot + VAR ${scan_command}= scan node --root /myroot ${path}= Create Random Temp File - Set Global Variable ${CHECK_PAYLOAD_OUTPUT_FILE} ${path} + VAR ${CHECK_PAYLOAD_OUTPUT_FILE}= ${path} scope=GLOBAL ${rc}= Execute Command rpm -qi microshift >${CHECK_PAYLOAD_OUTPUT_FILE} 2>&1 ... 
sudo=True return_rc=True return_stdout=False return_stderr=False Should Be Equal As Integers 0 ${rc} @@ -82,14 +82,14 @@ Check Payload Tool Must Pass Check Container Images In Release Must Pass [Documentation] Run check-paylod Tool for Release containers - ${podman_pull_secret}= Set Variable /root/.config/containers/auth.json - ${podman_mounts}= Set Variable -v ${PULL_SECRET_PATH}:${podman_pull_secret} - ${podman_args}= Set Variable --rm --authfile ${PULL_SECRET_PATH} --privileged ${podman_mounts} + VAR ${podman_pull_secret}= /root/.config/containers/auth.json + VAR ${podman_mounts}= -v ${PULL_SECRET_PATH}:${podman_pull_secret} + VAR ${podman_args}= --rm --authfile ${PULL_SECRET_PATH} --privileged ${podman_mounts} ${path}= Create Random Temp File - Set Global Variable ${CHECK_PAYLOAD_REL_OUTPUT_FILE} ${path} + VAR ${CHECK_PAYLOAD_REL_OUTPUT_FILE}= ${path} scope=GLOBAL @{images}= Get Images From Release File FOR ${image} IN @{images} - ${scan_command}= Set Variable scan operator --spec ${image} + VAR ${scan_command}= scan operator --spec ${image} ${rc}= Execute Command ... podman run ${podman_args} ${CHECK_PAYLOAD_IMAGE} ${scan_command} >>${CHECK_PAYLOAD_REL_OUTPUT_FILE} 2>&1 ... sudo=True return_rc=True return_stdout=False return_stderr=False diff --git a/test/suites/gitops/gitops.robot b/test/suites/gitops/gitops.robot new file mode 100644 index 0000000000..edd453e57f --- /dev/null +++ b/test/suites/gitops/gitops.robot @@ -0,0 +1,23 @@ +*** Settings *** +Documentation MicroShift GitOps tests + +Resource ../../resources/microshift-process.resource + +Suite Setup Setup Suite With Namespace +Suite Teardown Teardown Suite With Namespace + + +*** Test Cases *** +Verify GitOps Pods Start Correctly + [Documentation] Waits for pods to enter a running state + + Wait Until Keyword Succeeds 2min 10s + ... 
All Pods Should Be Running openshift-gitops + +Verify Workload Deployed Correctly + [Documentation] Deploys workload and waits for ready status + + VAR ${manifest_path}= ${CURDIR}/test-deployment.yaml + Oc Apply -f ${manifest_path} -n ${NAMESPACE} + Wait Until Keyword Succeeds 5min 10s + ... Named Deployment Should Be Available test-app diff --git a/test/suites/gitops/test-deployment.yaml b/test/suites/gitops/test-deployment.yaml new file mode 100644 index 0000000000..17c72fc397 --- /dev/null +++ b/test/suites/gitops/test-deployment.yaml @@ -0,0 +1,19 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-app +spec: + replicas: 1 + selector: + matchLabels: + app: test-app + template: + metadata: + labels: + app: test-app + spec: + containers: + - name: test-app + image: registry.redhat.io/ubi8/httpd-24:latest + ports: + - containerPort: 8080 diff --git a/test/suites/greenboot/greenboot.robot b/test/suites/greenboot/greenboot.robot index c8ffdf9f22..d0fff9c0ed 100644 --- a/test/suites/greenboot/greenboot.robot +++ b/test/suites/greenboot/greenboot.robot @@ -97,7 +97,7 @@ Disrupt Service ${stdout} ${rc}= Execute Command ... which hostname ... sudo=False return_rc=True - IF ${rc} == 0 Set Suite Variable \${HOSTNAME_BIN_PATH} ${stdout} + IF ${rc} == 0 VAR ${HOSTNAME_BIN_PATH}= ${stdout} scope=SUITE # This covers both ostree and bootc systems ${is_ostree}= Is System OSTree diff --git a/test/suites/ipv6/dualstack.robot b/test/suites/ipv6/dualstack.robot index cc237b3fe7..18ff896310 100644 --- a/test/suites/ipv6/dualstack.robot +++ b/test/suites/ipv6/dualstack.robot @@ -38,12 +38,12 @@ Verify New Pod Works With IPv6 Wait Until Keyword Succeeds 10x 6s ... Access Hello Microshift Success ushift_ip=${USHIFT_HOST_IP1} - ... ushift_port=${HTTP_PORT} - ... hostname=${HOSTNAME} + ... ushift_port=${HTTP_PORT} + ... hostname=${HOSTNAME} Wait Until Keyword Succeeds 10x 6s ... Access Hello Microshift Success ushift_ip=${USHIFT_HOST_IP2} - ... 
ushift_port=${HTTP_PORT} - ... hostname=${HOSTNAME} + ... ushift_port=${HTTP_PORT} + ... hostname=${HOSTNAME} [Teardown] Run Keywords ... Delete Hello MicroShift Route @@ -69,12 +69,12 @@ Verify New Pod Works With IPv4 Wait Until Keyword Succeeds 10x 6s ... Access Hello Microshift Success ushift_ip=${USHIFT_HOST_IP1} - ... ushift_port=${HTTP_PORT} - ... hostname=${HOSTNAME} + ... ushift_port=${HTTP_PORT} + ... hostname=${HOSTNAME} Wait Until Keyword Succeeds 10x 6s ... Access Hello Microshift Success ushift_ip=${USHIFT_HOST_IP2} - ... ushift_port=${HTTP_PORT} - ... hostname=${HOSTNAME} + ... ushift_port=${HTTP_PORT} + ... hostname=${HOSTNAME} [Teardown] Run Keywords ... Delete Hello MicroShift Route diff --git a/test/suites/logscan/log-scan.robot b/test/suites/logscan/log-scan.robot index 55eefd7919..35faf92c77 100644 --- a/test/suites/logscan/log-scan.robot +++ b/test/suites/logscan/log-scan.robot @@ -23,7 +23,7 @@ Check Logs After Clean Start Cleanup MicroShift --all --keep-images ${cursor}= Get Journal Cursor - Set Suite Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=SUITE Enable MicroShift Start MicroShift @@ -44,7 +44,7 @@ Check Logs After Restart ... stop and check for errors. ${cursor}= Get Journal Cursor - Set Suite Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=SUITE Start MicroShift Restart Greenboot And Wait For Success diff --git a/test/suites/network/multi-nic.robot b/test/suites/network/multi-nic.robot index 5c197beb51..529b4149c6 100644 --- a/test/suites/network/multi-nic.robot +++ b/test/suites/network/multi-nic.robot @@ -122,13 +122,13 @@ Initialize Nmcli Variables ... ${NMCLI_CMD} | sed -n 1p | xargs ... return_stdout=True return_stderr=True return_rc=True Should Be Equal As Integers ${rc} 0 - Set Suite Variable \${NIC1_NAME} ${stdout} + VAR ${NIC1_NAME}= ${stdout} scope=SUITE ${stdout} ${stderr} ${rc}= Execute Command ... ${NMCLI_CMD} | sed -n 2p | xargs ... 
return_stdout=True return_stderr=True return_rc=True Should Be Equal As Integers ${rc} 0 - Set Suite Variable \${NIC2_NAME} ${stdout} + VAR ${NIC2_NAME}= ${stdout} scope=SUITE Disable Interface [Documentation] Disable NIC given in ${conn_name}. Change is not persistent. On @@ -145,7 +145,7 @@ Login Switch To IP IF '${USHIFT_HOST}'!='${new_ip}' Logout MicroShift Host - Set Global Variable \${USHIFT_HOST} ${new_ip} + VAR ${USHIFT_HOST}= ${new_ip} scope=GLOBAL Login MicroShift Host END diff --git a/test/suites/network/offline.robot b/test/suites/network/offline.robot index c47f14e46a..ec94d4f87e 100644 --- a/test/suites/network/offline.robot +++ b/test/suites/network/offline.robot @@ -44,7 +44,7 @@ Setup Test offline.Run With Kubeconfig oc create -n\=${ns} -f\=/tmp/hello-microshift.yaml # Set this last, it's not available within this scope anyway offline.Run With Kubeconfig oc wait -n\=${ns} --for\=condition=Ready pod/hello-microshift - Set Test Variable ${TEST_NS} ${ns} + VAR ${TEST_NS}= ${ns} scope=TEST Teardown Test [Documentation] Test teardown @@ -58,9 +58,9 @@ Pod Should Be Reachable Via Ingress ... subshell. ${result} ${ignore}= Wait Until Keyword Succeeds 5x 1s ... Run Guest Process ${GUEST_NAME} - ... bash - ... -c - ... curl --fail -I --max-time 15 -H \"Host: hello-microshift.cluster.local\" ${NODE_IP}:80/principal + ... bash + ... -c + ... curl --fail -I --max-time 15 -H \"Host: hello-microshift.cluster.local\" ${NODE_IP}:80/principal Log Many ${result["stdout"]} ${result["stderr"]} Should Be Equal As Integers ${result["rc"]} 0 @@ -69,7 +69,7 @@ Create Test Namespace ${rand}= Generate Random String ${rand}= Convert To Lower Case ${rand} - ${ns}= Set Variable test-${rand} + VAR ${ns}= test-${rand} Wait Until Keyword Succeeds 5m 10s ... 
offline.Run With Kubeconfig oc create namespace ${ns} RETURN ${ns} diff --git a/test/suites/optional/cert-manager.robot b/test/suites/optional/cert-manager.robot index 673d3a7572..7092f31cda 100644 --- a/test/suites/optional/cert-manager.robot +++ b/test/suites/optional/cert-manager.robot @@ -133,5 +133,5 @@ Apply YAML Manifest Setup Namespace [Documentation] Setup namespace for cert-manager tests - Set Suite Variable \${NAMESPACE} ${CERTS_NAMESPACE} + VAR ${NAMESPACE}= ${CERTS_NAMESPACE} scope=SUITE Create Namespace ${CERTS_NAMESPACE} diff --git a/test/suites/optional/gateway-api.robot b/test/suites/optional/gateway-api.robot index e2fe65e835..58d2d06540 100644 --- a/test/suites/optional/gateway-api.robot +++ b/test/suites/optional/gateway-api.robot @@ -21,13 +21,7 @@ Test Simple HTTP Route [Setup] Run Keywords ... Setup Namespace ... Deploy Hello MicroShift - TRY - Create Gateway ${GATEWAY_HOSTNAME} ${GATEWAY_PORT} ${NS_GATEWAY} - EXCEPT - Oc Logs deploy/istiod-openshift-gateway-api openshift-gateway-api - Oc Logs deploy/servicemesh-operator3 openshift-gateway-api - Fail Failed to create gateway - END + Create Gateway ${GATEWAY_HOSTNAME} ${GATEWAY_PORT} ${NS_GATEWAY} Create HTTP Route ${GATEWAY_HOSTNAME} ${NS_GATEWAY} Access Hello MicroShift Success ushift_port=${GATEWAY_PORT} hostname=${GATEWAY_HOSTNAME} [Teardown] Run Keywords @@ -43,7 +37,7 @@ Deploy Hello MicroShift Setup Namespace [Documentation] Configure a namespace where to create all resources for later cleanup. - Set Suite Variable \${NS_GATEWAY} ${NAMESPACE}-gw-1 + VAR ${NS_GATEWAY} ${NAMESPACE}-gw-1 scope=SUITE Create Namespace ${NS_GATEWAY} Delete Namespace @@ -53,9 +47,9 @@ Delete Namespace Create Gateway [Documentation] Create a gateway using given hostname and port. 
Waits for readiness [Arguments] ${hostname} ${port} ${namespace} - ${tmp}= Set Variable /tmp/gateway.yaml - Set Test Variable ${HOSTNAME} ${hostname} - Set Test Variable ${PORT} ${port} + VAR ${tmp} /tmp/gateway.yaml + VAR ${HOSTNAME} ${hostname} scope=TEST + VAR ${PORT} ${port} scope=TEST Run Keyword And Ignore Error ... Remove File ${tmp} Generate File From Template ${GATEWAY_MANIFEST_TMPL} ${tmp} @@ -69,9 +63,9 @@ Create Gateway Create HTTP Route [Documentation] Create an HTTP route using the given hostname and namespace. Waits for acceptance in a gateway. [Arguments] ${hostname} ${namespace} - ${tmp}= Set Variable /tmp/route.yaml - Set Test Variable ${HOSTNAME} ${hostname} - Set Test Variable ${NS} ${namespace} + VAR ${tmp} /tmp/route.yaml + VAR ${HOSTNAME} ${hostname} scope=TEST + VAR ${NS} ${namespace} scope=TEST Run Keyword And Ignore Error ... Remove File ${tmp} Generate File From Template ${HTTP_ROUTE_MANIFEST_TMPL} ${tmp} @@ -85,7 +79,7 @@ Create HTTP Route Generate File From Template [Documentation] Generate file from template - [Arguments] ${template_file} ${output_file} - ${template}= OperatingSystem.Get File ${template_file} - ${message}= Replace Variables ${template} - OperatingSystem.Append To File ${output_file} ${message} + [Arguments] ${template_file} ${out_file} + ${template} OperatingSystem.Get File ${template_file} + ${message} Replace Variables ${template} + OperatingSystem.Append To File ${out_file} ${message} diff --git a/test/suites/optional/generic-device-plugin.robot b/test/suites/optional/generic-device-plugin.robot index ca1f0a1fde..5ee7e4f111 100644 --- a/test/suites/optional/generic-device-plugin.robot +++ b/test/suites/optional/generic-device-plugin.robot @@ -8,47 +8,100 @@ Variables strings.py Library strings.py Suite Setup Setup Suite With Namespace -Suite Teardown Teardown Suite With Namespace +Suite Teardown Teardown Suite With GDP Cleanup Test Tags generic-device-plugin *** Variables *** -${NAMESPACE} ${EMPTY} +${NAMESPACE} 
${EMPTY} +${DEVICE_COUNT} 5 *** Test Cases *** Sanity Test [Documentation] Performs a simple test of Generic Device Plugin - [Setup] Run Keywords - ... Enable And Configure GDP - ... Enable Serialsim - ... Copy Script To Host + [Setup] GDP Test Setup ${GDP_CONFIG_DROPIN} - Wait Until Device Is Allocatable - - Command Should Work crictl pull registry.access.redhat.com/ubi9/ubi:9.6 - Start Script On Host Create Test Job + Wait For Job Completion And Check Logs + + [Teardown] GDP Test Teardown +Verify that mountPath correctly renames the device within the container + [Documentation] Performs a test of Generic Device Plugin with custom mountPath configuration + [Setup] GDP Test Setup ${GDP_CONFIG_DROPIN_WITH_MOUNT} + + Create Test Job With Modified Script Wait For Job Completion And Check Logs - [Teardown] Run Keywords - ... Stop Script On Host - ... Disable GDP + [Teardown] GDP Test Teardown + +Verify ttyUSB glob pattern device discovery and allocation + [Documentation] Tests GDP with glob pattern to discover multiple ttyUSB devices and verify device allocation across multiple pods + [Tags] glob-pattern + [Setup] TtyUSB Glob Test Setup + + # Create and verify pods with device allocation + Create Pod And Verify Allocation serial-test-pod 2 2 + Create Pod And Verify Allocation serial-test-pod1 1 3 + Create Pod And Verify Allocation serial-test-pod2 2 5 + + [Teardown] TtyUSB Glob Test Teardown + +Verify FUSE device allocation and accessibility + [Documentation] Verifies FUSE device configuration, allocation, and accessibility in pods + [Tags] fuse-device + [Setup] Enable And Configure GDP ${GDP_CONFIG_FUSE_COUNT} + Wait Until Device Is Allocatable 10 fuse + Oc Create -f ./assets/generic-device-plugin/fuse-test-pod.yaml -n ${NAMESPACE} + + Oc Wait -n ${NAMESPACE} pod/fuse-test-pod --for=condition=Ready --timeout=120s + + # Verify /dev/fuse is accessible in the pod + ${fuse_device}= Oc Exec fuse-test-pod ls -l /dev/fuse + Should Contain ${fuse_device} /dev/fuse + + # 
Verify node allocation shows 4 FUSE devices allocated + ${node}= Run With Kubeconfig oc get node -o=name + ${node_name}= Remove String ${node} node/ + ${describe_output}= Run With Kubeconfig oc describe node ${node_name} + ${allocated_line}= Get Lines Containing String ${describe_output} device.microshift.io/fuse + ${allocation_matches}= Get Regexp Matches + ... ${allocated_line} + ... device\\.microshift\\.io/fuse\\s+(\\d+)\\s+(\\d+) + ... 1 2 + Should Be Equal As Integers ${allocation_matches}[0][0] 4 + Should Be Equal As Integers ${allocation_matches}[0][1] 4 *** Keywords *** +GDP Test Setup + [Documentation] Common setup for GDP tests - configures GDP, enables serialsim, prepares environment + [Arguments] ${config_content} + Enable And Configure GDP ${config_content} + Enable Serialsim + Copy Script To Host + Wait Until Device Is Allocatable + Command Should Work crictl pull registry.access.redhat.com/ubi9/ubi:9.6 + Start Script On Host + +GDP Test Teardown + [Documentation] Common teardown for GDP tests - stops script, cleans up resources, disables GDP + Stop Script On Host + Cleanup Test Resources + Disable GDP + Enable And Configure GDP [Documentation] Enables GDP and adds fake device path in MicroShift configuration - Drop In MicroShift Config ${GDP_CONFIG_DROPIN} 10-gdp + [Arguments] ${config_content}=${GDP_CONFIG_DROPIN} ${dropin_name}=10-gdp + Drop In MicroShift Config ${config_content} ${dropin_name} Restart MicroShift Disable GDP - [Documentation] Removes GDP configuration drop-in - Remove Drop In MicroShift Config 10-gdp - Restart MicroShift - Restart Greenboot And Wait For Success + [Documentation] Removes GDP configuration drop-in (without restart) + [Arguments] ${dropin_name}=10-gdp + Remove Drop In MicroShift Config ${dropin_name} Enable Serialsim [Documentation] Enables the serialsim kernel module. 
@@ -75,6 +128,11 @@ Stop Script On Host Command Execution systemctl stop gdp-test-comm Command Execution systemctl reset-failed gdp-test-comm +Cleanup Test Resources + [Documentation] Cleans up test resources including configmap and job + Oc Delete job/gdp-test -n ${NAMESPACE} + Oc Delete configmap/gdp-script -n ${NAMESPACE} + Create Test Job [Documentation] Creates Job that spawns test Pod running to completion. ${script}= OperatingSystem.Get File ./assets/generic-device-plugin/fake-serial-communication.py @@ -84,23 +142,36 @@ Create Test Job Oc Create -f ${path} -n ${NAMESPACE} Oc Create -f ./assets/generic-device-plugin/job.yaml -n ${NAMESPACE} +Create Test Job With Modified Script + [Documentation] Creates Job that spawns test Pod running to completion with modified script. + ${script}= OperatingSystem.Get File ./assets/generic-device-plugin/fake-serial-communication.py + ${modified_script}= Replace String + ... ${script} + ... DEVICE_POD = "/dev/ttyPipeB0" + ... DEVICE_POD = "/dev/myrenamedserial" + ${configmap}= Append To Preamble ${modified_script} + Log ${configmap} + ${path}= Create Random Temp File ${configmap} + Oc Create -f ${path} -n ${NAMESPACE} + Oc Create -f ./assets/generic-device-plugin/job.yaml -n ${NAMESPACE} + Wait Until Device Is Allocatable [Documentation] Waits until device device.microshift.io/fakeserial is allocatable + [Arguments] ${expected_count}=1 ${device_type}=fakeserial ${node}= Run With Kubeconfig oc get node -o=name ${node_name}= Remove String ${node} node/ - Wait Until Keyword Succeeds 60s 5s - ... Device Should Be Allocatable ${node_name} + ... Device Should Be Allocatable ${node_name} ${expected_count} ${device_type} Device Should Be Allocatable - [Documentation] Checks if device device.microshift.io/fakeserial is allocatable - [Arguments] ${node_name} + [Documentation] Checks if specified device is allocatable + [Arguments] ${node_name} ${expected_count}=1 ${device_type}=fakeserial ${device_amount}= Oc Get JsonPath ... 
node ... ${EMPTY} ... ${node_name} - ... .status.allocatable.device\\.microshift\\.io/fakeserial - Should Be Equal As Integers ${device_amount} 1 + ... .status.allocatable.device\\.microshift\\.io/${device_type} + Should Be Equal As Integers ${device_amount} ${expected_count} Wait For Job Completion And Check Logs [Documentation] Waits for Job completion and checks Pod logs looking for 'Test successful' message @@ -113,3 +184,70 @@ Wait For Job Completion And Check Logs ... .items[*].metadata.name ${logs}= Oc Logs ${pod} ${NAMESPACE} Should Contain ${logs} Test successful + +Create TtyUSB Devices + [Documentation] Creates dummy ttyUSB devices + FOR ${i} IN RANGE ${DEVICE_COUNT} + Command Execution sudo mknod --mode=666 /dev/ttyUSB${i} c 166 ${i} + END + +Remove Dummy TtyUSB Devices + [Documentation] Removes dummy ttyUSB devices + FOR ${i} IN RANGE ${DEVICE_COUNT} + Command Execution sudo rm -f /dev/ttyUSB${i} + END + +TtyUSB Glob Test Setup + [Documentation] Setup for ttyUSB glob pattern test - creates devices, configures GDP, waits for allocation + Create TtyUSB Devices + Drop In MicroShift Config ${GDP_CONFIG_SERIAL_GLOB} 10-gdp + Restart MicroShift + Wait Until Device Is Allocatable ${DEVICE_COUNT} + +TtyUSB Glob Test Teardown + [Documentation] Cleanup for ttyUSB glob pattern test + Oc Delete pod --all -n ${NAMESPACE} + Remove Dummy TtyUSB Devices + Disable GDP + +Verify Node Device Allocation + [Documentation] Verifies device allocation on the node matches expected total + [Arguments] ${expected_total_allocated} + ${node}= Run With Kubeconfig oc get node -o=name + ${node_name}= Remove String ${node} node/ + ${describe_output}= Run With Kubeconfig oc describe node ${node_name} + ${allocated_line}= Get Lines Containing String ${describe_output} device.microshift.io/fakeserial + ${allocated_count}= Get Regexp Matches + ... ${allocated_line} + ... device\\.microshift\\.io/fakeserial\\s+(\\d+) + ... 
1 + VAR ${allocated_count}= ${allocated_count[0]} + Should Be Equal As Integers ${allocated_count} ${expected_total_allocated} + +Create Pod And Verify Allocation + [Documentation] Creates a pod using dynamic spec generation and verifies device allocation + [Arguments] ${pod_name} ${requested_devices} ${expected_total_allocated} + + # Generate pod spec dynamically + ${pod_spec}= Get Ttyusb Pod Definition ${pod_name} ${requested_devices} + + # Create and wait for pod + ${path}= Create Random Temp File ${pod_spec} + Oc Create -f ${path} -n ${NAMESPACE} + Oc Wait -n ${NAMESPACE} pod/${pod_name} --for=condition=Ready --timeout=120s + + # Verify correct number of devices allocated to pod + ${devices_in_pod}= Oc Exec ${pod_name} ls /dev/ | grep ttyUSB | wc -l ${NAMESPACE} + Should Be Equal As Integers ${devices_in_pod} ${requested_devices} + + # Verify node shows correct allocation + Verify Node Device Allocation ${expected_total_allocated} + +Teardown Suite With GDP Cleanup + [Documentation] Suite teardown that cleans up GDP configuration and restarts MicroShift + # Clean up any remaining GDP configuration + Remove Drop In MicroShift Config 10-gdp + # Restart MicroShift to clean state for next suite + Restart MicroShift + Restart Greenboot And Wait For Success + Teardown Suite With Namespace diff --git a/test/suites/optional/multus.robot b/test/suites/optional/multus.robot index 4332060622..b311b14f7d 100644 --- a/test/suites/optional/multus.robot +++ b/test/suites/optional/multus.robot @@ -117,8 +117,8 @@ Setup @{enps}= String.Split To Lines ${out} ${len}= Get Length ${enps} Should Be True ${len}>=2 - Set Suite Variable ${MACVLAN_MASTER} ${enps[0]} - Set Suite Variable ${IPVLAN_MASTER} ${enps[1]} + VAR ${MACVLAN_MASTER}= ${enps[0]} scope=SUITE + VAR ${IPVLAN_MASTER}= ${enps[1]} scope=SUITE Verify MicroShift RPM Install Template And Create NAD And Pod @@ -144,7 +144,7 @@ Connect To Pod From The Hypervisor [Arguments] ${pod} ${ns} ${extra_cni_name} ${networks}= Get And 
Verify Pod Networks ${pod} ${ns} ${extra_cni_name} - ${extra_ip}= Set Variable ${networks}[1][ips][0] + VAR ${extra_ip}= ${networks}[1][ips][0] Should Contain ${extra_ip} 192.168.112 ${result}= Process.Run Process curl -v ${extra_ip}:8080 diff --git a/test/suites/optional/observability.robot b/test/suites/optional/observability.robot index bae1a16ca5..44a7346bc7 100644 --- a/test/suites/optional/observability.robot +++ b/test/suites/optional/observability.robot @@ -27,18 +27,18 @@ ${TEST_CONFIG_PATH} assets/observability/otel_config.yaml Host Metrics Are Exported [Documentation] The opentelemetry-collector should be able to export host metrics. - Set Test Variable ${METRIC} system_cpu_time_seconds_total{cpu="cpu0",state="idle"} + VAR ${METRIC} system_cpu_time_seconds_total{cpu="cpu0",state="idle"} scope=TEST Check Prometheus Query ${PROMETHEUS_HOST} ${PROMETHEUS_PORT} ${METRIC} Check Prometheus Exporter ${USHIFT_HOST} ${PROM_EXPORTER_PORT} ${METRIC} Kube Metrics Are Exported [Documentation] The opentelemetry-collector should be able to export kube metrics. - Set Test Variable ${METRIC} container_cpu_time_seconds_total + VAR ${METRIC} container_cpu_time_seconds_total scope=TEST Check Prometheus Query ${PROMETHEUS_HOST} ${PROMETHEUS_PORT} ${METRIC} Check Prometheus Exporter ${USHIFT_HOST} ${PROM_EXPORTER_PORT} ${METRIC} - Set Test Variable ${METRIC} k8s_pod_cpu_time_seconds_total + VAR ${METRIC} k8s_pod_cpu_time_seconds_total scope=TEST Check Prometheus Query ${PROMETHEUS_HOST} ${PROMETHEUS_PORT} ${METRIC} Check Prometheus Exporter ${USHIFT_HOST} ${PROM_EXPORTER_PORT} ${METRIC} @@ -56,7 +56,7 @@ Logs Should Not Contain Receiver Errors [Documentation] Internal receiver errors are not treated as fatal. Typically these are due to a misconfiguration ... and thus indicate the provided default config should be reviewed. - ${pattern} Catenate SEPARATOR= + ${pattern} Catenate SEPARATOR=${EMPTY} ... 
\\s+\\{"error":.*\\} Pattern Should Not Appear In Log Output ${JOURNAL_CUR} ${pattern} unit="microshift-observability" @@ -71,7 +71,7 @@ Setup Suite And Prepare Test Host Check Required Observability Variables Set Test OTEL Configuration ${cur} Get Journal Cursor unit=microshift-observability - Set Suite Variable ${JOURNAL_CUR} ${cur} + VAR ${JOURNAL_CUR} ${cur} scope=SUITE Check Required Observability Variables [Documentation] Check if the required proxy variables are set diff --git a/test/suites/optional/olm.robot b/test/suites/optional/olm.robot index f93661f53a..6219038a2d 100644 --- a/test/suites/optional/olm.robot +++ b/test/suites/optional/olm.robot @@ -98,7 +98,7 @@ Subscription Should Be AtLatestKnown Get CSV Name From Subscription [Documentation] Obtains Subscription's CSV name. - [Arguments] ${namespace} ${name} + [Arguments] ${namespace} ${name} # robocop: off=unused-argument ${sub}= Oc Get subscriptions.operators.coreos.com ${OPERATORS_NAMESPACE} ${SUBSCRIPTION_NAME} RETURN ${sub.status.currentCSV} diff --git a/test/suites/optional/strings.py b/test/suites/optional/strings.py index 34e9f6e984..9dae8d8dbb 100644 --- a/test/suites/optional/strings.py +++ b/test/suites/optional/strings.py @@ -8,6 +8,39 @@ - path: /dev/ttyPipeB0 ''' +GDP_CONFIG_DROPIN_WITH_MOUNT = ''' +genericDevicePlugin: + status: Enabled + devices: + - name: fakeserial + groups: + - paths: + - path: /dev/ttyPipeB0 + mountPath: /dev/myrenamedserial +''' + +GDP_CONFIG_SERIAL_GLOB = ''' +genericDevicePlugin: + status: Enabled + devices: + - name: fakeserial + groups: + - paths: + - path: /dev/ttyUSB* +''' + +GDP_CONFIG_FUSE_COUNT = ''' +genericDevicePlugin: + status: Enabled + devices: + - name: fuse + groups: + - count: 10 + paths: + - path: /dev/fuse +''' + + CONFIGMAP_PREAMBLE = ''' apiVersion: v1 kind: ConfigMap @@ -25,6 +58,30 @@ ''' +def get_ttyusb_pod_definition(name: str, num_devices: int): + return f""" +apiVersion: v1 +kind: Pod +metadata: + name: {name} +spec: + containers: + - 
name: serialdevice-app-container + image: registry.access.redhat.com/ubi9/ubi:9.6 + command: ["sleep", "infinity"] + resources: + limits: + device.microshift.io/fakeserial: "{num_devices}" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" +""" + + def append_to_preamble(content: str) -> str: # Add 4 spaces before each line content = " " + content.replace("\n", "\n ") diff --git a/test/suites/osconfig/clusterid.robot b/test/suites/osconfig/clusterid.robot index 847b438116..6611f90f38 100644 --- a/test/suites/osconfig/clusterid.robot +++ b/test/suites/osconfig/clusterid.robot @@ -96,7 +96,7 @@ Create Sos Report [Documentation] Create a MicroShift Sos Report and return the tar file path ${rand_str}= Generate Random String 4 [NUMBERS] - ${sos_report_dir}= Catenate SEPARATOR= /tmp/rf-test/sos-report_ ${rand_str} + ${sos_report_dir}= Catenate SEPARATOR=${EMPTY} /tmp/rf-test/sos-report_ ${rand_str} Command Should Work mkdir -p ${sos_report_dir} Command Should Work sos report --batch --all-logs --tmp-dir ${sos_report_dir} -p microshift -o logs diff --git a/test/suites/router/router.robot b/test/suites/router/router.robot index eb4d639563..68e848c55f 100644 --- a/test/suites/router/router.robot +++ b/test/suites/router/router.robot @@ -239,6 +239,7 @@ Router Verify Security Configuration Router Verify Access Logging Configuration Container [Documentation] Test ingress access logging configuration. + [Tags] ushift-6085 [Setup] Run Keywords ... Remove Custom Config ... AND @@ -334,8 +335,8 @@ Setup With Custom Config Setup Namespaces [Documentation] Configure the required namespaces for namespace ownership tests. 
- Set Suite Variable \${NS_OWNERSHIP_1} ${NAMESPACE}-ownership-1 - Set Suite Variable \${NS_OWNERSHIP_2} ${NAMESPACE}-ownership-2 + VAR ${NS_OWNERSHIP_1}= ${NAMESPACE}-ownership-1 scope=SUITE + VAR ${NS_OWNERSHIP_2}= ${NAMESPACE}-ownership-2 scope=SUITE Create Namespace ${NS_OWNERSHIP_1} Create Namespace ${NS_OWNERSHIP_2} @@ -431,5 +432,5 @@ Delete Custom CA Secret Check Access Logs [Documentation] Retrieve and check if a pattern appears in the router's access logs. [Arguments] ${pattern} - ${logs}= Oc Logs deployment/router-default -c access-logs openshift-ingress + ${logs}= Oc Logs deployment/router-default -c logs openshift-ingress Should Contain ${logs} ${pattern} diff --git a/test/suites/rpm/install-and-upgrade-successful.robot b/test/suites/rpm/install-and-upgrade-successful.robot index b6f66345f6..9c586ae3be 100644 --- a/test/suites/rpm/install-and-upgrade-successful.robot +++ b/test/suites/rpm/install-and-upgrade-successful.robot @@ -138,10 +138,10 @@ Verify Service Active Timestamps Should Not Be Empty ${cts2} Should Not Be Empty ${mts2} # Verify that timestamps exist (services were active) - Should Not Be Equal As Strings ${cts1} ActiveEnterTimestamp= - Should Not Be Equal As Strings ${mts1} ActiveEnterTimestamp= - Should Not Be Equal As Strings ${cts2} ActiveEnterTimestamp= - Should Not Be Equal As Strings ${mts2} ActiveEnterTimestamp= + Should Not Be Equal As Strings ${cts1} "ActiveEnterTimestamp=" + Should Not Be Equal As Strings ${mts1} "ActiveEnterTimestamp=" + Should Not Be Equal As Strings ${cts2} "ActiveEnterTimestamp=" + Should Not Be Equal As Strings ${mts2} "ActiveEnterTimestamp=" # Verify that timestamps changed (services restarted) Should Not Be Equal As Strings ${cts1} ${cts2} Should Not Be Equal As Strings ${mts1} ${mts2} diff --git a/test/suites/standard1/containers-policy.robot b/test/suites/standard1/containers-policy.robot index 60bafd802c..26f609eabb 100644 --- a/test/suites/standard1/containers-policy.robot +++ 
b/test/suites/standard1/containers-policy.robot @@ -2,8 +2,8 @@ Documentation Container policy verification Resource ../../resources/microshift-process.resource -Library OperatingSystem Library Collections +Library OperatingSystem Suite Setup Setup Suite Teardown Teardown @@ -36,7 +36,7 @@ Teardown [Documentation] Test suite teardown Logout MicroShift Host -Verify Sigstore Signing Enabled # robocop: disable=too-many-calls-in-keyword +Verify Sigstore Signing Enabled # robocop: off=too-many-calls-in-keyword [Documentation] Verify the policy file contents when sigstore signing ... verification is enabled [Arguments] ${policy} diff --git a/test/suites/standard1/hostname.robot b/test/suites/standard1/hostname.robot index 9c227e67c9..bc6969c3ef 100644 --- a/test/suites/standard1/hostname.robot +++ b/test/suites/standard1/hostname.robot @@ -43,7 +43,7 @@ Teardown Configure New Hostname [Documentation] Configures ${NEW_HOSTNAME} in the MicroShift host. ${old}= Setup Hostname ${NEW_HOSTNAME} - Set Suite Variable \${OLD_HOSTNAME} ${old} + VAR ${OLD_HOSTNAME}= ${old} scope=SUITE Restore Old Hostname [Documentation] Configure old hostname again in the MicroShift host. 
diff --git a/test/suites/standard1/kustomize.robot b/test/suites/standard1/kustomize.robot index 2d2bc062fe..4949948c6f 100644 --- a/test/suites/standard1/kustomize.robot +++ b/test/suites/standard1/kustomize.robot @@ -77,7 +77,7 @@ No Extension *** Keywords *** -Setup Suite # robocop: disable=too-long-keyword +Setup Suite # robocop: off=too-long-keyword [Documentation] Set up all of the tests in this suite Check Required Env Variables Login MicroShift Host @@ -86,55 +86,55 @@ Setup Suite # robocop: disable=too-long-keyword # Used by "Load From /etc/microshift/manifests" ${ns}= Generate Manifests /etc/microshift/manifests - Set Suite Variable \${ETC_NAMESPACE} ${ns} + VAR ${ETC_NAMESPACE}= ${ns} scope=SUITE # Used by "Load From /etc/microshift/manifestsd" ${rand}= Generate Random String - Set Suite Variable \${ETC_SUBDIR} /etc/microshift/manifests.d/${rand} + VAR ${ETC_SUBDIR}= /etc/microshift/manifests.d/${rand} scope=SUITE ${ns}= Generate Manifests ${ETC_SUBDIR} - Set Suite Variable \${ETC_SUBDIR_NAMESPACE} ${ns} + VAR ${ETC_SUBDIR_NAMESPACE}= ${ns} scope=SUITE # Used by "Delete from /etc/microshift/manifestsd" ${rand}= Generate Random String - Set Suite Variable \${DELETE_ETC_SUBDIR} /etc/microshift/manifests.d/${rand} + VAR ${DELETE_ETC_SUBDIR}= /etc/microshift/manifests.d/${rand} scope=SUITE ${ns}= Generate Manifests ${DELETE_ETC_SUBDIR} - Set Suite Variable \${DELETE_ETC_SUBDIR_NAMESPACE} ${ns} + VAR ${DELETE_ETC_SUBDIR_NAMESPACE}= ${ns} scope=SUITE # Used by "Load From /usr/lib/microshift/manifests" ${ns}= Generate Manifests /usr/lib/microshift/manifests - Set Suite Variable \${USR_NAMESPACE} ${ns} + VAR ${USR_NAMESPACE}= ${ns} scope=SUITE # Used by "Load From /usr/lib/microshift/manifestsd" ${rand}= Generate Random String - Set Suite Variable \${USR_SUBDIR} /usr/lib/microshift/manifests.d/${rand} + VAR ${USR_SUBDIR}= /usr/lib/microshift/manifests.d/${rand} scope=SUITE ${ns}= Generate Manifests ${USR_SUBDIR} - Set Suite Variable \${USR_SUBDIR_NAMESPACE} 
${ns} + VAR ${USR_SUBDIR_NAMESPACE}= ${ns} scope=SUITE # Used by "Delete from /usr/lib/microshift/manifestsd" ${rand}= Generate Random String - Set Suite Variable \${DELETE_USR_SUBDIR} /usr/lib/microshift/manifests.d/${rand} + VAR ${DELETE_USR_SUBDIR}= /usr/lib/microshift/manifests.d/${rand} scope=SUITE ${ns}= Generate Manifests ${DELETE_USR_SUBDIR} - Set Suite Variable \${DELETE_USR_SUBDIR_NAMESPACE} ${ns} + VAR ${DELETE_USR_SUBDIR_NAMESPACE}= ${ns} scope=SUITE # Used by "Load From Configured Dir" ${ns}= Generate Manifests ${NON_DEFAULT_DIR} - Set Suite Variable \${NON_DEFAULT_NAMESPACE} ${ns} + VAR ${NON_DEFAULT_NAMESPACE}= ${ns} scope=SUITE # Used by "Do Not Load From Unconfigured Dir" ${ns}= Generate Manifests ${UNCONFIGURED_DIR} - Set Suite Variable \${UNCONFIGURED_NAMESPACE} ${ns} + VAR ${UNCONFIGURED_NAMESPACE}= ${ns} scope=SUITE # Used by "Yaml Extension" ${ns}= Generate Manifests ${YAML_PATH} kustomization.yaml - Set Suite Variable \${YAML_NAMESPACE} ${ns} + VAR ${YAML_NAMESPACE}= ${ns} scope=SUITE # Used by "Yml Extension" ${ns}= Generate Manifests ${YML_PATH} kustomization.yml - Set Suite Variable \${YML_NAMESPACE} ${ns} + VAR ${YML_NAMESPACE}= ${ns} scope=SUITE # Used by "No Extension" ${ns}= Generate Manifests ${NOEXT_PATH} Kustomization - Set Suite Variable \${NOEXT_NAMESPACE} ${ns} + VAR ${NOEXT_NAMESPACE}= ${ns} scope=SUITE # Extend the configuration setting to add the unique path to the defaults ${config_content}= Catenate SEPARATOR=\n @@ -151,7 +151,7 @@ Setup Suite # robocop: disable=too-long-keyword Restart MicroShift -Teardown Suite # robocop: disable=too-many-calls-in-keyword +Teardown Suite # robocop: off=too-many-calls-in-keyword [Documentation] Clean up all of the tests in this suite Clear Manifest Directory /etc/microshift/manifests @@ -288,7 +288,7 @@ Get Manifest Deletion Paths [Documentation] For given manifest path, get path to 'delete' subdir and destination dir for manifest to be deleted. 
[Arguments] ${manifest_path} ${path_list}= Split String ${manifest_path} / - ${manifest_dirname}= Set Variable ${path_list}[-1] + VAR ${manifest_dirname}= ${path_list}[-1] ${delete_base_dir}= Evaluate "/".join(${path_list}[:-1]+["delete"]) ${delete_manifest_path}= Catenate SEPARATOR=/ ${delete_base_dir} ${manifest_dirname} RETURN ${delete_base_dir} ${delete_manifest_path} diff --git a/test/suites/standard2/tls-configuration.robot b/test/suites/standard1/tls-configuration.robot similarity index 93% rename from test/suites/standard2/tls-configuration.robot rename to test/suites/standard1/tls-configuration.robot index 703b52245c..ccaf55da8c 100644 --- a/test/suites/standard2/tls-configuration.robot +++ b/test/suites/standard1/tls-configuration.robot @@ -122,7 +122,7 @@ Save Journal Cursor ... Save the journal cursor then restart MicroShift so we capture the ... shutdown messages and startup messages. ${cursor}= Get Journal Cursor - Set Test Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=TEST Setup TLS Configuration [Documentation] Apply the TLS configuration in the argument @@ -170,28 +170,28 @@ Check TLS Endpoints IF ${cipher_available} == ${FALSE} RETURN IF "${tls_version}" == "TLSv1.2" - Set Test Variable ${TLS_AND_CIPHER_ARGS} -tls1_2 -cipher ${cipher} + VAR ${TLS_AND_CIPHER_ARGS}= -tls1_2 -cipher ${cipher} scope=TEST ELSE IF "${tls_version}" == "TLSv1.3" - Set Test Variable ${TLS_AND_CIPHER_ARGS} -tls1_3 -ciphersuites ${cipher} + VAR ${TLS_AND_CIPHER_ARGS}= -tls1_3 -ciphersuites ${cipher} scope=TEST END # api server, kubelet, kube controller manager and kube scheduler endpoint ports FOR ${port} IN 6443 10250 10257 10259 ${stdout}= Wait Until Keyword Succeeds 10x 10s ... Openssl Connect Command localhost:${port} - ... ${TLS_AND_CIPHER_ARGS} - ... ${return_code} - ... ${tls_version}, Cipher is ${cipher} + ... ${TLS_AND_CIPHER_ARGS} + ... ${return_code} + ... 
${tls_version}, Cipher is ${cipher} END # etcd endpoint, need to use cert and key because etcd requires mTLS - Set Test Variable ${CERT_ARG} -cert ${APISERVER_ETCD_CLIENT_CERT}/client.crt - Set Test Variable ${KEY_ARG} -key ${APISERVER_ETCD_CLIENT_CERT}/client.key + VAR ${CERT_ARG}= -cert ${APISERVER_ETCD_CLIENT_CERT}/client.crt scope=TEST + VAR ${KEY_ARG}= -key ${APISERVER_ETCD_CLIENT_CERT}/client.key scope=TEST Wait Until Keyword Succeeds 10x 2s ... Openssl Connect Command localhost:2379 - ... ${TLS_AND_CIPHER_ARGS} ${CERT_ARG} ${KEY_ARG} - ... ${return_code} - ... CONNECTED + ... ${TLS_AND_CIPHER_ARGS} ${CERT_ARG} ${KEY_ARG} + ... ${return_code} + ... CONNECTED Check Journal Logs [Documentation] Verify system logs contain expected error messages for configuration errors diff --git a/test/suites/standard1/version.robot b/test/suites/standard1/version.robot index 7cd6f43683..c7c7d111ae 100644 --- a/test/suites/standard1/version.robot +++ b/test/suites/standard1/version.robot @@ -52,10 +52,10 @@ Metadata File Contents ${is_ostree}= Is System OSTree IF ${is_ostree} - ${expected}= Set Variable + VAR ${expected}= ... {"version":"${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}","deployment_id":"*","boot_id":"*"} ELSE - ${expected}= Set Variable + VAR ${expected}= ... {"version":"${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}","boot_id":"*"} END @@ -91,16 +91,16 @@ Read Expected Versions # robocop: disable=too-many-calls-in-keyword ... the content. # This returns a string like 4.14.0-0.nightly-arm64-2023-05-04-012046 ${version_full}= Get Version Of MicroShift RPM - Set Suite Variable \${FULL_VERSION} ${version_full} + VAR ${FULL_VERSION}= ${version_full} scope=SUITE # 4.14.0 ${version_short_matches}= Get Regexp Matches ${version_full} ^(\\d+.\\d+.\\d+) ${version_short_parts}= Split String ${version_short_matches}[0] . 
# 4 - Set Suite Variable \${MAJOR_VERSION} ${version_short_parts}[0] + VAR ${MAJOR_VERSION}= ${version_short_parts}[0] scope=SUITE # 14 - Set Suite Variable \${MINOR_VERSION} ${version_short_parts}[1] + VAR ${MINOR_VERSION}= ${version_short_parts}[1] scope=SUITE # 0 - Set Suite Variable \${PATCH_VERSION} ${version_short_parts}[2] + VAR ${PATCH_VERSION}= ${version_short_parts}[2] scope=SUITE # 4.14 ${ystream}= Format String {}.{} ${MAJOR_VERSION} ${MINOR_VERSION} - Set Suite Variable \${Y_STREAM} ${ystream} + VAR ${Y_STREAM}= ${ystream} scope=SUITE diff --git a/test/suites/standard2/configuration.robot b/test/suites/standard2/configuration.robot index 12807d8407..7a39216cf9 100644 --- a/test/suites/standard2/configuration.robot +++ b/test/suites/standard2/configuration.robot @@ -163,7 +163,7 @@ Save Journal Cursor ... Save the journal cursor then restart MicroShift so we capture the ... shutdown messages and startup messages. ${cursor}= Get Journal Cursor - Set Suite Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=SUITE Setup With Bad Log Level [Documentation] Set log level to an unknown value and restart diff --git a/test/suites/standard2/validate-certificate-rotation.robot b/test/suites/standard2/validate-certificate-rotation.robot index 64bcfc274f..0d5bf6b529 100644 --- a/test/suites/standard2/validate-certificate-rotation.robot +++ b/test/suites/standard2/validate-certificate-rotation.robot @@ -5,8 +5,8 @@ Resource ../../resources/common.resource Resource ../../resources/microshift-process.resource Resource ../../resources/microshift-host.resource Resource ../../resources/ostree-health.resource -Library DateTime Library Collections +Library DateTime Suite Setup Setup Suite Teardown Teardown @@ -88,7 +88,7 @@ All Certificates Should Be Valid For Current Time Command Should Work ... echo '${kubeconfig}' | grep client-certificate-data | cut -d: -f2 | tr -d ' ' | base64 -d > ${kubeconfig_cert_file} - @{cert_files}= Create List + VAR @{cert_files}= ... 
/var/lib/microshift/certs/kube-control-plane-signer/kube-scheduler/client.crt ... /var/lib/microshift/certs/kube-control-plane-signer/kube-controller-manager/client.crt ... /var/lib/microshift/certs/admin-kubeconfig-signer/admin-kubeconfig-client/client.crt diff --git a/test/suites/standard2/validate-custom-certificates.robot b/test/suites/standard2/validate-custom-certificates.robot index b56e77d8ec..7c7013c69a 100644 --- a/test/suites/standard2/validate-custom-certificates.robot +++ b/test/suites/standard2/validate-custom-certificates.robot @@ -25,7 +25,7 @@ Test Missing File [Documentation] Missing certificate files should be ignored with a warning [Setup] Setup Test ${cursor}= Get Journal Cursor - Set Suite Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=SUITE Configure Named Certificates ${TMPDIR}/server.crt ${TMPDIR}/server.key Restart MicroShift Pattern Should Appear In Log Output ${CURSOR} unparsable certificates are ignored @@ -51,7 +51,7 @@ Test Local Cert [Documentation] localhost certs should be ignored with a warning [Setup] Setup Test ${cursor}= Get Journal Cursor - Set Suite Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=SUITE Create Keys Create Cert TestCN localhost Upload Certificates @@ -118,7 +118,7 @@ Setup Setup Test [Documentation] Test suite setup ${tmp}= Create Random Temp Directory - Set Global Variable ${TMPDIR} ${tmp} + VAR ${TMPDIR}= ${tmp} scope=GLOBAL Teardown [Documentation] Test suite teardown @@ -133,29 +133,20 @@ Create Keys Openssl req -x509 -new -nodes -key ${TMPDIR}/ca.key -subj "/CN\=${MASTER_IP}" ... 
-days 10000 -out ${TMPDIR}/ca.crt -Create Cert No San - [Documentation] Create a certificate - [Arguments] ${cert_cn} - Set Global Variable ${CERT_CN} - Generate CSR Config ${CSR_NOSAN_CONFIG} ${TMPDIR}/csr.conf - Openssl req -new -key ${TMPDIR}/server.key -out ${TMPDIR}/server.csr -config ${TMPDIR}/csr.conf - Openssl x509 -req -in ${TMPDIR}/server.csr -CA ${TMPDIR}/ca.crt -CAkey ${TMPDIR}/ca.key -CAcreateserial - ... -out ${TMPDIR}/server.crt -days 10000 -extensions v3_ext -extfile ${TMPDIR}/csr.conf -sha256 - Create Cert [Documentation] Create a certificate [Arguments] ${cert_cn} ${cert_san_dns}=${EMPTY} ${cert_san_ip}=${EMPTY} ${expiry_days}=1000 - Set Global Variable ${CERT_CN} + VAR ${CERT_CN}= ${cert_cn} scope=GLOBAL IF "${cert_san_dns}"!="${EMPTY}" - Set Global Variable ${CERT_SAN_DNS} DNS.1 = ${cert_san_dns} + VAR ${CERT_SAN_DNS}= DNS.1 = ${cert_san_dns} scope=GLOBAL ELSE - Set Global Variable ${CERT_SAN_DNS} + VAR ${CERT_SAN_DNS}= ${EMPTY} scope=GLOBAL END IF "${cert_san_ip}"!="${EMPTY}" - Set Global Variable ${CERT_SAN_IP} IP.1 = ${cert_san_ip} + VAR ${CERT_SAN_IP}= IP.1 = ${cert_san_ip} scope=GLOBAL ELSE - Set Global Variable ${CERT_SAN_IP} + VAR ${CERT_SAN_IP}= ${EMPTY} scope=GLOBAL END Generate CSR Config ${CSR_CONFIG} ${TMPDIR}/csr.conf Openssl req -new -key ${TMPDIR}/server.key -out ${TMPDIR}/server.csr -config ${TMPDIR}/csr.conf @@ -194,7 +185,7 @@ Generate Random HostName Add Entry To Hosts [Documentation] Add new entry to local /etc/hosts [Arguments] ${ip} ${host} - ${ttt}= Set Variable ${ip}\t${host} # RF test marker\n + VAR ${ttt}= ${ip}\t${host} # RF test marker\n ${result}= Run Process sudo tee -a /etc/hosts shell=True stdin=${ttt} Should Be Equal As Integers ${result.rc} 0 diff --git a/test/suites/storage/pvc-resize.robot b/test/suites/storage/pvc-resize.robot index c107e2c2ec..f973396776 100644 --- a/test/suites/storage/pvc-resize.robot +++ b/test/suites/storage/pvc-resize.robot @@ -29,7 +29,7 @@ Increase Running Pod PV Size Test Case Setup 
[Documentation] Prepare the cluster env and test pod workload. ${ns}= Create Unique Namespace - Set Test Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=SUITE Oc Create -f ${SOURCE_POD} -n ${NAMESPACE} Named Pod Should Be Ready ${POD_NAME_STATIC} diff --git a/test/suites/storage/reboot.robot b/test/suites/storage/reboot.robot index 3d9589843b..e93ba165df 100644 --- a/test/suites/storage/reboot.robot +++ b/test/suites/storage/reboot.robot @@ -29,7 +29,7 @@ Rebooting Healthy System Should Keep Functional PVC Test Case Setup [Documentation] Prepare the cluster env and test pod workload. ${ns}= Create Unique Namespace - Set Test Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=SUITE Oc Create -f ${SOURCE_POD} -n ${NAMESPACE} Named Pod Should Be Ready ${POD_NAME_STATIC} diff --git a/test/suites/storage/snapshot.robot b/test/suites/storage/snapshot.robot index f2acc7d4ad..476b022739 100644 --- a/test/suites/storage/snapshot.robot +++ b/test/suites/storage/snapshot.robot @@ -63,7 +63,7 @@ Test Suite Teardown Test Case Setup [Documentation] Prepare the cluster-level APIs and a data-volume with some simple text ${ns}= Create Unique Namespace - Set Test Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=TEST Oc Apply -k ${SOURCE_KUSTOMIZE} -n ${NAMESPACE} Named Pod Should Be Ready ${POD_NAME_STATIC} Write To Volume ${POD_NAME_STATIC} ${TEST_DATA} diff --git a/test/suites/storage/storage-version-migration.robot b/test/suites/storage/storage-version-migration.robot index 01c6c38c36..1d1b4bc826 100644 --- a/test/suites/storage/storage-version-migration.robot +++ b/test/suites/storage/storage-version-migration.robot @@ -86,7 +86,7 @@ Validate Migration Delete Migration Resources [Documentation] Remove the CRD and Storage State and Version Migration resources - ${query}= Set Variable {.items[?(@.spec.resource.group=='test.resource')].metadata.name} + VAR ${query}= {.items[?(@.spec.resource.group=='test.resource')].metadata.name} 
${migration_resource_name}= Run With Kubeconfig ... oc get storageversionmigration -o jsonpath="${query}" diff --git a/test/suites/telemetry/telemetry.robot b/test/suites/telemetry/telemetry.robot index 9266994f23..69f938c636 100644 --- a/test/suites/telemetry/telemetry.robot +++ b/test/suites/telemetry/telemetry.robot @@ -151,7 +151,7 @@ Setup Telemetry Configuration Drop In MicroShift Config ${config} 10-telemetry Stop MicroShift ${cursor}= Get Journal Cursor - Set Test Variable \${CURSOR} ${cursor} + VAR ${CURSOR}= ${cursor} scope=TEST Restart MicroShift Remove Telemetry Configuration @@ -201,10 +201,10 @@ Get List Prometheus Metrics To Check ${os_id}= Get Host OS Id ${os_version}= Get Host OS Version ${microshift_ver}= MicroShift Version - ${microshift_version}= Set Variable ${microshift_ver.major}.${microshift_ver.minor}.${microshift_ver.patch} + VAR ${microshift_version}= ${microshift_ver.major}.${microshift_ver.minor}.${microshift_ver.patch} - Set Local Variable - ... @{METRICS_TO_CHECK} + VAR + ... @{METRICS_TO_CHECK}= ... cluster:capacity_cpu_cores:sum{_id="${cluster_id}",label_beta_kubernetes_io_instance_type="rhde",label_node_openshift_io_os_id="${os_id}",label_kubernetes_io_arch="${arch}"} ... cluster:capacity_memory_bytes:sum{_id="${cluster_id}",label_beta_kubernetes_io_instance_type="rhde",label_node_openshift_io_os_id="${os_id}",label_kubernetes_io_arch="${arch}"} ... cluster:cpu_usage_cores:sum{_id="${cluster_id}"} @@ -217,5 +217,6 @@ Get List Prometheus Metrics To Check ... cluster:usage:resources:sum{_id="${cluster_id}",resource="customresourcedefinitions.apiextensions.k8s.io"} ... cluster:usage:containers:sum{_id="${cluster_id}"} ... microshift_version{_id="${cluster_id}",deployment_type="${deployment_type}",os_version_id="${os_version}",version="${microshift_version}"} + ... 
scope=LOCAL RETURN @{METRICS_TO_CHECK} diff --git a/test/suites/tuned/profile.robot b/test/suites/tuned/profile.robot index d117a96821..81d6244fe9 100644 --- a/test/suites/tuned/profile.robot +++ b/test/suites/tuned/profile.robot @@ -88,7 +88,7 @@ Teardown Setup Namespace [Documentation] Setup unique namespace with elevated privileges ${ns}= Create Unique Namespace - Set Suite Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=SUITE Run With Kubeconfig oc label ns ${ns} --overwrite pod-security.kubernetes.io/audit=privileged Run With Kubeconfig oc label ns ${ns} --overwrite pod-security.kubernetes.io/enforce=privileged Run With Kubeconfig oc label ns ${ns} --overwrite pod-security.kubernetes.io/warn=privileged diff --git a/test/suites/tuned/workload-partitioning.robot b/test/suites/tuned/workload-partitioning.robot index ef2ec851ab..f567f6ad78 100644 --- a/test/suites/tuned/workload-partitioning.robot +++ b/test/suites/tuned/workload-partitioning.robot @@ -143,7 +143,7 @@ Construct Pod Info ${pod_pid}= Evaluate "${pod_json}[info][pid]" ${pod_id}= Evaluate "${pod_json}[status][id]" ${namespace}= Evaluate "${pod_json}[info][runtimeSpec][annotations][io.kubernetes.pod.namespace]" - ${pod_info}= Catenate SEPARATOR= + ${pod_info}= Catenate SEPARATOR=${EMPTY} ... container: ${container_name} ... ${EMPTY} pod: ${pod_name} ... 
${EMPTY} pid: ${pod_pid} @@ -153,9 +153,9 @@ Construct Pod Info Get Json From Crio Output [Documentation] get json from the crio command [Arguments] ${is_workloads} - Set Global Variable ${NOT_WORKLOADS} ${EMPTY} + VAR ${NOT_WORKLOADS}= ${EMPTY} scope=GLOBAL IF "${is_workloads}"!="${EMPTY}" - Set Global Variable ${NOT_WORKLOADS} | not + VAR ${NOT_WORKLOADS}= | not scope=GLOBAL END ${stdout} ${stderr} ${rc}= Execute Command @@ -171,7 +171,7 @@ Crio Save Pod Manifest [Documentation] Saves running pod manifest using crio inspect [Arguments] ${pod_id} ${path}= Create Random Temp File - Set Global Variable ${DEBUG_OUTPUT_FILE} ${path} + VAR ${DEBUG_OUTPUT_FILE}= ${path} scope=GLOBAL ${stdout} ${stderr} ${rc}= Execute Command ... crictl ps -q | xargs sudo crictl inspect | jq -rs '[.[][] | select(.status.id=="${pod_id}")]' >${DEBUG_OUTPUT_FILE} 2>&1 ... sudo=True @@ -204,7 +204,11 @@ All Pods Should Be Annotated As Management [Documentation] Obtains list of Deployments created by CSV. ${pods_raw}= Oc Get All Pods @{pods}= Split String ${pods_raw} - Set Test Variable @{NS_TO_SKIP_LIST} openshift-gateway-api redhat-ods-applications + VAR @{NS_TO_SKIP_LIST}= + ... openshift-gateway-api + ... redhat-ods-applications + ... cert-manager-operator + ... 
scope=TEST FOR ${pod} IN @{pods} ${ns} ${pod}= Split String ${pod} \@ IF "${ns}" not in "@{NS_TO_SKIP_LIST}" @@ -259,7 +263,7 @@ Cleanup And Create NS Remove Files ${KUBELET_CPU_STATE_FILE} Restart MicroShift ${ns}= Create Unique Namespace - Set Suite Variable \${NAMESPACE} ${ns} + VAR ${NAMESPACE}= ${ns} scope=SUITE Setup Suite And Wait For Greenboot [Documentation] Run setup suit and wait for greenboot to become ready diff --git a/test/suites/upgrade/upgrade-multus.robot b/test/suites/upgrade/upgrade-multus.robot index 5ebfd62174..ab01a739f7 100644 --- a/test/suites/upgrade/upgrade-multus.robot +++ b/test/suites/upgrade/upgrade-multus.robot @@ -59,7 +59,6 @@ Verify Multus Embedded Manifests [Documentation] Delete Multus' DHCP Daemon and reboot host to make sure ... it comes back even though the manifests do not exist anymore. - SSHLibrary.File Should Exist /etc/greenboot/check/required.d/41_microshift_running_check_multus.sh SSHLibrary.File Should Exist /etc/crio/crio.conf.d/12-microshift-multus.conf SSHLibrary.Directory Should Not Exist /usr/lib/microshift/manifests.d/000-microshift-multus/ diff --git a/vendor/k8s.io/apiserver/pkg/cel/common/schemas.go b/vendor/k8s.io/apiserver/pkg/cel/common/schemas.go index 19392babeb..909284166a 100644 --- a/vendor/k8s.io/apiserver/pkg/cel/common/schemas.go +++ b/vendor/k8s.io/apiserver/pkg/cel/common/schemas.go @@ -55,8 +55,15 @@ func SchemaDeclType(s Schema, isResourceRoot bool) *apiservercel.DeclType { // `type(intOrStringField) == int ? intOrStringField < 5 : double(intOrStringField.replace('%', '')) < 0.5 // dyn := apiservercel.NewSimpleTypeWithMinSize("dyn", cel.DynType, nil, 1) // smallest value for a serialized x-kubernetes-int-or-string is 0 - // handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string - dyn.MaxElements = maxRequestSizeBytes - 2 + + // If the schema has a maxlength constraint, bound the max elements based on the max length. 
+ // Otherwise, fallback to the max request size. + if s.MaxLength() != nil { + dyn.MaxElements = estimateMaxElementsFromMaxLength(s) + } else { + dyn.MaxElements = estimateMaxStringLengthPerRequest(s) + } + return dyn } @@ -159,11 +166,7 @@ func SchemaDeclType(s Schema, isResourceRoot bool) *apiservercel.DeclType { strWithMaxLength := apiservercel.NewSimpleTypeWithMinSize("string", cel.StringType, types.String(""), apiservercel.MinStringSize) if s.MaxLength() != nil { - // multiply the user-provided max length by 4 in the case of an otherwise-untyped string - // we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points, - // but we need to reason about length for things like request size, so we use bytes in this code (and an individual - // unicode code point can be up to 4 bytes long) - strWithMaxLength.MaxElements = zeroIfNegative(*s.MaxLength()) * 4 + strWithMaxLength.MaxElements = estimateMaxElementsFromMaxLength(s) } else { if len(s.Enum()) > 0 { strWithMaxLength.MaxElements = estimateMaxStringEnumLength(s) @@ -228,6 +231,7 @@ func WithTypeAndObjectMeta(s *spec.Schema) *spec.Schema { // must only be called on schemas of type "string" or x-kubernetes-int-or-string: true func estimateMaxStringLengthPerRequest(s Schema) int64 { if s.IsXIntOrString() { + // handle x-kubernetes-int-or-string by returning the max length/min serialized size of the largest possible string return maxRequestSizeBytes - 2 } switch s.Format() { @@ -272,3 +276,13 @@ func estimateMaxAdditionalPropertiesFromMinSize(minSize int64) int64 { // subtract 2 to account for { and } return (maxRequestSizeBytes - 2) / keyValuePairSize } + +// estimateMaxElementsFromMaxLength estimates the maximum number of elements for a string schema +// that is bound with a maxLength constraint. 
+func estimateMaxElementsFromMaxLength(s Schema) int64 { + // multiply the user-provided max length by 4 in the case of an otherwise-untyped string + // we do this because the OpenAPIv3 spec indicates that maxLength is specified in runes/code points, + // but we need to reason about length for things like request size, so we use bytes in this code (and an individual + // unicode code point can be up to 4 bytes long) + return zeroIfNegative(*s.MaxLength()) * 4 +} diff --git a/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator.go b/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator.go index ac17fb1c88..10d2ce4c81 100644 --- a/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator.go +++ b/vendor/k8s.io/apiserver/pkg/storage/cacher/delegator.go @@ -206,6 +206,7 @@ func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.L return c.storage.GetList(ctx, key, opts, listObj) } } + fallbackOpts := opts if result.ConsistentRead { listRV, err = c.storage.GetCurrentResourceVersion(ctx) if err != nil { @@ -213,20 +214,28 @@ func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.L } // Setting resource version for consistent read in cache based on current ResourceVersion in etcd. opts.ResourceVersion = strconv.FormatInt(int64(listRV), 10) + // If continue is not set, we need to set the resource version match to ResourceVersionMatchNotOlderThan to serve latest from cache + if opts.Predicate.Continue == "" { + opts.ResourceVersionMatch = metav1.ResourceVersionMatchNotOlderThan + } } err = c.cacher.GetList(ctx, key, opts, listObj) success := "true" fallback := "false" if err != nil { - if errors.IsResourceExpired(err) { - return c.storage.GetList(ctx, key, opts, listObj) + // ResourceExpired error occurs when attempting to list from cache with a specific resourceVersion + // that is no longer available in the cache. 
With ListFromCacheSnapshot feature (1.34+), we can + // serve exact resourceVersion requests from cache if available, falling back to storage only when + // the requested version is expired. + if errors.IsResourceExpired(err) && utilfeature.DefaultFeatureGate.Enabled(features.ListFromCacheSnapshot) { + return c.storage.GetList(ctx, key, fallbackOpts, listObj) } if result.ConsistentRead { + // IsTooLargeResourceVersion occurs when the requested RV is higher than cache's current RV + // and cache hasn't caught up within the timeout period. Fall back to etcd. if storage.IsTooLargeResourceVersion(err) { fallback = "true" - // Reset resourceVersion during fallback from consistent read. - opts.ResourceVersion = "" - err = c.storage.GetList(ctx, key, opts, listObj) + err = c.storage.GetList(ctx, key, fallbackOpts, listObj) } if err != nil { success = "false" diff --git a/vendor/k8s.io/apiserver/pkg/util/webhook/webhook.go b/vendor/k8s.io/apiserver/pkg/util/webhook/webhook.go index b03640ae8d..8552e91eb5 100644 --- a/vendor/k8s.io/apiserver/pkg/util/webhook/webhook.go +++ b/vendor/k8s.io/apiserver/pkg/util/webhook/webhook.go @@ -83,6 +83,7 @@ func NewGenericWebhook(scheme *runtime.Scheme, codecFactory serializer.CodecFact clientConfig := rest.CopyConfig(config) codec := codecFactory.LegacyCodec(groupVersions...) 
+ clientConfig.ContentType = runtime.ContentTypeJSON clientConfig.ContentConfig.NegotiatedSerializer = serializer.NegotiatedSerializerWrapper(runtime.SerializerInfo{Serializer: codec}) clientConfig.Wrap(x509metrics.NewDeprecatedCertificateRoundTripperWrapperConstructor( diff --git a/vendor/k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go b/vendor/k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go index 26506e4701..67781bcb4e 100644 --- a/vendor/k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go +++ b/vendor/k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation/authentication/validate_authentication.go @@ -1,22 +1,51 @@ package authentication import ( + "cmp" "context" "fmt" "io" + "math" + "slices" + "time" + "golang.org/x/sync/singleflight" "k8s.io/apimachinery/pkg/api/validation" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/cel/library" + "k8s.io/apiserver/pkg/warning" + "k8s.io/klog/v2" + "k8s.io/utils/lru" + + "github.com/google/cel-go/checker" configv1 "github.com/openshift/api/config/v1" + authenticationcel "k8s.io/apiserver/pkg/authentication/cel" crvalidation "k8s.io/kubernetes/openshift-kube-apiserver/admission/customresourcevalidation" ) const PluginName = "config.openshift.io/ValidateAuthentication" +const ( + wholeResourceExcessiveCostThreshold = 100000000 + excessiveCompileDuration = time.Second + costlyExpressionWarningCount = 3 + + // This is the default KAS request header size limit in bytes. 
+ // Because JWTs are only limited in size by the maximum request header size, + // we can use this fixed value to make pessimistic size estimates by assuming + // that the inputs were decoded from base64-encoded JSON. + // + // This isn't very precise, but can still be used to provide + // end-users a signal that they are potentially doing very expensive + // operations with CEL expressions whose cost is dependent + // on the size of the input. + fixedSize = 1 << 20 +) + // Register registers a plugin func Register(plugins *admission.Plugins) { plugins.Register(PluginName, func(config io.Reader) (admission.Interface, error) { @@ -25,7 +54,9 @@ func Register(plugins *admission.Plugins) { configv1.GroupVersion.WithResource("authentications").GroupResource(): true, }, map[schema.GroupVersionKind]crvalidation.ObjectValidator{ - configv1.GroupVersion.WithKind("Authentication"): authenticationV1{}, + configv1.GroupVersion.WithKind("Authentication"): authenticationV1{ + cel: defaultCelStore(), + }, }) }) } @@ -46,21 +77,66 @@ func toAuthenticationV1(uncastObj runtime.Object) (*configv1.Authentication, fie return obj, nil } -type authenticationV1 struct{} +type celStore struct { + compilingGroup singleFlightDoer + compiledStore compiledExpressionStore + compiler authenticationcel.Compiler + sizeEstimator checker.CostEstimator + timerFactory timerFactory +} + +func defaultCelStore() *celStore { + return &celStore{ + compiledStore: lru.New(100), + compilingGroup: new(singleflight.Group), + compiler: authenticationcel.NewDefaultCompiler(), + sizeEstimator: &fixedSizeEstimator{ + size: fixedSize, + }, + timerFactory: &excessiveCompileTimerFactory{}, + } +} + +type singleFlightDoer interface { + Do(key string, fn func() (any, error)) (any, error, bool) +} + +type compiledExpressionStore interface { + Add(key lru.Key, value interface{}) + Get(key lru.Key) (value interface{}, ok bool) +} + +type timerFactory interface { + Timer(time.Duration, func()) timer +} + +type timer 
interface { + Stop() bool +} + +type excessiveCompileTimerFactory struct{} + +func (ectf *excessiveCompileTimerFactory) Timer(duration time.Duration, do func()) timer { + return time.AfterFunc(duration, do) +} + +type authenticationV1 struct { + cel *celStore +} -func (authenticationV1) ValidateCreate(_ context.Context, uncastObj runtime.Object) field.ErrorList { +func (a authenticationV1) ValidateCreate(ctx context.Context, uncastObj runtime.Object) field.ErrorList { obj, errs := toAuthenticationV1(uncastObj) if len(errs) > 0 { return errs } errs = append(errs, validation.ValidateObjectMeta(&obj.ObjectMeta, false, crvalidation.RequireNameCluster, field.NewPath("metadata"))...) - errs = append(errs, validateAuthenticationSpecCreate(obj.Spec)...) + errs = append(errs, validateAuthenticationSpecCreate(ctx, obj.Spec, a.cel)...) return errs } -func (authenticationV1) ValidateUpdate(_ context.Context, uncastObj runtime.Object, uncastOldObj runtime.Object) field.ErrorList { +func (a authenticationV1) ValidateUpdate(ctx context.Context, uncastObj runtime.Object, uncastOldObj runtime.Object) field.ErrorList { obj, errs := toAuthenticationV1(uncastObj) if len(errs) > 0 { return errs @@ -71,7 +147,7 @@ func (authenticationV1) ValidateUpdate(_ context.Context, uncastObj runtime.Obje } errs = append(errs, validation.ValidateObjectMetaUpdate(&obj.ObjectMeta, &oldObj.ObjectMeta, field.NewPath("metadata"))...) - errs = append(errs, validateAuthenticationSpecUpdate(obj.Spec, oldObj.Spec)...) + errs = append(errs, validateAuthenticationSpecUpdate(ctx, obj.Spec, oldObj.Spec, a.cel)...) 
return errs } @@ -92,15 +168,15 @@ func (authenticationV1) ValidateStatusUpdate(_ context.Context, uncastObj runtim return errs } -func validateAuthenticationSpecCreate(spec configv1.AuthenticationSpec) field.ErrorList { - return validateAuthenticationSpec(spec) +func validateAuthenticationSpecCreate(ctx context.Context, spec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { + return validateAuthenticationSpec(ctx, spec, cel) } -func validateAuthenticationSpecUpdate(newspec, oldspec configv1.AuthenticationSpec) field.ErrorList { - return validateAuthenticationSpec(newspec) +func validateAuthenticationSpecUpdate(ctx context.Context, newspec, oldspec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { + return validateAuthenticationSpec(ctx, newspec, cel) } -func validateAuthenticationSpec(spec configv1.AuthenticationSpec) field.ErrorList { +func validateAuthenticationSpec(ctx context.Context, spec configv1.AuthenticationSpec, cel *celStore) field.ErrorList { errs := field.ErrorList{} specField := field.NewPath("spec") @@ -121,14 +197,238 @@ func validateAuthenticationSpec(spec configv1.AuthenticationSpec) field.ErrorLis spec.WebhookTokenAuthenticator, fmt.Sprintf("this field cannot be set with the %q .spec.type", spec.Type), )) } - } errs = append(errs, crvalidation.ValidateConfigMapReference(specField.Child("oauthMetadata"), spec.OAuthMetadata, false)...) + // Perform External OIDC Provider related validations + // ---------------- + + // There is currently no guarantee that these fields are not set when the spec.Type is != OIDC. + // To ensure we are enforcing approriate admission validations at all times, just always iterate through the list + // of OIDC Providers and perform the validations. + // If/when the openshift/api admission validations are updated to enforce that this field is not configured + // when Type != OIDC, this loop should be a no-op due to an empty list. 
+ for i, provider := range spec.OIDCProviders { + errs = append(errs, validateOIDCProvider(ctx, specField.Child("oidcProviders").Index(i), cel, provider)...) + } + // ---------------- + return errs } func validateAuthenticationStatus(status configv1.AuthenticationStatus) field.ErrorList { return crvalidation.ValidateConfigMapReference(field.NewPath("status", "integratedOAuthMetadata"), status.IntegratedOAuthMetadata, false) } + +type costRecorder struct { + Recordings []costRecording +} + +func (cr *costRecorder) AddRecording(field *field.Path, cost uint64) { + cr.Recordings = append(cr.Recordings, costRecording{ + Field: field, + Cost: cost, + }) +} + +type costRecording struct { + Field *field.Path + Cost uint64 +} + +func validateOIDCProvider(ctx context.Context, path *field.Path, cel *celStore, provider configv1.OIDCProvider) field.ErrorList { + costRecorder := &costRecorder{} + + errs := validateClaimMappings(ctx, path, cel, costRecorder, provider.ClaimMappings) + + var totalCELExpressionCost uint64 = 0 + + for _, recording := range costRecorder.Recordings { + totalCELExpressionCost = addCost(totalCELExpressionCost, recording.Cost) + } + + if totalCELExpressionCost > wholeResourceExcessiveCostThreshold { + costlyExpressions := getNMostCostlyExpressions(costlyExpressionWarningCount, costRecorder.Recordings...) + warn := fmt.Sprintf("runtime cost of all CEL expressions exceeds %d points. top %d most costly expressions: %v", wholeResourceExcessiveCostThreshold, len(costlyExpressions), costlyExpressions) + warning.AddWarning(ctx, "", warn) + klog.Warning(warn) + } + + return errs +} + +// addCost adds a cost value to a total value, +// returning the resulting value. +// addCost handles integer overflow errors +// by just always returning the maximum uint64 +// value if an overflow would occur. 
+func addCost(total, cost uint64) uint64 { + if total > math.MaxUint64-cost { + return math.MaxUint64 + } + + return total + cost +} + +func getNMostCostlyExpressions(n int, records ...costRecording) []costRecording { + // sort in descending order of cost + slices.SortFunc(records, func(a, b costRecording) int { + return cmp.Compare(b.Cost, a.Cost) + }) + + // safely get the N most expensive cost records + if len(records) > n { + return records[:n] + } + + return records +} + +func validateClaimMappings(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, claimMappings configv1.TokenClaimMappings) field.ErrorList { + path = path.Child("claimMappings") + + out := field.ErrorList{} + + out = append(out, validateUIDClaimMapping(ctx, path, cel, costRecorder, claimMappings.UID)...) + out = append(out, validateExtraClaimMapping(ctx, path, cel, costRecorder, claimMappings.Extra...)...) + + return out +} + +func validateUIDClaimMapping(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, uid *configv1.TokenClaimOrExpressionMapping) field.ErrorList { + if uid == nil { + return nil + } + + out := field.ErrorList{} + + if uid.Expression != "" { + childPath := path.Child("uid", "expression") + + out = append(out, validateCELExpression(ctx, cel, costRecorder, childPath, &authenticationcel.ClaimMappingExpression{ + Expression: uid.Expression, + })...) + } + + return out +} + +func validateExtraClaimMapping(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, extras ...configv1.ExtraMapping) field.ErrorList { + out := field.ErrorList{} + for i, extra := range extras { + out = append(out, validateExtra(ctx, path.Child("extra").Index(i), cel, costRecorder, extra)...) 
+ } + + return out +} + +func validateExtra(ctx context.Context, path *field.Path, cel *celStore, costRecorder *costRecorder, extra configv1.ExtraMapping) field.ErrorList { + childPath := path.Child("valueExpression") + + return validateCELExpression(ctx, cel, costRecorder, childPath, &authenticationcel.ExtraMappingExpression{ + Key: extra.Key, + Expression: extra.ValueExpression, + }) +} + +type celCompileResult struct { + err error + cost uint64 +} + +func validateCELExpression(ctx context.Context, cel *celStore, costRecorder *costRecorder, path *field.Path, accessor authenticationcel.ExpressionAccessor) field.ErrorList { + // if context has been canceled, don't try to compile any expressions + if err := ctx.Err(); err != nil { + return field.ErrorList{field.InternalError(path, err)} + } + + result, err, _ := cel.compilingGroup.Do(accessor.GetExpression(), func() (interface{}, error) { + // if the expression is not currently being compiled, it might have already been compiled + if val, ok := cel.compiledStore.Get(accessor.GetExpression()); ok { + res, ok := val.(celCompileResult) + if !ok { + return nil, fmt.Errorf("expected return value from cache of compiled expressions to be of type celCompileResult but was %T", val) + } + + return res, nil + } + + // expression is not currently being compiled, and has not been compiled before (or has been long enough since it was last compiled that we dropped it). + // Let's compile it. + + // Asynchronously handle excessive compilation time so we + // can still log a warning in the event the process has died + // before compilation of the expression has finished. 
+ warningChan := make(chan string, 1) + timer := cel.timerFactory.Timer(excessiveCompileDuration, func() { + defer close(warningChan) + warn := fmt.Sprintf("cel expression %q took excessively long to compile (%s)", accessor.GetExpression(), excessiveCompileDuration) + klog.Warning(warn) + warningChan <- warn + }) + + compRes, compErr := cel.compiler.CompileClaimsExpression(accessor) + + timer.Stop() + + res := celCompileResult{ + err: compErr, + } + + if compRes.AST != nil && compErr == nil { + cost, err := checker.Cost(compRes.AST.NativeRep(), &library.CostEstimator{ + SizeEstimator: cel.sizeEstimator, + }) + // Because we are only warning on excessive cost, we shouldn't prevent the create/update of the resource if we can successfully + // compile the expression but are unable to estimate the cost. The Structured Authentication Configuration feature does not + // gate on cost of expressions, so we are doing a best-effort warning here. + // Instead, default to our best estimate of the worst case cost. + if err != nil { + klog.Errorf("unable to estimate cost for expression %q: %v. Defaulting cost to %d", accessor.GetExpression(), err, fixedSize) + cost = checker.CostEstimate{Max: fixedSize} + } + + res.cost = cost.Max + } + + // check if we received a warning related to excessive compile time. 
If not, continue + select { + case warn := <-warningChan: + warning.AddWarning(ctx, "", warn) + default: + } + + cel.compiledStore.Add(accessor.GetExpression(), res) + + return res, nil + }) + if err != nil { + return field.ErrorList{field.InternalError(path, fmt.Errorf("running compilation of expression %q: %v", accessor.GetExpression(), err))} + } + + compileRes, ok := result.(celCompileResult) + if !ok { + return field.ErrorList{field.InternalError(path, fmt.Errorf("expected result to be of type celCompileResult, but got %T", result))} + } + + if compileRes.err != nil { + return field.ErrorList{field.Invalid(path, accessor.GetExpression(), compileRes.err.Error())} + } + + costRecorder.AddRecording(path, compileRes.cost) + + return nil +} + +type fixedSizeEstimator struct { + size uint64 +} + +func (fcse *fixedSizeEstimator) EstimateSize(element checker.AstNode) *checker.SizeEstimate { + return &checker.SizeEstimate{Min: fcse.size, Max: fcse.size} +} + +func (fcse *fixedSizeEstimator) EstimateCallCost(function, overloadID string, target *checker.AstNode, args []checker.AstNode) *checker.CallEstimate { + return nil +} diff --git a/vendor/k8s.io/kubernetes/pkg/controller/job/job_controller.go b/vendor/k8s.io/kubernetes/pkg/controller/job/job_controller.go index 303d5c3706..b28293e605 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/job/job_controller.go +++ b/vendor/k8s.io/kubernetes/pkg/controller/job/job_controller.go @@ -537,6 +537,12 @@ func (jm *Controller) deleteJob(logger klog.Logger, obj interface{}) { } } jm.enqueueLabelSelector(jobObj) + + key := cache.MetaObjectToName(jobObj).String() + err := jm.podBackoffStore.removeBackoffRecord(key) + if err != nil { + utilruntime.HandleError(fmt.Errorf("error removing backoff record %w", err)) + } } func (jm *Controller) enqueueLabelSelector(jobObj *batch.Job) { diff --git a/vendor/k8s.io/kubernetes/pkg/features/openshift_features.go b/vendor/k8s.io/kubernetes/pkg/features/openshift_features.go index 
2ed4e14b85..51bbe0b37b 100644 --- a/vendor/k8s.io/kubernetes/pkg/features/openshift_features.go +++ b/vendor/k8s.io/kubernetes/pkg/features/openshift_features.go @@ -7,6 +7,7 @@ import ( var RouteExternalCertificate featuregate.Feature = "RouteExternalCertificate" var MinimumKubeletVersion featuregate.Feature = "MinimumKubeletVersion" +var StoragePerformantSecurityPolicy featuregate.Feature = "StoragePerformantSecurityPolicy" // registerOpenshiftFeatures injects openshift-specific feature gates func registerOpenshiftFeatures() { @@ -18,4 +19,8 @@ func registerOpenshiftFeatures() { defaultVersionedKubernetesFeatureGates[MinimumKubeletVersion] = featuregate.VersionedSpecs{ {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha}, } + // Introduced in 4.20 + defaultVersionedKubernetesFeatureGates[StoragePerformantSecurityPolicy] = featuregate.VersionedSpecs{ + {Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Alpha}, + } } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS b/vendor/k8s.io/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS index d484fa4fc2..a654af9e87 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/DOWNSTREAM_OWNERS @@ -13,5 +13,6 @@ reviewers: approvers: - sjenning - mrunalp + - rphillips component: node diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/allocation_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/allocation_manager.go index 5287ba169b..2eb701c0b9 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/allocation_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/allocation_manager.go @@ -109,14 +109,20 @@ func (m *manager) GetContainerResourceAllocation(podUID types.UID, containerName // UpdatePodFromAllocation overwrites the pod spec with the allocation. // This function does a deep copy only if updates are needed. 
func (m *manager) UpdatePodFromAllocation(pod *v1.Pod) (*v1.Pod, bool) { - // TODO(tallclair): This clones the whole cache, but we only need 1 pod. - allocs := m.allocated.GetPodResourceInfoMap() - return updatePodFromAllocation(pod, allocs) + if pod == nil { + return pod, false + } + + allocated, ok := m.allocated.GetPodResourceInfo(pod.UID) + if !ok { + return pod, false + } + + return updatePodFromAllocation(pod, allocated) } -func updatePodFromAllocation(pod *v1.Pod, allocs state.PodResourceInfoMap) (*v1.Pod, bool) { - allocated, found := allocs[pod.UID] - if !found { +func updatePodFromAllocation(pod *v1.Pod, allocated state.PodResourceInfo) (*v1.Pod, bool) { + if pod == nil { return pod, false } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state.go b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state.go index 96a2421f08..8022e10413 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state.go @@ -50,6 +50,7 @@ func (pr PodResourceInfoMap) Clone() PodResourceInfoMap { type Reader interface { GetContainerResources(podUID types.UID, containerName string) (v1.ResourceRequirements, bool) GetPodResourceInfoMap() PodResourceInfoMap + GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) } type writer interface { diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go index f6c5ce78c4..f41415c015 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_checkpoint.go @@ -112,13 +112,20 @@ func (sc *stateCheckpoint) GetContainerResources(podUID types.UID, containerName return sc.cache.GetContainerResources(podUID, containerName) } -// GetPodResourceInfoMap returns current pod resource information +// GetPodResourceInfoMap returns current pod resource 
information map func (sc *stateCheckpoint) GetPodResourceInfoMap() PodResourceInfoMap { sc.mux.RLock() defer sc.mux.RUnlock() return sc.cache.GetPodResourceInfoMap() } +// GetPodResourceInfo returns current pod resource information +func (sc *stateCheckpoint) GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) { + sc.mux.RLock() + defer sc.mux.RUnlock() + return sc.cache.GetPodResourceInfo(podUID) +} + // SetContainerResoruces sets resources information for a pod's container func (sc *stateCheckpoint) SetContainerResources(podUID types.UID, containerName string, resources v1.ResourceRequirements) error { sc.mux.Lock() @@ -172,6 +179,10 @@ func (sc *noopStateCheckpoint) GetPodResourceInfoMap() PodResourceInfoMap { return nil } +func (sc *noopStateCheckpoint) GetPodResourceInfo(_ types.UID) (PodResourceInfo, bool) { + return PodResourceInfo{}, false +} + func (sc *noopStateCheckpoint) SetContainerResources(_ types.UID, _ string, _ v1.ResourceRequirements) error { return nil } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_mem.go b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_mem.go index e7e44503c6..e4b5210524 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_mem.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/allocation/state/state_mem.go @@ -65,6 +65,14 @@ func (s *stateMemory) GetPodResourceInfoMap() PodResourceInfoMap { return s.podResources.Clone() } +func (s *stateMemory) GetPodResourceInfo(podUID types.UID) (PodResourceInfo, bool) { + s.RLock() + defer s.RUnlock() + + resourceInfo, ok := s.podResources[podUID] + return resourceInfo, ok +} + func (s *stateMemory) SetContainerResources(podUID types.UID, containerName string, resources v1.ResourceRequirements) error { s.Lock() defer s.Unlock() diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1.go b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1.go index d69aba1e88..2a1525bae3 100644 --- 
a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/server_v1.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/klog/v2" podutil "k8s.io/kubernetes/pkg/api/v1/pod" kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/metrics" @@ -36,17 +37,21 @@ type v1PodResourcesServer struct { cpusProvider CPUsProvider memoryProvider MemoryProvider dynamicResourcesProvider DynamicResourcesProvider + useActivePods bool } // NewV1PodResourcesServer returns a PodResourcesListerServer which lists pods provided by the PodsProvider // with device information provided by the DevicesProvider func NewV1PodResourcesServer(providers PodResourcesProviders) podresourcesv1.PodResourcesListerServer { + useActivePods := true + klog.InfoS("podresources", "method", "list", "useActivePods", useActivePods) return &v1PodResourcesServer{ podsProvider: providers.Pods, devicesProvider: providers.Devices, cpusProvider: providers.Cpus, memoryProvider: providers.Memory, dynamicResourcesProvider: providers.DynamicResources, + useActivePods: useActivePods, } } @@ -55,7 +60,13 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *podresourcesv1.Lis metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc() metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc() - pods := p.podsProvider.GetPods() + var pods []*v1.Pod + if p.useActivePods { + pods = p.podsProvider.GetActivePods() + } else { + pods = p.podsProvider.GetPods() + } + podResources := make([]*podresourcesv1.PodResources, len(pods)) p.devicesProvider.UpdateAllocatedDevices() diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/types.go b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/types.go index ee1269d969..66d7c6cfda 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/types.go +++ 
b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/podresources/types.go @@ -34,6 +34,7 @@ type DevicesProvider interface { // PodsProvider knows how to provide the pods admitted by the node type PodsProvider interface { + GetActivePods() []*v1.Pod GetPods() []*v1.Pod GetPodByName(namespace, name string) (*v1.Pod, bool) } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go index 6065037622..03823deca2 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go @@ -521,7 +521,10 @@ func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo, if isRuntimeClassInImageCriAPIEnabled { imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage) } + + im.imageRecordsLock.Lock() delete(im.imageRecords, imageKey) + im.imageRecordsLock.Unlock() metrics.ImageGarbageCollectedTotal.WithLabelValues(reason).Inc() return err diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go index 520da786d6..385179a648 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go @@ -3173,6 +3173,22 @@ func (kl *Kubelet) ListenAndServeReadOnly(address net.IP, port uint, tp trace.Tr server.ListenAndServeKubeletReadOnlyServer(kl, kl.resourceAnalyzer, kl.containerManager.GetHealthCheckers(), kl.flagz, address, port, tp) } +type kubeletPodsProvider struct { + kl *Kubelet +} + +func (pp *kubeletPodsProvider) GetActivePods() []*v1.Pod { + return pp.kl.GetActivePods() +} + +func (pp *kubeletPodsProvider) GetPods() []*v1.Pod { + return pp.kl.podManager.GetPods() +} + +func (pp *kubeletPodsProvider) GetPodByName(namespace, name string) (*v1.Pod, bool) { + return pp.kl.podManager.GetPodByName(namespace, name) +} + // ListenAndServePodResources runs the kubelet podresources grpc service func (kl 
*Kubelet) ListenAndServePodResources() { endpoint, err := util.LocalEndpoint(kl.getPodResourcesDir(), podresources.Socket) @@ -3182,7 +3198,7 @@ func (kl *Kubelet) ListenAndServePodResources() { } providers := podresources.PodResourcesProviders{ - Pods: kl.podManager, + Pods: &kubeletPodsProvider{kl: kl}, Devices: kl.containerManager, Cpus: kl.containerManager, Memory: kl.containerManager, diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go index 92321bd9bd..c449a3df35 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go @@ -137,7 +137,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, // If pod has exclusive cpu and the container in question has integer cpu requests // the cfs quota will not be enforced disableCPUQuota := utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableCPUQuotaWithExclusiveCPUs) && m.containerManager.ContainerHasExclusiveCPUs(pod, container) - klog.V(2).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) + klog.V(5).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) lcr := m.calculateLinuxResources(cpuRequest, cpuLimit, memoryLimit, disableCPUQuota) lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container, diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go index ebf8d4e620..bd2472199d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_sandbox_linux.go @@ -59,8 +59,8 @@ func (m *kubeGenericRuntimeManager) calculateSandboxResources(pod *v1.Pod) *runt // If pod has 
exclusive cpu the sandbox will not have cfs quote enforced disableCPUQuota := utilfeature.DefaultFeatureGate.Enabled(features.DisableCPUQuotaWithExclusiveCPUs) && m.containerManager.PodHasExclusiveCPUs(pod) - klog.V(2).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) + klog.V(5).InfoS("Enforcing CFS quota", "pod", klog.KObj(pod), "unlimited", disableCPUQuota) return m.calculateLinuxResources(cpuRequest, lim.Cpu(), lim.Memory(), disableCPUQuota) } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/managed/managed.go b/vendor/k8s.io/kubernetes/pkg/kubelet/managed/managed.go index 4063d5381d..d9266e440f 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/managed/managed.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/managed/managed.go @@ -117,14 +117,36 @@ func GenerateResourceName(workloadName string) v1.ResourceName { func updateContainers(workloadName string, pod *v1.Pod) error { updateContainer := func(container *v1.Container) error { if container.Resources.Requests == nil { - return fmt.Errorf("managed container %v does not have Resource.Requests", container.Name) + // Nothing to modify, but that is OK, it will not + // change the QoS class of the modified Pod + return nil } - if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok { + + _, cpuOk := container.Resources.Requests[v1.ResourceCPU] + _, memoryOk := container.Resources.Requests[v1.ResourceMemory] + + // It is possible memory is configured using limits only and that implies + // requests with the same value, check for that in case memory requests + // are not present by themselves. 
+ if !memoryOk && container.Resources.Limits != nil { + _, memoryOk = container.Resources.Limits[v1.ResourceMemory] + } + + // When both cpu and memory requests are missing, there is nothing + // to do + if !cpuOk && !memoryOk { + return nil + } + + // Both memory and cpu have to be set to make sure stripping them + // will not change the QoS class of the Pod + if !cpuOk { return fmt.Errorf("managed container %v does not have cpu requests", container.Name) } - if _, ok := container.Resources.Requests[v1.ResourceMemory]; !ok { + if !memoryOk { return fmt.Errorf("managed container %v does not have memory requests", container.Name) } + if container.Resources.Limits == nil { container.Resources.Limits = v1.ResourceList{} } diff --git a/vendor/k8s.io/kubernetes/pkg/registry/batch/job/strategy.go b/vendor/k8s.io/kubernetes/pkg/registry/batch/job/strategy.go index 1de03e459d..71cd60c059 100644 --- a/vendor/k8s.io/kubernetes/pkg/registry/batch/job/strategy.go +++ b/vendor/k8s.io/kubernetes/pkg/registry/batch/job/strategy.go @@ -379,6 +379,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt isUncountedTerminatedPodsChanged := !apiequality.Semantic.DeepEqual(oldJob.Status.UncountedTerminatedPods, newJob.Status.UncountedTerminatedPods) isReadyChanged := !ptr.Equal(oldJob.Status.Ready, newJob.Status.Ready) isTerminatingChanged := !ptr.Equal(oldJob.Status.Terminating, newJob.Status.Terminating) + isSuspendedWithZeroCompletions := ptr.Equal(newJob.Spec.Suspend, ptr.To(true)) && ptr.Equal(newJob.Spec.Completions, ptr.To[int32](0)) return batchvalidation.JobStatusValidationOptions{ // We allow to decrease the counter for succeeded pods for jobs which @@ -394,7 +395,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt RejectFailedJobWithoutFailureTarget: isJobFailedChanged || isFailedIndexesChanged, RejectCompleteJobWithoutSuccessCriteriaMet: isJobCompleteChanged || isJobSuccessCriteriaMetChanged, 
RejectFinishedJobWithActivePods: isJobFinishedChanged || isActiveChanged, - RejectFinishedJobWithoutStartTime: isJobFinishedChanged || isStartTimeChanged, + RejectFinishedJobWithoutStartTime: (isJobFinishedChanged || isStartTimeChanged) && !isSuspendedWithZeroCompletions, RejectFinishedJobWithUncountedTerminatedPods: isJobFinishedChanged || isUncountedTerminatedPodsChanged, RejectStartTimeUpdateForUnsuspendedJob: isStartTimeChanged, RejectCompletionTimeBeforeStartTime: isStartTimeChanged || isCompletionTimeChanged, diff --git a/vendor/modules.txt b/vendor/modules.txt index fd7cfe4ab0..262a162d10 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -818,7 +818,7 @@ github.com/x448/float16 # github.com/xlab/treeprint v1.2.0 ## explicit; go 1.13 github.com/xlab/treeprint -# go.etcd.io/etcd/api/v3 v3.5.21 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/api/v3 v3.5.21 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/api/v3/authpb go.etcd.io/etcd/api/v3/etcdserverpb @@ -826,7 +826,7 @@ go.etcd.io/etcd/api/v3/membershippb go.etcd.io/etcd/api/v3/mvccpb go.etcd.io/etcd/api/v3/v3rpc/rpctypes go.etcd.io/etcd/api/v3/version -# go.etcd.io/etcd/client/pkg/v3 v3.5.21 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/client/pkg/v3 v3.5.21 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 1.23.0 go.etcd.io/etcd/client/pkg/v3/fileutil go.etcd.io/etcd/client/pkg/v3/logutil @@ -834,7 +834,7 @@ go.etcd.io/etcd/client/pkg/v3/systemd go.etcd.io/etcd/client/pkg/v3/tlsutil go.etcd.io/etcd/client/pkg/v3/transport go.etcd.io/etcd/client/pkg/v3/types -# go.etcd.io/etcd/client/v3 v3.5.21 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/client/v3 v3.5.21 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c ## explicit; go 
1.23.0 go.etcd.io/etcd/client/v3 go.etcd.io/etcd/client/v3/credentials @@ -1186,7 +1186,7 @@ gopkg.in/yaml.v2 # gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 -# k8s.io/api v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/api +# k8s.io/api v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/api ## explicit; go 1.24.0 k8s.io/api/admission/v1 k8s.io/api/admission/v1beta1 @@ -1248,7 +1248,7 @@ k8s.io/api/storage/v1 k8s.io/api/storage/v1alpha1 k8s.io/api/storage/v1beta1 k8s.io/api/storagemigration/v1alpha1 -# k8s.io/apiextensions-apiserver v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver +# k8s.io/apiextensions-apiserver v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver ## explicit; go 1.24.0 k8s.io/apiextensions-apiserver/pkg/apihelpers k8s.io/apiextensions-apiserver/pkg/apis/apiextensions @@ -1295,7 +1295,7 @@ k8s.io/apiextensions-apiserver/pkg/generated/openapi k8s.io/apiextensions-apiserver/pkg/registry/customresource k8s.io/apiextensions-apiserver/pkg/registry/customresource/tableconvertor k8s.io/apiextensions-apiserver/pkg/registry/customresourcedefinition -# k8s.io/apimachinery v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery +# k8s.io/apimachinery v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery ## explicit; go 1.24.0 k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors @@ -1373,7 +1373,7 @@ k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/netutil k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver +# k8s.io/apiserver v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver ## explicit; go 1.24.0 k8s.io/apiserver/pkg/admission 
k8s.io/apiserver/pkg/admission/configuration @@ -1558,13 +1558,13 @@ k8s.io/apiserver/plugin/pkg/authenticator/token/oidc k8s.io/apiserver/plugin/pkg/authenticator/token/webhook k8s.io/apiserver/plugin/pkg/authorizer/webhook k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics -# k8s.io/cli-runtime v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime +# k8s.io/cli-runtime v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime ## explicit; go 1.24.0 k8s.io/cli-runtime/pkg/genericclioptions k8s.io/cli-runtime/pkg/genericiooptions k8s.io/cli-runtime/pkg/printers k8s.io/cli-runtime/pkg/resource -# k8s.io/client-go v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go +# k8s.io/client-go v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go ## explicit; go 1.24.0 k8s.io/client-go/applyconfigurations k8s.io/client-go/applyconfigurations/admissionregistration/v1 @@ -1933,7 +1933,7 @@ k8s.io/client-go/util/keyutil k8s.io/client-go/util/retry k8s.io/client-go/util/watchlist k8s.io/client-go/util/workqueue -# k8s.io/cloud-provider v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cloud-provider +# k8s.io/cloud-provider v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cloud-provider ## explicit; go 1.24.0 k8s.io/cloud-provider k8s.io/cloud-provider/api @@ -1952,14 +1952,14 @@ k8s.io/cloud-provider/service/helpers k8s.io/cloud-provider/volume k8s.io/cloud-provider/volume/errors k8s.io/cloud-provider/volume/helpers -# k8s.io/cluster-bootstrap v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cluster-bootstrap +# k8s.io/cluster-bootstrap v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cluster-bootstrap ## explicit; go 1.24.0 k8s.io/cluster-bootstrap/token/api k8s.io/cluster-bootstrap/token/jws k8s.io/cluster-bootstrap/token/util k8s.io/cluster-bootstrap/util/secrets 
k8s.io/cluster-bootstrap/util/tokens -# k8s.io/component-base v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base +# k8s.io/component-base v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base ## explicit; go 1.24.0 k8s.io/component-base/cli k8s.io/component-base/cli/flag @@ -1998,7 +1998,7 @@ k8s.io/component-base/zpages/features k8s.io/component-base/zpages/flagz k8s.io/component-base/zpages/httputil k8s.io/component-base/zpages/statusz -# k8s.io/component-helpers v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-helpers +# k8s.io/component-helpers v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-helpers ## explicit; go 1.24.0 k8s.io/component-helpers/apimachinery/lease k8s.io/component-helpers/apps/poddisruptionbudget @@ -2012,7 +2012,7 @@ k8s.io/component-helpers/scheduling/corev1 k8s.io/component-helpers/scheduling/corev1/nodeaffinity k8s.io/component-helpers/storage/ephemeral k8s.io/component-helpers/storage/volume -# k8s.io/controller-manager v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/controller-manager +# k8s.io/controller-manager v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/controller-manager ## explicit; go 1.24.0 k8s.io/controller-manager/app k8s.io/controller-manager/config @@ -2029,22 +2029,22 @@ k8s.io/controller-manager/pkg/informerfactory k8s.io/controller-manager/pkg/leadermigration k8s.io/controller-manager/pkg/leadermigration/config k8s.io/controller-manager/pkg/leadermigration/options -# k8s.io/cri-api v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-api +# k8s.io/cri-api v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-api ## explicit; go 1.24.0 k8s.io/cri-api/pkg/apis k8s.io/cri-api/pkg/apis/runtime/v1 k8s.io/cri-api/pkg/errors -# k8s.io/cri-client v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-client 
+# k8s.io/cri-client v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-client ## explicit; go 1.24.0 k8s.io/cri-client/pkg k8s.io/cri-client/pkg/internal k8s.io/cri-client/pkg/logs k8s.io/cri-client/pkg/util -# k8s.io/csi-translation-lib v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/csi-translation-lib +# k8s.io/csi-translation-lib v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/csi-translation-lib ## explicit; go 1.24.0 k8s.io/csi-translation-lib k8s.io/csi-translation-lib/plugins -# k8s.io/dynamic-resource-allocation v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/dynamic-resource-allocation +# k8s.io/dynamic-resource-allocation v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/dynamic-resource-allocation ## explicit; go 1.24.0 k8s.io/dynamic-resource-allocation/api k8s.io/dynamic-resource-allocation/cel @@ -2052,14 +2052,14 @@ k8s.io/dynamic-resource-allocation/internal/queue k8s.io/dynamic-resource-allocation/resourceclaim k8s.io/dynamic-resource-allocation/resourceslice/tracker k8s.io/dynamic-resource-allocation/structured -# k8s.io/endpointslice v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/endpointslice +# k8s.io/endpointslice v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/endpointslice ## explicit; go 1.24.0 k8s.io/endpointslice k8s.io/endpointslice/metrics k8s.io/endpointslice/topologycache k8s.io/endpointslice/trafficdist k8s.io/endpointslice/util -# k8s.io/externaljwt v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/externaljwt +# k8s.io/externaljwt v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/externaljwt ## explicit; go 1.24.0 k8s.io/externaljwt/apis/v1alpha1 # k8s.io/gengo/v2 v2.0.0-20250207200755-1244d31929d7 @@ -2080,13 +2080,13 @@ k8s.io/klog/v2/internal/severity k8s.io/klog/v2/internal/sloghandler k8s.io/klog/v2/internal/verbosity k8s.io/klog/v2/textlogger 
-# k8s.io/kms v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kms +# k8s.io/kms v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kms ## explicit; go 1.24.0 k8s.io/kms/apis/v1beta1 k8s.io/kms/apis/v2 k8s.io/kms/pkg/service k8s.io/kms/pkg/util -# k8s.io/kube-aggregator v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-aggregator +# k8s.io/kube-aggregator v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-aggregator ## explicit; go 1.24.0 k8s.io/kube-aggregator/pkg/apis/apiregistration k8s.io/kube-aggregator/pkg/apis/apiregistration/install @@ -2119,7 +2119,7 @@ k8s.io/kube-aggregator/pkg/controllers/status/remote k8s.io/kube-aggregator/pkg/registry/apiservice k8s.io/kube-aggregator/pkg/registry/apiservice/etcd k8s.io/kube-aggregator/pkg/registry/apiservice/rest -# k8s.io/kube-controller-manager v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-controller-manager +# k8s.io/kube-controller-manager v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-controller-manager ## explicit; go 1.24.0 k8s.io/kube-controller-manager/config/v1alpha1 # k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff @@ -2153,11 +2153,11 @@ k8s.io/kube-openapi/pkg/validation/spec k8s.io/kube-openapi/pkg/validation/strfmt k8s.io/kube-openapi/pkg/validation/strfmt/bson k8s.io/kube-openapi/pkg/validation/validate -# k8s.io/kube-scheduler v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-scheduler +# k8s.io/kube-scheduler v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-scheduler ## explicit; go 1.24.0 k8s.io/kube-scheduler/config/v1 k8s.io/kube-scheduler/extender/v1 -# k8s.io/kubectl v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl +# k8s.io/kubectl v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl ## explicit; go 1.24.0 k8s.io/kubectl/pkg/apps 
k8s.io/kubectl/pkg/cmd/apiresources @@ -2192,7 +2192,7 @@ k8s.io/kubectl/pkg/util/storage k8s.io/kubectl/pkg/util/templates k8s.io/kubectl/pkg/util/term k8s.io/kubectl/pkg/validation -# k8s.io/kubelet v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet +# k8s.io/kubelet v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet ## explicit; go 1.24.0 k8s.io/kubelet/config/v1 k8s.io/kubelet/config/v1alpha1 @@ -3044,7 +3044,7 @@ k8s.io/kubernetes/third_party/forked/gonum/graph/simple k8s.io/kubernetes/third_party/forked/gonum/graph/traverse k8s.io/kubernetes/third_party/forked/libcontainer/apparmor k8s.io/kubernetes/third_party/forked/libcontainer/utils -# k8s.io/metrics v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/metrics +# k8s.io/metrics v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/metrics ## explicit; go 1.24.0 k8s.io/metrics/pkg/apis/custom_metrics k8s.io/metrics/pkg/apis/custom_metrics/v1beta1 @@ -3059,10 +3059,10 @@ k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1 k8s.io/metrics/pkg/client/custom_metrics k8s.io/metrics/pkg/client/custom_metrics/scheme k8s.io/metrics/pkg/client/external_metrics -# k8s.io/mount-utils v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/mount-utils +# k8s.io/mount-utils v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/mount-utils ## explicit; go 1.24.0 k8s.io/mount-utils -# k8s.io/pod-security-admission v1.33.2 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/pod-security-admission +# k8s.io/pod-security-admission v1.33.3 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/pod-security-admission ## explicit; go 1.24.0 k8s.io/pod-security-admission/admission k8s.io/pod-security-admission/admission/api @@ -3245,6 +3245,6 @@ sigs.k8s.io/yaml/goyaml.v3 # k8s.io/sample-apiserver => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/sample-apiserver # 
k8s.io/sample-cli-plugin => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/sample-cli-plugin # k8s.io/sample-controller => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/sample-controller -# go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250722140445-b5ad268120cc -# go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250722140445-b5ad268120cc +# go.etcd.io/etcd/api/v3 => github.com/openshift/etcd/api/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/client/pkg/v3 => github.com/openshift/etcd/client/pkg/v3 v3.5.1-0.20250829062802-9c065d4d842c +# go.etcd.io/etcd/client/v3 => github.com/openshift/etcd/client/v3 v3.5.1-0.20250829062802-9c065d4d842c