From 76652fa4fabba907b826283b46898a27786ade26 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 13 Sep 2022 11:36:27 -0700 Subject: [PATCH] test/extended/prometheus: Consider telemeterClient.enabled I'd disabled Telemetry for the bulk of the CI fleet in openshift/release@3c1da8eb20 (OTA-740: ci-operator/step-registry/ipi/conf/telemetry: Disable Telemetry (openshift/release#32153), 2022-09-13). But that lead to many failures for: Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present This commit extends the checks for Telemetry enablement to include the monitoring-specific ConfigMap, as well as the previously-checked pull-secret token. I'm copy/pasting a subset of the monitoring configuration structure instead of vendoring the config, because we aren't vendoring the cluster-monitoring-operator in origin today, bumping to keep such vendoring up to date would be tedious, and the monitoring config API is unlikely to shift this knob around within the structure. It would be nice if the monitoring config schema moved into opensihft/api or somewhere else where it would be more clear that it was a cluster-admin-facing API, with the usual stability commitments, but it's not there today. --- cmd/openshift-tests/minimal.go | 2 +- test/extended/prometheus/prometheus.go | 82 +++++++++++++++---- .../generated/zz_generated.annotations.go | 2 +- test/extended/util/annotate/rules.go | 2 +- 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/cmd/openshift-tests/minimal.go b/cmd/openshift-tests/minimal.go index bbec5ed995c6..3ab8fd987ba7 100644 --- a/cmd/openshift-tests/minimal.go +++ b/cmd/openshift-tests/minimal.go @@ -2572,7 +2572,7 @@ var ( "[sig-builds][Feature:Builds] verify /run filesystem contents are writeable using a simple Docker Strategy Build [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {}, "[sig-builds][Feature:Builds] build have source revision metadata started build should contain source revision information [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {}, "[sig-network] multicast when using one of the OpenshiftSDN modes 'redhat/openshift-ovs-multitenant, redhat/openshift-ovs-networkpolicy' should allow multicast traffic in namespaces where it is enabled [Suite:openshift/conformance/parallel]": {}, - "[sig-instrumentation] Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {}, + "[sig-instrumentation] Prometheus when installed on the cluster should report telemetry [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {}, "[sig-cluster-lifecycle] cluster upgrade should complete in 90.00 minutes": {}, "[sig-imageregistry][Feature:ImageTriggers] Image change build triggers TestSimpleImageChangeBuildTriggerFromImageStreamTagCustomWithConfigChange [Suite:openshift/conformance/parallel]": {}, "[sig-auth][Feature:OAuthServer] [Token Expiration] Using a OAuth client with a non-default token max age to generate tokens that do not expire works as expected when using a code authorization flow [Suite:openshift/conformance/parallel]": {}, diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 24cbf0f5dd3c..495cd3a0a303 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -29,6 +29,7 @@ import ( e2e "k8s.io/kubernetes/test/e2e/framework" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" admissionapi "k8s.io/pod-security-admission/api" + "sigs.k8s.io/yaml" configv1 "github.com/openshift/api/config/v1" @@ -39,6 +40,16 @@ import ( helper "github.com/openshift/origin/test/extended/util/prometheus" ) +// ClusterMonitoringConfiguration is a subset of https://github.com/openshift/cluster-monitoring-operator/blob/8d331d78b22948d36c20da0552763ddd8a4e2093/pkg/manifests/config.go#L124-L136 +type ClusterMonitoringConfiguration struct { + TelemeterClientConfig *TelemeterClientConfig `json:"telemeterClient"` +} + +// TelemeterClientConfig is a subset of https://github.com/openshift/cluster-monitoring-operator/blob/8d331d78b22948d36c20da0552763ddd8a4e2093/pkg/manifests/config.go#L335-L342 +type TelemeterClientConfig struct { + Enabled *bool `json:"enabled"` +} + var _ = g.Describe("[sig-instrumentation][Late] OpenShift alerting rules [apigroup:image.openshift.io]", func() { defer g.GinkgoRecover() @@ -186,6 +197,7 @@ var _ = g.Describe("[sig-instrumentation][Late] OpenShift alerting rules [apigro var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { defer g.GinkgoRecover() + ctx := context.TODO() var ( oc = exutil.NewCLIWithoutNamespace("prometheus") ) @@ -382,8 +394,10 @@ sort_desc( }) g.It("shouldn't exceed the 650 series limit of total series sent via telemetry from each cluster", func() { - if !hasPullSecret(oc.AdminKubeClient(), "cloud.openshift.com") { - e2eskipper.Skipf("Telemetry is disabled") + if enabled, err := telemetryIsEnabled(ctx, oc.AdminKubeClient()); err != nil { + e2e.Failf("could not determine if Telemetry is enabled: %v", err) + } else { + e2eskipper.Skipf("Telemetry is disabled: %v", enabled) } // we only consider series sent since the beginning of the test @@ -419,6 +433,7 @@ sort_desc( var _ = g.Describe("[sig-instrumentation] Prometheus", func() { defer g.GinkgoRecover() + ctx := context.TODO() var ( oc = exutil.NewCLIWithPodSecurityLevel("prometheus", admissionapi.LevelBaseline) @@ -439,9 +454,11 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() { }) g.Describe("when installed on the cluster", func() { - g.It("should report telemetry if a cloud.openshift.com token is present [Late]", func() { - if !hasPullSecret(oc.AdminKubeClient(), "cloud.openshift.com") { - e2eskipper.Skipf("Telemetry is disabled") + g.It("should report telemetry [Late]", func() { + if enabled, err := telemetryIsEnabled(ctx, oc.AdminKubeClient()); err != nil { + e2e.Failf("could not determine if Telemetry is enabled: %v", err) + } else { + e2eskipper.Skipf("Telemetry is disabled: %v", enabled) } tests := map[string]bool{} @@ -900,17 +917,26 @@ func getBearerTokenURLViaPod(ns, execPodName, url, bearer string) (string, error return output, nil } -func hasPullSecret(client clientset.Interface, name string) bool { - scrt, err := client.CoreV1().Secrets("openshift-config").Get(context.Background(), "pull-secret", metav1.GetOptions{}) +// telemetryIsEnabled returns (nil, nil) if Telemetry is enabled, +// (error, nil) if Telemetry is not enabled, and (_, error) if it fails +// to determine whether or not Telemetry is enabled. +func telemetryIsEnabled(ctx context.Context, client clientset.Interface) (enabled error, err error) { + domain := "cloud.openshift.com" + if hasSecret, err := hasPullSecret(ctx, client, domain); err != nil || hasSecret != nil { + return hasSecret, err + } + + return isTelemeterClientEnabled(ctx, client) +} + +func hasPullSecret(ctx context.Context, client clientset.Interface, name string) (enabled error, err error) { + scrt, err := client.CoreV1().Secrets("openshift-config").Get(ctx, "pull-secret", metav1.GetOptions{}) if err != nil { - if kapierrs.IsNotFound(err) { - return false - } - e2e.Failf("could not retrieve pull-secret: %v", err) + return nil, fmt.Errorf("could not retrieve pull-secret: %w", err) } if scrt.Type != v1.SecretTypeDockerConfigJson { - e2e.Failf("error expecting secret type %s got %s", v1.SecretTypeDockerConfigJson, scrt.Type) + return nil, fmt.Errorf("error expecting openshift-config/pull-secret type %s got %s", v1.SecretTypeDockerConfigJson, scrt.Type) } ps := struct { @@ -920,9 +946,37 @@ func hasPullSecret(client clientset.Interface, name string) bool { }{} if err := json.Unmarshal(scrt.Data[v1.DockerConfigJsonKey], &ps); err != nil { - e2e.Failf("could not unmarshal pullSecret from openshift-config/pull-secret: %v", err) + return nil, fmt.Errorf("could not unmarshal pullSecret from openshift-config/pull-secret: %w", err) + } + + if len(ps.Auths[name].Auth) == 0 { + return fmt.Errorf("openshift-config/pull-secret does not contain auth for %s", name), nil + } + + return nil, nil +} + +func isTelemeterClientEnabled(ctx context.Context, client clientset.Interface) (enabled error, err error) { + config, err := client.CoreV1().ConfigMaps("openshift-monitoring").Get(ctx, "cluster-monitoring-config", metav1.GetOptions{}) + if err != nil { + if kapierrs.IsNotFound(err) { + return nil, nil // Telemetry is enabled by default + } + return nil, fmt.Errorf("could not retrieve monitoring configuration: %w", err) + } + var structuredConfig ClusterMonitoringConfiguration + if yamlConfig, ok := config.Data["config.yaml"]; !ok { + return nil, fmt.Errorf("openshift-monitoring/cluster-monitoring-config data lacks a config.yaml key: %v", config.Data) + } else if err := yaml.Unmarshal([]byte(yamlConfig), &structuredConfig); err != nil { + return nil, fmt.Errorf("error unmarshalling openshift-monitoring/cluster-monitoring-config config.yaml: %w", err) + } + if structuredConfig.TelemeterClientConfig == nil || structuredConfig.TelemeterClientConfig.Enabled == nil { + return nil, nil // Telemetry is enabled by default + } + if !*structuredConfig.TelemeterClientConfig.Enabled { + return fmt.Errorf("openshift-monitoring/cluster-monitoring-config telemeterClient enabled is: %t", *structuredConfig.TelemeterClientConfig.Enabled), nil } - return len(ps.Auths[name].Auth) > 0 + return nil, nil } func isTechPreviewCluster(oc *exutil.CLI) bool { diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go index 7c95730fde8e..5044d00ebc30 100644 --- a/test/extended/util/annotate/generated/zz_generated.annotations.go +++ b/test/extended/util/annotate/generated/zz_generated.annotations.go @@ -2167,7 +2167,7 @@ var annotations = map[string]string{ "[Top Level] [sig-instrumentation] Prometheus when installed on the cluster should provide named network metrics [apigroup:project.openshift.io]": "should provide named network metrics [apigroup:project.openshift.io] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]", - "[Top Level] [sig-instrumentation] Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present [Late]": "should report telemetry if a cloud.openshift.com token is present [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]", + "[Top Level] [sig-instrumentation] Prometheus when installed on the cluster should report telemetry [Late]": "should report telemetry [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]", "[Top Level] [sig-instrumentation] Prometheus when installed on the cluster should start and expose a secured proxy and unsecured metrics [apigroup:config.openshift.io]": "should start and expose a secured proxy and unsecured metrics [apigroup:config.openshift.io] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]", diff --git a/test/extended/util/annotate/rules.go b/test/extended/util/annotate/rules.go index da997c421ede..270a0f2e763d 100644 --- a/test/extended/util/annotate/rules.go +++ b/test/extended/util/annotate/rules.go @@ -244,7 +244,7 @@ var ( `\[sig-instrumentation\] Prometheus when installed on the cluster should have non-Pod host cAdvisor metrics`, `\[sig-instrumentation\] Prometheus when installed on the cluster should provide ingress metrics`, `\[sig-instrumentation\] Prometheus when installed on the cluster should provide named network metrics`, - `\[sig-instrumentation\] Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present \[Late\]`, + `\[sig-instrumentation\] Prometheus when installed on the cluster should report telemetry \[Late\]`, `\[sig-instrumentation\] Prometheus when installed on the cluster should start and expose a secured proxy and unsecured metrics`, `\[sig-instrumentation\] Prometheus when installed on the cluster shouldn't have failing rules evaluation`, `\[sig-instrumentation\] Prometheus when installed on the cluster shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured \[Early\]`,