Skip to content

Commit 76652fa

Browse files
committed
test/extended/prometheus: Consider telemeterClient.enabled
I'd disabled Telemetry for the bulk of the CI fleet in openshift/release@3c1da8eb20 (OTA-740: ci-operator/step-registry/ipi/conf/telemetry: Disable Telemetry (openshift/release#32153), 2022-09-13). But that lead to many failures for: Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present This commit extends the checks for Telemetry enablement to include the monitoring-specific ConfigMap, as well as the previously-checked pull-secret token. I'm copy/pasting a subset of the monitoring configuration structure instead of vendoring the config, because we aren't vendoring the cluster-monitoring-operator in origin today, bumping to keep such vendoring up to date would be tedious, and the monitoring config API is unlikely to shift this knob around within the structure. It would be nice if the monitoring config schema moved into opensihft/api or somewhere else where it would be more clear that it was a cluster-admin-facing API, with the usual stability commitments, but it's not there today.
1 parent 9509a2b commit 76652fa

4 files changed

Lines changed: 71 additions & 17 deletions

File tree

cmd/openshift-tests/minimal.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2572,7 +2572,7 @@ var (
25722572
"[sig-builds][Feature:Builds] verify /run filesystem contents are writeable using a simple Docker Strategy Build [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {},
25732573
"[sig-builds][Feature:Builds] build have source revision metadata started build should contain source revision information [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {},
25742574
"[sig-network] multicast when using one of the OpenshiftSDN modes 'redhat/openshift-ovs-multitenant, redhat/openshift-ovs-networkpolicy' should allow multicast traffic in namespaces where it is enabled [Suite:openshift/conformance/parallel]": {},
2575-
"[sig-instrumentation] Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {},
2575+
"[sig-instrumentation] Prometheus when installed on the cluster should report telemetry [Late] [Skipped:Disconnected] [Suite:openshift/conformance/parallel]": {},
25762576
"[sig-cluster-lifecycle] cluster upgrade should complete in 90.00 minutes": {},
25772577
"[sig-imageregistry][Feature:ImageTriggers] Image change build triggers TestSimpleImageChangeBuildTriggerFromImageStreamTagCustomWithConfigChange [Suite:openshift/conformance/parallel]": {},
25782578
"[sig-auth][Feature:OAuthServer] [Token Expiration] Using a OAuth client with a non-default token max age to generate tokens that do not expire works as expected when using a code authorization flow [Suite:openshift/conformance/parallel]": {},

test/extended/prometheus/prometheus.go

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
e2e "k8s.io/kubernetes/test/e2e/framework"
3030
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
3131
admissionapi "k8s.io/pod-security-admission/api"
32+
"sigs.k8s.io/yaml"
3233

3334
configv1 "github.com/openshift/api/config/v1"
3435

@@ -39,6 +40,16 @@ import (
3940
helper "github.com/openshift/origin/test/extended/util/prometheus"
4041
)
4142

43+
// ClusterMonitoringConfiguration is a subset of https://github.com/openshift/cluster-monitoring-operator/blob/8d331d78b22948d36c20da0552763ddd8a4e2093/pkg/manifests/config.go#L124-L136
44+
type ClusterMonitoringConfiguration struct {
45+
TelemeterClientConfig *TelemeterClientConfig `json:"telemeterClient"`
46+
}
47+
48+
// TelemeterClientConfig is a subset of https://github.com/openshift/cluster-monitoring-operator/blob/8d331d78b22948d36c20da0552763ddd8a4e2093/pkg/manifests/config.go#L335-L342
49+
type TelemeterClientConfig struct {
50+
Enabled *bool `json:"enabled"`
51+
}
52+
4253
var _ = g.Describe("[sig-instrumentation][Late] OpenShift alerting rules [apigroup:image.openshift.io]", func() {
4354
defer g.GinkgoRecover()
4455

@@ -186,6 +197,7 @@ var _ = g.Describe("[sig-instrumentation][Late] OpenShift alerting rules [apigro
186197

187198
var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() {
188199
defer g.GinkgoRecover()
200+
ctx := context.TODO()
189201
var (
190202
oc = exutil.NewCLIWithoutNamespace("prometheus")
191203
)
@@ -382,8 +394,10 @@ sort_desc(
382394
})
383395

384396
g.It("shouldn't exceed the 650 series limit of total series sent via telemetry from each cluster", func() {
385-
if !hasPullSecret(oc.AdminKubeClient(), "cloud.openshift.com") {
386-
e2eskipper.Skipf("Telemetry is disabled")
397+
if enabled, err := telemetryIsEnabled(ctx, oc.AdminKubeClient()); err != nil {
398+
e2e.Failf("could not determine if Telemetry is enabled: %v", err)
399+
} else {
400+
e2eskipper.Skipf("Telemetry is disabled: %v", enabled)
387401
}
388402

389403
// we only consider series sent since the beginning of the test
@@ -419,6 +433,7 @@ sort_desc(
419433

420434
var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
421435
defer g.GinkgoRecover()
436+
ctx := context.TODO()
422437
var (
423438
oc = exutil.NewCLIWithPodSecurityLevel("prometheus", admissionapi.LevelBaseline)
424439

@@ -439,9 +454,11 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
439454
})
440455

441456
g.Describe("when installed on the cluster", func() {
442-
g.It("should report telemetry if a cloud.openshift.com token is present [Late]", func() {
443-
if !hasPullSecret(oc.AdminKubeClient(), "cloud.openshift.com") {
444-
e2eskipper.Skipf("Telemetry is disabled")
457+
g.It("should report telemetry [Late]", func() {
458+
if enabled, err := telemetryIsEnabled(ctx, oc.AdminKubeClient()); err != nil {
459+
e2e.Failf("could not determine if Telemetry is enabled: %v", err)
460+
} else {
461+
e2eskipper.Skipf("Telemetry is disabled: %v", enabled)
445462
}
446463

447464
tests := map[string]bool{}
@@ -900,17 +917,26 @@ func getBearerTokenURLViaPod(ns, execPodName, url, bearer string) (string, error
900917
return output, nil
901918
}
902919

903-
func hasPullSecret(client clientset.Interface, name string) bool {
904-
scrt, err := client.CoreV1().Secrets("openshift-config").Get(context.Background(), "pull-secret", metav1.GetOptions{})
920+
// telemetryIsEnabled returns (nil, nil) if Telemetry is enabled,
921+
// (error, nil) if Telemetry is not enabled, and (_, error) if it fails
922+
// to determine whether or not Telemetry is enabled.
923+
func telemetryIsEnabled(ctx context.Context, client clientset.Interface) (enabled error, err error) {
924+
domain := "cloud.openshift.com"
925+
if hasSecret, err := hasPullSecret(ctx, client, domain); err != nil || hasSecret != nil {
926+
return hasSecret, err
927+
}
928+
929+
return isTelemeterClientEnabled(ctx, client)
930+
}
931+
932+
func hasPullSecret(ctx context.Context, client clientset.Interface, name string) (enabled error, err error) {
933+
scrt, err := client.CoreV1().Secrets("openshift-config").Get(ctx, "pull-secret", metav1.GetOptions{})
905934
if err != nil {
906-
if kapierrs.IsNotFound(err) {
907-
return false
908-
}
909-
e2e.Failf("could not retrieve pull-secret: %v", err)
935+
return nil, fmt.Errorf("could not retrieve pull-secret: %w", err)
910936
}
911937

912938
if scrt.Type != v1.SecretTypeDockerConfigJson {
913-
e2e.Failf("error expecting secret type %s got %s", v1.SecretTypeDockerConfigJson, scrt.Type)
939+
return nil, fmt.Errorf("error expecting openshift-config/pull-secret type %s got %s", v1.SecretTypeDockerConfigJson, scrt.Type)
914940
}
915941

916942
ps := struct {
@@ -920,9 +946,37 @@ func hasPullSecret(client clientset.Interface, name string) bool {
920946
}{}
921947

922948
if err := json.Unmarshal(scrt.Data[v1.DockerConfigJsonKey], &ps); err != nil {
923-
e2e.Failf("could not unmarshal pullSecret from openshift-config/pull-secret: %v", err)
949+
return nil, fmt.Errorf("could not unmarshal pullSecret from openshift-config/pull-secret: %w", err)
950+
}
951+
952+
if len(ps.Auths[name].Auth) == 0 {
953+
return fmt.Errorf("openshift-config/pull-secret does not contain auth for %s", name), nil
954+
}
955+
956+
return nil, nil
957+
}
958+
959+
func isTelemeterClientEnabled(ctx context.Context, client clientset.Interface) (enabled error, err error) {
960+
config, err := client.CoreV1().ConfigMaps("openshift-monitoring").Get(ctx, "cluster-monitoring-config", metav1.GetOptions{})
961+
if err != nil {
962+
if kapierrs.IsNotFound(err) {
963+
return nil, nil // Telemetry is enabled by default
964+
}
965+
return nil, fmt.Errorf("could not retrieve monitoring configuration: %w", err)
966+
}
967+
var structuredConfig ClusterMonitoringConfiguration
968+
if yamlConfig, ok := config.Data["config.yaml"]; !ok {
969+
return nil, fmt.Errorf("openshift-monitoring/cluster-monitoring-config data lacks a config.yaml key: %v", config.Data)
970+
} else if err := yaml.Unmarshal([]byte(yamlConfig), &structuredConfig); err != nil {
971+
return nil, fmt.Errorf("error unmarshalling openshift-monitoring/cluster-monitoring-config config.yaml: %w", err)
972+
}
973+
if structuredConfig.TelemeterClientConfig == nil || structuredConfig.TelemeterClientConfig.Enabled == nil {
974+
return nil, nil // Telemetry is enabled by default
975+
}
976+
if !*structuredConfig.TelemeterClientConfig.Enabled {
977+
return fmt.Errorf("openshift-monitoring/cluster-monitoring-config telemeterClient enabled is: %t", *structuredConfig.TelemeterClientConfig.Enabled), nil
924978
}
925-
return len(ps.Auths[name].Auth) > 0
979+
return nil, nil
926980
}
927981

928982
func isTechPreviewCluster(oc *exutil.CLI) bool {

test/extended/util/annotate/generated/zz_generated.annotations.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/extended/util/annotate/rules.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ var (
244244
`\[sig-instrumentation\] Prometheus when installed on the cluster should have non-Pod host cAdvisor metrics`,
245245
`\[sig-instrumentation\] Prometheus when installed on the cluster should provide ingress metrics`,
246246
`\[sig-instrumentation\] Prometheus when installed on the cluster should provide named network metrics`,
247-
`\[sig-instrumentation\] Prometheus when installed on the cluster should report telemetry if a cloud.openshift.com token is present \[Late\]`,
247+
`\[sig-instrumentation\] Prometheus when installed on the cluster should report telemetry \[Late\]`,
248248
`\[sig-instrumentation\] Prometheus when installed on the cluster should start and expose a secured proxy and unsecured metrics`,
249249
`\[sig-instrumentation\] Prometheus when installed on the cluster shouldn't have failing rules evaluation`,
250250
`\[sig-instrumentation\] Prometheus when installed on the cluster shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured \[Early\]`,

0 commit comments

Comments
 (0)